Phi-3 Mini June Model update (#129)
* Re-computed and updated Microsoft Phi numbers
* Trained a new model based on the June Phi and computed benchmarks
---------

Co-authored-by: Yingbei <[email protected]>
sanjay920 and tybalex authored Jul 9, 2024
1 parent eb30d06 commit f6b4c1c
Showing 3 changed files with 39 additions and 37 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -47,7 +47,7 @@ View full benchmark results for Rubra models and other models here: https://docs
| [**Rubra Llama-3 8B Instruct**](https://huggingface.co/rubra-ai/Meta-Llama-3-8B-Instruct) | 89.28% | 64.39 | 31.70 | 68.99 | 23.76 | 8.03 |
| [**Rubra Qwen2 7B Instruct**](https://huggingface.co/rubra-ai/Qwen2-7B-Instruct) | 85.71% | 68.88 | 30.36 | 75.82 | 28.72 | 8.08 |
| [**Rubra Mistral 7B Instruct v0.3**](https://huggingface.co/rubra-ai/Mistral-7B-Instruct-v0.3) | 73.57% | 59.12 | 29.91 | 43.29 | 11.14 | 7.69 |
-| [**Rubra Phi-3 Mini 128k Instruct**](https://huggingface.co/rubra-ai/Phi-3-mini-128k-instruct) | 65.71% | 66.66 | 29.24 | 74.09 | 26.84 | 7.45 |
+| [**Rubra Phi-3 Mini 128k Instruct**](https://huggingface.co/rubra-ai/Phi-3-mini-128k-instruct) | 70.00% | 66.66 | 29.24 | 74.09 | 26.84 | 7.45 |
| [**Rubra Mistral 7B Instruct v0.2**](https://huggingface.co/rubra-ai/Mistral-7B-Instruct-v0.2) | 69.28% | 58.90 | 29.91 | 34.12 | 8.36 | 7.36 |
| [**Rubra Gemma-1.1 2B Instruct**](https://huggingface.co/rubra-ai/gemma-1.1-2b-it) | 45.00% | 38.85 | 24.55 | 6.14 | 2.38 | 5.75 |

50 changes: 26 additions & 24 deletions docs/docs/models/Phi.md
@@ -43,32 +43,34 @@ Phi-3 is a state of the art, lightweight model. It performs exceptionally well d
<tr>
<td>Phi-3 Mini 128k Instruct</td>
<td>-</td>
-<td>68.17</td>
-<td>30.58</td>
-<td>80.44</td>
-<td>28.12</td>
-<td>7.92</td>
-<td>51</td>
-<td>45</td>
-<td>64</td>
-<td>0.31875</td>
-<td>0.28125</td>
-<td><strong>0.51875</strong></td>
+<td>69.36</td>
+<td>27.01</td>
+<td>83.7</td>
+<td>32.92</td>
+<td>8.02</td>
+<td>21</td>
+<td>72</td>
+<td>67</td>
+<td>0.13125</td>
+<td>0.45000</td>
+<td>0.340625</td>
</tr>
<tr>
<td>Rubra Enhanced Phi-3 Mini 128k Instruct</td>
-<td>65.71%</td>
-<td>66.66</td>
-<td>29.24</td>
-<td>74.09</td>
-<td>26.84</td>
-<td>7.45</td>
-<td>45</td>
-<td>51</td>
-<td>64</td>
-<td>0.28125</td>
-<td>0.31875</td>
-<td>0.48125</td>
+<td>70.0%</td>
+<td>67.87</td>
+<td>29.69</td>
+<td>79.45</td>
+<td>30.80</td>
+<td>8.21</td>
+<td>72</td>
+<td>21</td>
+<td>67</td>
+<td>0.45000</td>
+<td>0.13125</td>
+<td><strong>0.659375</strong></td>
</tr>
</tbody>
-</table>
+</table>
+
+* The above is based on the Phi-3 Mini that Microsoft updated in June 2024. The original Phi-3 Mini came out in April, and the Rubra-enhanced model has been trained on both versions.
24 changes: 12 additions & 12 deletions docs/src/components/BenchmarkTable.js
@@ -117,21 +117,21 @@ const data = [
model: 'Phi-3 Mini 128k Instruct',
params: 3.82,
functionCalling: '-',
-mmlu: '68.17',
-gpqa: '30.58',
-gsm8k: '80.44',
-math: '28.12',
-mtBench: '7.92',
+mmlu: '69.36',
+gpqa: '27.01',
+gsm8k: '83.7',
+math: '32.92',
+mtBench: '8.02',
},
{
model: 'Rubra Phi-3 Mini 128k Instruct',
-params: 4.27,
-functionCalling: '65.71%',
-mmlu: '66.66',
-gpqa: '29.24',
-gsm8k: '74.09',
-math: '26.84',
-mtBench: '7.45',
+params: 4.73,
+functionCalling: '70.00%',
+mmlu: '67.87',
+gpqa: '29.69',
+gsm8k: '79.45',
+math: '30.80',
+mtBench: '8.21',
},
{
model: 'Qwen2-7B-Instruct',
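For reference, each entry in the `data` array of `docs/src/components/BenchmarkTable.js` follows the shape visible in the diff above: scores are stored as strings, and `functionCalling` is either a percent string or `'-'` when not applicable. A minimal sketch of consuming one entry (the `toNumber` helper is hypothetical, not part of the repository):

```javascript
// Shape of one benchmark entry, matching docs/src/components/BenchmarkTable.js.
const rubraPhi3 = {
  model: 'Rubra Phi-3 Mini 128k Instruct',
  params: 4.73,
  functionCalling: '70.00%',
  mmlu: '67.87',
  gpqa: '29.69',
  gsm8k: '79.45',
  math: '30.80',
  mtBench: '8.21',
};

// Hypothetical helper: convert a score field to a number, treating '-' as null.
// parseFloat stops at the '%' suffix, so '70.00%' parses to 70.
function toNumber(field) {
  if (field === '-') return null;
  return parseFloat(field);
}

console.log(toNumber(rubraPhi3.functionCalling)); // 70
console.log(toNumber(rubraPhi3.mtBench)); // 8.21
```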
