✨ adds phi models
chriamue committed Jan 7, 2024
1 parent 0f26964 commit c702876
Showing 4 changed files with 43 additions and 49 deletions.
13 changes: 13 additions & 0 deletions README.md
@@ -16,10 +16,18 @@ cargo build --release

 ### Running
 
+Run the server
+
 ```bash
 cargo run --release
 ```
 
+Run one of the models
+
+```bash
+cargo run --release -- --model phi-v2 --prompt 'write me fibonacci in rust'
+```
+
 ### Docker
 
 ```bash
@@ -68,6 +76,7 @@ python test.py
 - [x] Zephyr
 - [x] OpenChat
 - [x] Starling
+- [x] [Phi](https://huggingface.co/microsoft/phi-2) (Phi-1, Phi-1.5, Phi-2)
 - [ ] GPT-Neo
 - [ ] GPT-J
 - [ ] Llama
@@ -77,6 +86,10 @@ python test.py
 
 ["lmz/candle-mistral"](https://huggingface.co/lmz/candle-mistral)
 
+### Phi
+
+["microsoft/phi-2"](https://huggingface.co/microsoft/phi-2)
+
 ## Performance
 
 The following table shows the performance metrics of the model on different systems:
1 change: 1 addition & 0 deletions src/llm/loader.rs
@@ -126,6 +126,7 @@ pub fn create_model(
             | Models::L70bChat
             | Models::OpenChat35
             | Models::Starling7bAlpha => 8,
+            Models::PhiHermes | Models::PhiV1 | Models::PhiV1_5 | Models::PhiV2 => 4,
         };
         ModelWeights::from_ggml(content, default_gqa)?
     }
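The new arm supplies the grouped-query-attention factor that `ModelWeights::from_ggml` takes as a parameter, since pre-GGUF GGML checkpoints do not record the KV-head grouping themselves. A minimal sketch of the dispatch as a free function over the repo's `Models` enum, using only the arms visible in this diff; the head of the 8-arm and the `_ => 1` fallback are assumptions, as the rest of the match is collapsed here:

```rust
// Sketch only: mirrors the visible arms of the `default_gqa` match in
// create_model. The `_ => 1` fallback and any earlier variants in the
// 8-arm are assumptions -- those lines are collapsed in the diff.
fn default_gqa(model: &Models) -> usize {
    match model {
        // 70B-class chat checkpoints and their fine-tunes group eight
        // query heads per KV head.
        Models::L70bChat | Models::OpenChat35 | Models::Starling7bAlpha => 8,
        // The new Phi variants load with a grouping factor of four.
        Models::PhiHermes | Models::PhiV1 | Models::PhiV1_5 | Models::PhiV2 => 4,
        _ => 1,
    }
}
```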
74 changes: 27 additions & 47 deletions src/llm/models/mod.rs
@@ -43,6 +43,15 @@ pub enum Models {
     Mixtral,
     #[serde(rename = "mixtral-instruct")]
     MixtralInstruct,
+
+    #[serde(rename = "phi-hermes")]
+    PhiHermes,
+    #[serde(rename = "phi-v1")]
+    PhiV1,
+    #[serde(rename = "phi-v1.5")]
+    PhiV1_5,
+    #[serde(rename = "phi-v2")]
+    PhiV2,
 }
 
 #[derive(Deserialize)]
@@ -62,19 +71,6 @@ impl FromStr for Models {
 impl Models {
     pub fn is_mistral(&self) -> bool {
         match self {
-            Self::L7b
-            | Self::L13b
-            | Self::L70b
-            | Self::L7bChat
-            | Self::L13bChat
-            | Self::L70bChat
-            | Self::L7bCode
-            | Self::L13bCode
-            | Self::L34bCode
-            | Self::Leo7b
-            | Self::Leo13b => false,
-            // Zephyr and OpenChat are fine tuned versions of mistral and should be treated in the
-            // same way. Starling is a fine tuned version of OpenChat.
             Self::OpenChat35
             | Self::Starling7bAlpha
             | Self::Zephyr7bAlpha
@@ -83,52 +79,28 @@ impl Models {
             | Self::MixtralInstruct
             | Self::Mistral7b
             | Self::Mistral7bInstruct => true,
+            _ => false,
         }
     }
 
     pub fn is_zephyr(&self) -> bool {
         match self {
-            Self::L7b
-            | Self::L13b
-            | Self::L70b
-            | Self::L7bChat
-            | Self::L13bChat
-            | Self::L70bChat
-            | Self::L7bCode
-            | Self::L13bCode
-            | Self::L34bCode
-            | Self::Leo7b
-            | Self::Leo13b
-            | Self::Mixtral
-            | Self::MixtralInstruct
-            | Self::Mistral7b
-            | Self::Mistral7bInstruct
-            | Self::OpenChat35
-            | Self::Starling7bAlpha => false,
             Self::Zephyr7bAlpha | Self::Zephyr7bBeta => true,
+            _ => false,
         }
     }
 
     pub fn is_open_chat(&self) -> bool {
         match self {
-            Self::L7b
-            | Self::L13b
-            | Self::L70b
-            | Self::L7bChat
-            | Self::L13bChat
-            | Self::L70bChat
-            | Self::L7bCode
-            | Self::L13bCode
-            | Self::L34bCode
-            | Self::Leo7b
-            | Self::Leo13b
-            | Self::Mixtral
-            | Self::MixtralInstruct
-            | Self::Mistral7b
-            | Self::Mistral7bInstruct
-            | Self::Zephyr7bAlpha
-            | Self::Zephyr7bBeta => false,
             Self::OpenChat35 | Self::Starling7bAlpha => true,
+            _ => false,
         }
     }
+
+    pub fn is_phi(&self) -> bool {
+        match self {
+            Self::PhiHermes | Self::PhiV1 | Self::PhiV1_5 | Self::PhiV2 => true,
+            _ => false,
+        }
+    }
 
@@ -153,6 +125,10 @@ impl Models {
             | Models::Zephyr7bBeta => "mistralai/Mistral-7B-v0.1",
             Models::OpenChat35 => "openchat/openchat_3.5",
             Models::Starling7bAlpha => "berkeley-nest/Starling-LM-7B-alpha",
+            Models::PhiV1 => "microsoft/phi-1",
+            Models::PhiV1_5 => "microsoft/phi-1.5",
+            Models::PhiV2 => "microsoft/phi-2",
+            Models::PhiHermes => "lmz/candle-quantized-phi",
         }
     }

@@ -210,6 +186,10 @@ impl Models {
                 "TheBloke/Starling-LM-7B-alpha-GGUF",
                 "starling-lm-7b-alpha.Q4_K_M.gguf",
             ),
+            Models::PhiV1 => ("lmz/candle-quantized-phi", "model-v1-q4k.gguf"),
+            Models::PhiV1_5 => ("lmz/candle-quantized-phi", "model-q4k.gguf"),
+            Models::PhiV2 => ("lmz/candle-quantized-phi", "model-v2-q4k.gguf"),
+            Models::PhiHermes => ("lmz/candle-quantized-phi", "model-phi-hermes-1_3B-q4k.gguf"),
         }
     }
 }
4 changes: 2 additions & 2 deletions src/llm/text_generation.rs
@@ -241,8 +241,8 @@ pub fn create_text_generation(
     model: Models,
     cache_dir: &Option<PathBuf>,
 ) -> Result<TextGeneration, Box<dyn std::error::Error>> {
-    let tokenizer = create_tokenizer(model)?;
-    let model = create_model(model, cache_dir)?;
+    let tokenizer = create_tokenizer(model).expect("Failed to create tokenizer");
+    let model = create_model(model, cache_dir).expect("Failed to create model");
 
     let device = Device::Cpu;
 
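With `expect` in place, a missing tokenizer or model file now panics with the given message inside the call instead of reaching the caller as an `Err`. A hypothetical caller, built only from the signature visible above:

```rust
use std::path::PathBuf;

// Hypothetical caller using the visible signature:
// create_text_generation(model: Models, cache_dir: &Option<PathBuf>)
//     -> Result<TextGeneration, Box<dyn std::error::Error>>
// After this change, setup failures abort here via panic rather than
// propagating through the returned Result.
fn build_phi_generator(
    cache_dir: Option<PathBuf>,
) -> Result<TextGeneration, Box<dyn std::error::Error>> {
    create_text_generation(Models::PhiV2, &cache_dir)
}
```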
