Commit
Merge branch 'main' into runners2
AlexCheema committed Jan 20, 2025
2 parents 023ddc2 + b5cbcbc commit 218c1e7
Showing 39 changed files with 3,025 additions and 288 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -171,3 +171,5 @@ cython_debug/

**/*.xcodeproj/*
.aider*

exo/tinychat/images/*.png
14 changes: 8 additions & 6 deletions README.md
@@ -18,6 +18,8 @@ exo: Run your own AI cluster at home with everyday devices. Maintained by [exo l
[![Tests](https://dl.circleci.com/status-badge/img/circleci/TrkofJDoGzdQAeL6yVHKsg/4i5hJuafuwZYZQxbRAWS71/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/circleci/TrkofJDoGzdQAeL6yVHKsg/4i5hJuafuwZYZQxbRAWS71/tree/main)
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)

<a href="https://trendshift.io/repositories/11849" target="_blank"><img src="https://trendshift.io/api/badge/repositories/11849" alt="exo-explore%2Fexo | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

</div>

---
@@ -38,7 +40,7 @@ We also welcome contributions from the community. We have a list of bounties in

### Wide Model Support

exo supports different models including LLaMA ([MLX](exo/inference/mlx/models/llama.py) and [tinygrad](exo/inference/tinygrad/models/llama.py)), Mistral, LlaVA, Qwen and Deepseek.
exo supports different models including LLaMA ([MLX](exo/inference/mlx/models/llama.py) and [tinygrad](exo/inference/tinygrad/models/llama.py)), Mistral, LlaVA, Qwen, and Deepseek.

### Dynamic Model Partitioning

@@ -100,13 +102,13 @@ source install.sh

- There are a number of things users have empirically found to improve performance on Apple Silicon Macs:

1. Upgrade to the latest version of MacOS 15.
1. Upgrade to the latest version of macOS Sequoia.
2. Run `./configure_mlx.sh`. This runs commands to optimize GPU memory allocation on Apple Silicon Macs.


## Documentation

### Example Usage on Multiple MacOS Devices
### Example Usage on Multiple macOS Devices

#### Device 1:

@@ -177,9 +179,9 @@ curl http://localhost:52415/v1/chat/completions \
}'
```

### Example Usage on Multiple Heterogenous Devices (MacOS + Linux)
### Example Usage on Multiple Heterogeneous Devices (macOS + Linux)

#### Device 1 (MacOS):
#### Device 1 (macOS):

```sh
exo
@@ -244,7 +246,7 @@ python3 format.py ./exo

## Known Issues

- On some versions of MacOS/Python, certificates are not installed properly which can lead to SSL errors (e.g. SSL error with huggingface.co). To fix this, run the Install Certificates command, usually:
- On certain versions of Python on macOS, certificates may not be installed correctly, potentially causing SSL errors (e.g., when accessing huggingface.co). To resolve this, run the `Install Certificates` command, typically as follows:

```sh
/Applications/Python 3.x/Install Certificates.command
2 changes: 1 addition & 1 deletion configure_mlx.sh
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash

# Get the total memory in MB
TOTAL_MEM_MB=$(($(sysctl -n hw.memsize) / 1024 / 1024))
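
Only the first lines of `configure_mlx.sh` appear in this hunk. For context, here is a minimal sketch of the kind of tuning such a script can perform, assuming it raises the `iogpu` wired-memory sysctl in proportion to total RAM; the exact keys and percentages below are illustrative assumptions, not taken from this diff:

```sh
#!/usr/bin/env bash
# Sketch only — illustrative values, not the script's actual settings.

# Total physical memory in MB (matches the line shown in the diff above).
TOTAL_MEM_MB=$(($(sysctl -n hw.memsize) / 1024 / 1024))

# Assumed: allow the GPU to wire up to ~80% of unified memory.
WIRED_LIMIT_MB=$((TOTAL_MEM_MB * 80 / 100))

# Changing kernel parameters requires root.
sudo sysctl -w iogpu.wired_limit_mb=$WIRED_LIMIT_MB
```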
111 changes: 111 additions & 0 deletions examples/function_calling.py
@@ -0,0 +1,111 @@
import json
import re
import requests

def get_current_weather(location: str, unit: str = "celsius"):
    """Mock weather data function"""
    # Hardcoded response for demo purposes
    return {
        "location": location,
        "temperature": 22 if unit == "celsius" else 72,
        "unit": unit,
        "forecast": "Sunny with light clouds"
    }
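
# A sketch of the model output try_parse_tool_calls expects below: Qwen-style
# <tool_call> tags wrapping a JSON object. The exact tags depend on the
# model's chat template, so treat this as illustrative rather than normative:
#
#   <tool_call>
#   {"name": "get_current_weather", "arguments": {"location": "Boston, MA"}}
#   </tool_call>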

def try_parse_tool_calls(content: str):
    """Try to parse <tool_call> blocks out of a model response."""
    tool_calls = []
    offset = 0
    for i, m in enumerate(re.finditer(r"<tool_call>\n(.+)?\n</tool_call>", content)):
        if i == 0:
            offset = m.start()
        try:
            func = json.loads(m.group(1))
            # Arguments may arrive as a JSON-encoded string; decode them
            # before accepting the tool call.
            if isinstance(func["arguments"], str):
                func["arguments"] = json.loads(func["arguments"])
            tool_calls.append({"type": "function", "function": func})
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            print(f"Failed to parse tool call: the content is {m.group(1)} and {e}")
    if tool_calls:
        # Keep any assistant text that preceded the first tool call.
        c = content[:offset] if offset > 0 and content[:offset].strip() else ""
        return {"role": "assistant", "content": c, "tool_calls": tool_calls}
    # No tool calls: strip a trailing end-of-turn token, if present.
    return {"role": "assistant", "content": re.sub(r"<\|im_end\|>$", "", content)}

def chat_completion(messages):
    """Send chat completion request to local server"""
    response = requests.post(
        "http://localhost:52415/v1/chat/completions",
        json={
            "model": "qwen-2.5-1.5b",
            "messages": messages,
            "tools": [{
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "description": "Get the current weather in a given location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state, e.g. San Francisco, CA"
                            },
                            "unit": {
                                "type": "string",
                                "enum": ["celsius", "fahrenheit"]
                            }
                        },
                        "required": ["location"]
                    }
                }
            }],
            "tool_choice": "auto"
        }
    )
    return response.json()

def main():
    # Initial conversation
    messages = [{
        "role": "user",
        "content": "Hi there, what's the weather in Boston?"
    }]

    # Get initial response
    response = chat_completion(messages)
    print(f"First response: {response}")
    assistant_message = try_parse_tool_calls(response["choices"][0]["message"]["content"])
    messages.append(assistant_message)

    # If there are tool calls, execute them and continue conversation
    if "tool_calls" in assistant_message:
        for tool_call in assistant_message["tool_calls"]:
            if tool_call["function"]["name"] == "get_current_weather":
                args = tool_call["function"]["arguments"]
                weather_data = get_current_weather(**args)

                # Add tool response to messages
                messages.append({
                    "role": "tool",
                    "content": json.dumps(weather_data),
                    "name": tool_call["function"]["name"]
                })

        # Get final response with weather data
        response = chat_completion(messages)
        print(f"Final response: {response}")
        messages.append({
            "role": "assistant",
            "content": response["choices"][0]["message"]["content"]
        })

    # Print full conversation
    for msg in messages:
        print(f"\n{msg['role'].upper()}: {msg['content']}")

if __name__ == "__main__":
    main()
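
To try the example above, an exo node must already be serving the API on localhost:52415 (the port used in the script and in the README's curl example). A minimal run, assuming exo is installed, might look like:

```sh
# Terminal 1: start an exo node (serves the API on port 52415)
exo

# Terminal 2: run the function-calling example
python3 examples/function_calling.py
```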