-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from smallcloudai/code_scratchpads
merge with code_scratchpad repo
- Loading branch information
Showing
30 changed files
with
3,844 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
name: CI build | ||
|
||
on: | ||
# Trigger the workflow on pushes to only the 'main' branch (this avoids duplicate checks being run e.g. for dependabot pull requests) | ||
push: | ||
branches: [main] | ||
# Trigger the workflow on any pull request | ||
pull_request: | ||
workflow_dispatch: | ||
|
||
|
||
env: | ||
CARGO_INCREMENTAL: 0 | ||
CARGO_NET_RETRY: 10 | ||
# RUSTFLAGS: "-D warnings -W unreachable-pub" | ||
RUSTUP_MAX_RETRIES: 10 | ||
FETCH_DEPTH: 0 # pull in the tags for the version string | ||
MACOSX_DEPLOYMENT_TARGET: 10.15 | ||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc | ||
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER: arm-linux-gnueabihf-gcc | ||
|
||
jobs: | ||
dist: | ||
strategy: | ||
matrix: | ||
include: | ||
- os: windows-latest | ||
target: x86_64-pc-windows-msvc | ||
code-target: win32-x64 | ||
- os: windows-latest | ||
target: i686-pc-windows-msvc | ||
code-target: win32-ia32 | ||
- os: windows-latest | ||
target: aarch64-pc-windows-msvc | ||
code-target: win32-arm64 | ||
- os: ubuntu-20.04 | ||
target: x86_64-unknown-linux-gnu | ||
code-target: linux-x64 | ||
container: ubuntu:18.04 | ||
- os: ubuntu-20.04 | ||
target: aarch64-unknown-linux-gnu | ||
code-target: linux-arm64 | ||
- os: ubuntu-20.04 | ||
target: arm-unknown-linux-gnueabihf | ||
code-target: linux-armhf | ||
- os: macos-11 | ||
target: x86_64-apple-darwin | ||
code-target: darwin-x64 | ||
- os: macos-11 | ||
target: aarch64-apple-darwin | ||
code-target: darwin-arm64 | ||
|
||
env: | ||
LLM_LS_TARGET: ${{ matrix.target }} | ||
|
||
name: dist (${{ matrix.target }}) | ||
runs-on: ${{ matrix.os }} | ||
container: ${{ matrix.container }} | ||
|
||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v3 | ||
with: | ||
fetch-depth: ${{ env.FETCH_DEPTH }} | ||
|
||
- name: Install toolchain dependencies | ||
if: matrix.container == 'ubuntu:18.04' | ||
shell: bash | ||
run: | | ||
apt-get update && apt-get install -y build-essential curl libssl-dev pkg-config | ||
curl --proto '=https' --tlsv1.2 --retry 10 --retry-connrefused -fsSL "https://sh.rustup.rs" | sh -s -- --profile minimal --default-toolchain none -y | ||
echo "${CARGO_HOME:-$HOME/.cargo}/bin" >> $GITHUB_PATH | ||
- name: Install Rust toolchain | ||
run: | | ||
rustup update --no-self-update stable | ||
rustup target add ${{ matrix.target }} | ||
rustup component add rust-src | ||
- name: Update apt repositories | ||
if: matrix.target == 'aarch64-unknown-linux-gnu' || matrix.target == 'arm-unknown-linux-gnueabihf' | ||
run: sudo apt-get update | ||
|
||
- name: Install AArch64 target toolchain | ||
if: matrix.target == 'aarch64-unknown-linux-gnu' | ||
run: sudo apt-get install gcc-aarch64-linux-gnu | ||
|
||
- name: Install ARM target toolchain | ||
if: matrix.target == 'arm-unknown-linux-gnueabihf' | ||
run: sudo apt-get install gcc-multilib-arm-linux-gnueabihf | ||
|
||
- name: Build artifact | ||
if: matrix.target == 'x86_64-unknown-linux-gnu' | ||
run: OPENSSL_STATIC=1 OPENSSL_LIB_DIR=/usr/lib/x86_64-linux-gnu OPENSSL_INCLUDE_DIR=/usr/include/openssl cargo build --release --target-dir dist/ | ||
|
||
- name: Build artifact | ||
if: matrix.target != 'x86_64-unknown-linux-gnu' | ||
run: cargo build --release --target-dir dist/ | ||
|
||
- name: Upload artifacts | ||
uses: actions/upload-artifact@v3 | ||
with: | ||
name: dist-${{ matrix.target }} | ||
if-no-files-found: ignore | ||
path: | | ||
./dist/release/code-scratchpads | ||
./dist/release/code-scratchpads.exe | ||
dist-x86_64-unknown-linux-musl: | ||
name: dist (x86_64-unknown-linux-musl) | ||
runs-on: ubuntu-latest | ||
env: | ||
LLM_LS_TARGET: x86_64-unknown-linux-musl | ||
# For some reason `-crt-static` is not working for clang without lld | ||
RUSTFLAGS: "-C link-arg=-fuse-ld=lld -C target-feature=-crt-static" | ||
container: | ||
image: rust:alpine | ||
volumes: | ||
- /usr/local/cargo/registry:/usr/local/cargo/registry | ||
|
||
steps: | ||
- name: Install dependencies | ||
run: apk add --no-cache git clang lld musl-dev nodejs npm openssl-dev pkgconfig g++ | ||
|
||
- name: Checkout repository | ||
uses: actions/checkout@v3 | ||
with: | ||
fetch-depth: ${{ env.FETCH_DEPTH }} | ||
|
||
- name: Dist | ||
run: cargo build --release --target-dir dist/ | ||
|
||
- name: Upload artifacts | ||
uses: actions/upload-artifact@v1 | ||
with: | ||
name: dist-x86_64-unknown-linux-musl | ||
path: ./dist/release/code-scratchpads | ||
|
||
# dist-arm64-unknown-linux-musl: | ||
# name: dist (arm64-unknown-linux-musl) | ||
# runs-on: ubuntu-latest | ||
|
||
# steps: | ||
# - name: Checkout repository | ||
# uses: actions/checkout@v3 | ||
# with: | ||
# fetch-depth: ${{ env.FETCH_DEPTH }} | ||
|
||
# - uses: uraimo/run-on-arch-action@v2 | ||
# name: Build artifact | ||
# id: build | ||
# with: | ||
# arch: aarch64 | ||
# distro: rust:alpine | ||
# dockerRunArgs: | | ||
# --volume "${PWD}:/data" | ||
# env: | | ||
# RUSTFLAGS: "-C link-arg=-fuse-ld=lld -C target-feature=-crt-static" | ||
# shell: /bin/sh | ||
# install: | | ||
# apk add --no-cache git clang lld musl-dev nodejs npm openssl-dev pkgconfig g++ | ||
# run: | | ||
# cd /data | ||
# cargo build --release --target-dir dist/ | ||
|
||
# - name: Upload artifacts | ||
# uses: actions/upload-artifact@v1 | ||
# with: | ||
# name: dist-arm64-unknown-linux-musl | ||
# path: ./dist/release/code-scratchpads |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
[package] | ||
name = "code-scratchpads" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
hyper = { version = "0.14", features = ["server", "stream"] } | ||
reqwest = { version = "0.11", features = ["json", "stream"] } | ||
tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "rt-multi-thread", "signal"] } | ||
reqwest-eventsource = "0.4.0" | ||
url = "2.4.1" | ||
serde = { version = "1", features = ["derive"] } | ||
serde_json = "1" | ||
tower = "0.4" | ||
tower-lsp = "0.20" | ||
tracing = "0.1" | ||
tracing-appender = "0.2" | ||
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } | ||
tokenizers = "0.13" | ||
ropey = "1.6" | ||
home = "0.5" | ||
structopt = "0.3" | ||
futures = "0.3" | ||
futures-core = "0.3" | ||
futures-util = "0.3" | ||
async-stream = "0.3.5" | ||
chrono = "0.4.31" | ||
difference = "2.0.0" | ||
regex = "1.9.5" | ||
|
||
#use = "0.0.0" | ||
#async-trait = "0.1.73" | ||
#route-recognizer = "0.3.1" | ||
#bytes = "0.5" | ||
#eventsource-client = "0.11.0" | ||
#reqwest-streams = { version = "0.3.0", features = ["json"] } | ||
#tokio-stream = "0.1.14" | ||
#eventsource-stream = "0.2.3" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
FROM python:3.8 | ||
|
||
ADD . /opt/app | ||
RUN pip install -v /opt/app | ||
|
||
EXPOSE 8001 | ||
CMD huggingface-cli login --token $HUGGINGFACE_TOKEN && python -m code_scratchpads.http_server |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Code Scratchpads | ||
|
||
This code converts high-level code completion calls into low-level prompts and converts the results back. It is useful as a common layer that handles the low-level details for many IDE plugins (VS Code, JB). | ||
|
||
|
||
## Usage | ||
|
||
Simple example: | ||
|
||
``` | ||
curl http://127.0.0.1:8001/v1/code-completion -k \ | ||
-H 'Content-Type: application/json' \ | ||
-d '{ | ||
"inputs": { | ||
"sources": {"hello.py": "def hello_world():"}, | ||
"cursor": { | ||
"file": "hello.py", | ||
"line": 0, | ||
"character": 18 | ||
}, | ||
"multiline": true | ||
}, | ||
"model": "bigcode/starcoder", | ||
"stream": false, | ||
"parameters": { | ||
"temperature": 0.1, | ||
"max_new_tokens": 20 | ||
} | ||
}' | ||
``` | ||
|
||
Output is `[{"code_completion": "\n return \"Hello World!\"\n"}]`. | ||
|
||
To check out more examples, look at [code_scratchpads/tests/test_api.py](code_scratchpads/tests/test_api.py). | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
use reqwest::header::AUTHORIZATION; | ||
use tracing::info; | ||
use tokio::io::AsyncWriteExt; | ||
use std::path::Path; | ||
|
||
|
||
/// Error value made of a text message plus an optional JSON payload.
#[derive(Debug, Clone, PartialEq, Eq)] | ||
pub struct Error { | ||
/// Description of what went wrong.
pub message: String, | ||
/// Optional additional JSON data attached to the error; `None` when there is none.
pub data: Option<serde_json::Value>, | ||
} | ||
|
||
pub async fn download_tokenizer_file( | ||
http_client: &reqwest::Client, | ||
http_path: &str, | ||
api_token: String, | ||
to: impl AsRef<Path>, | ||
) -> Result<(), String> { | ||
if to.as_ref().exists() { | ||
return Ok(()); | ||
} | ||
info!("downloading tokenizer \"{}\" to {}...", http_path, to.as_ref().display()); | ||
tokio::fs::create_dir_all( | ||
to.as_ref().parent().ok_or_else(|| "tokenizer path has no parent")?, | ||
) | ||
.await | ||
.map_err(|e| format!("failed to create parent dir: {}", e))?; | ||
let mut req = http_client.get(http_path); | ||
if !api_token.is_empty() { | ||
req = req.header(AUTHORIZATION, format!("Bearer {api_token}")) | ||
} | ||
let res = req | ||
.send() | ||
.await | ||
.map_err(|e| format!("failed to get response: {}", e))? | ||
.error_for_status() | ||
.map_err(|e| format!("failed to get response: {}", e))?; | ||
let mut file = tokio::fs::OpenOptions::new() | ||
.write(true) | ||
.create(true) | ||
.open(to) | ||
.await | ||
.map_err(|e| format!("failed to open file: {}", e))?; | ||
file.write_all(&res.bytes().await | ||
.map_err(|e| format!("failed to fetch bytes: {}", e))? | ||
).await.map_err(|e| format!("failed to write to file: {}", e))?; | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
use serde::Deserialize; | ||
use serde::Serialize; | ||
use std::collections::HashMap; | ||
|
||
|
||
/// Location of the cursor inside one of the submitted source files.
#[derive(Debug, Serialize, Deserialize, Clone, Default)] | ||
pub struct CursorPosition { | ||
/// Name of the file the cursor is in.
/// NOTE(review): presumably a key of `CodeCompletionInputs::sources` — confirm against the handler.
pub file: String, | ||
/// Line of the cursor.
/// NOTE(review): looks zero-based (the README request uses `"line": 0` for a one-line file) — confirm.
pub line: i32, | ||
/// Character offset of the cursor within the line.
/// NOTE(review): unit (bytes, chars, UTF-16 code units) is not visible here — confirm.
pub character: i32, | ||
} | ||
|
||
/// The `inputs` part of a code-completion request: source texts plus cursor.
#[derive(Debug, Serialize, Deserialize, Clone, Default)] | ||
pub struct CodeCompletionInputs { | ||
/// String-to-string map of sources.
/// NOTE(review): the README example maps a file name to that file's text — confirm.
pub sources: HashMap<String, String>, | ||
/// Where the completion is requested.
pub cursor: CursorPosition, | ||
/// Multiline flag sent by the client; how it is interpreted is decided outside this struct.
pub multiline: bool, | ||
} | ||
|
||
/// Sampling options shared by the completion and chat request bodies.
#[derive(Debug, Serialize, Deserialize, Clone, Default)] | ||
pub struct SamplingParameters { | ||
// `serde(default)`: deserializes to 0 when the field is missing from the request.
#[serde(default)] | ||
pub max_new_tokens: usize, | ||
/// Optional sampling temperature; `None` when the request does not set it.
pub temperature: Option<f32>, | ||
/// Optional `top_p` value; `None` when the request does not set it.
pub top_p: Option<f32>, | ||
/// Optional list of stop strings; `None` when the request does not set it.
pub stop: Option<Vec<String>>, | ||
} | ||
|
||
/// Deserialized body of a code-completion request.
///
/// Only `inputs` is required; every other field carries `serde(default)` and
/// falls back to its type's `Default` value when omitted.
#[derive(Debug, Deserialize, Clone)] | ||
pub struct CodeCompletionPost { | ||
/// Sources and cursor position to complete at (required).
pub inputs: CodeCompletionInputs, | ||
// Falls back to `SamplingParameters::default()` when omitted.
#[serde(default)] | ||
pub parameters: SamplingParameters, | ||
// Empty string when omitted.
#[serde(default)] | ||
pub model: String, | ||
// Empty string when omitted.
#[serde(default)] | ||
pub scratchpad: String, | ||
// `false` when omitted.
#[serde(default)] | ||
pub stream: bool, | ||
} | ||
|
||
/// One message of a chat request; deserialize-only (no `Serialize` derive).
#[derive(Debug, Deserialize, Clone)] | ||
pub struct ChatMessage { | ||
/// Role string attached to the message.
/// NOTE(review): the set of accepted role values is not visible here — confirm.
pub role: String, | ||
/// Text of the message.
pub content: String, | ||
} | ||
|
||
/// Deserialized body of a chat request.
///
/// Only `messages` is required. Unlike `CodeCompletionPost::stream`, which is a
/// `bool` defaulting to `false`, `stream` here is an `Option<bool>`, so an
/// omitted value stays distinguishable as `None`.
#[derive(Debug, Deserialize, Clone)] | ||
pub struct ChatPost { | ||
/// The chat messages (required).
pub messages: Vec<ChatMessage>, | ||
// Falls back to `SamplingParameters::default()` when omitted.
#[serde(default)] | ||
pub parameters: SamplingParameters, | ||
// Empty string when omitted.
#[serde(default)] | ||
pub model: String, | ||
// Empty string when omitted.
#[serde(default)] | ||
pub scratchpad: String, | ||
/// Streaming flag; `None` when the request does not set it.
pub stream: Option<bool>, | ||
} |
Oops, something went wrong.