Skip to content

Commit

Permalink
Merge pull request #4 from smallcloudai/code_scratchpads
Browse files Browse the repository at this point in the history
merge with code_scratchpad repo
  • Loading branch information
reymondzzzz authored Oct 19, 2023
2 parents 514a3b6 + 59a9d32 commit 49a9e55
Show file tree
Hide file tree
Showing 30 changed files with 3,844 additions and 3 deletions.
170 changes: 170 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Builds release binaries of code-scratchpads for every supported platform
# and uploads each one as a workflow artifact named dist-<target>.
name: CI build

on:
  # Trigger the workflow on pushes to only the 'main' branch
  # (this avoids duplicate checks being run e.g. for dependabot pull requests)
  push:
    branches: [main]
  # Trigger the workflow on any pull request
  pull_request:
  workflow_dispatch:

env:
  CARGO_INCREMENTAL: 0
  CARGO_NET_RETRY: 10
  # RUSTFLAGS: "-D warnings -W unreachable-pub"
  RUSTUP_MAX_RETRIES: 10
  FETCH_DEPTH: 0  # pull in the tags for the version string
  # Quoted so the value is always handled as the string "10.15".
  MACOSX_DEPLOYMENT_TARGET: "10.15"
  CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc
  CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER: arm-linux-gnueabihf-gcc

jobs:
  dist:
    strategy:
      matrix:
        include:
          - os: windows-latest
            target: x86_64-pc-windows-msvc
            code-target: win32-x64
          - os: windows-latest
            target: i686-pc-windows-msvc
            code-target: win32-ia32
          - os: windows-latest
            target: aarch64-pc-windows-msvc
            code-target: win32-arm64
          - os: ubuntu-20.04
            target: x86_64-unknown-linux-gnu
            code-target: linux-x64
            container: ubuntu:18.04
          - os: ubuntu-20.04
            target: aarch64-unknown-linux-gnu
            code-target: linux-arm64
          - os: ubuntu-20.04
            target: arm-unknown-linux-gnueabihf
            code-target: linux-armhf
          - os: macos-11
            target: x86_64-apple-darwin
            code-target: darwin-x64
          - os: macos-11
            target: aarch64-apple-darwin
            code-target: darwin-arm64

    env:
      LLM_LS_TARGET: ${{ matrix.target }}

    name: dist (${{ matrix.target }})
    runs-on: ${{ matrix.os }}
    container: ${{ matrix.container }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: ${{ env.FETCH_DEPTH }}

      # The bare ubuntu:18.04 container ships without a compiler or rustup,
      # so both are installed here; hosted runners already provide them.
      - name: Install toolchain dependencies
        if: matrix.container == 'ubuntu:18.04'
        shell: bash
        run: |
          apt-get update && apt-get install -y build-essential curl libssl-dev pkg-config
          curl --proto '=https' --tlsv1.2 --retry 10 --retry-connrefused -fsSL "https://sh.rustup.rs" | sh -s -- --profile minimal --default-toolchain none -y
          echo "${CARGO_HOME:-$HOME/.cargo}/bin" >> "$GITHUB_PATH"

      - name: Install Rust toolchain
        run: |
          rustup update --no-self-update stable
          rustup target add ${{ matrix.target }}
          rustup component add rust-src

      - name: Update apt repositories
        if: matrix.target == 'aarch64-unknown-linux-gnu' || matrix.target == 'arm-unknown-linux-gnueabihf'
        run: sudo apt-get update

      # -y keeps the install non-interactive regardless of the runner's apt config.
      - name: Install AArch64 target toolchain
        if: matrix.target == 'aarch64-unknown-linux-gnu'
        run: sudo apt-get install -y gcc-aarch64-linux-gnu

      - name: Install ARM target toolchain
        if: matrix.target == 'arm-unknown-linux-gnueabihf'
        run: sudo apt-get install -y gcc-multilib-arm-linux-gnueabihf

      # NOTE(review): neither build step passes `--target ${{ matrix.target }}`,
      # so cargo appears to build for the runner's host triple even though the
      # target is installed above. Confirm whether that is intended before
      # changing it: adding --target moves the output to dist/<target>/release/.
      - name: Build artifact
        if: matrix.target == 'x86_64-unknown-linux-gnu'
        run: OPENSSL_STATIC=1 OPENSSL_LIB_DIR=/usr/lib/x86_64-linux-gnu OPENSSL_INCLUDE_DIR=/usr/include/openssl cargo build --release --target-dir dist/

      - name: Build artifact
        if: matrix.target != 'x86_64-unknown-linux-gnu'
        run: cargo build --release --target-dir dist/

      # Only one of the two paths exists per platform (.exe on Windows),
      # hence if-no-files-found: ignore.
      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          name: dist-${{ matrix.target }}
          if-no-files-found: ignore
          path: |
            ./dist/release/code-scratchpads
            ./dist/release/code-scratchpads.exe

  dist-x86_64-unknown-linux-musl:
    name: dist (x86_64-unknown-linux-musl)
    runs-on: ubuntu-latest
    env:
      LLM_LS_TARGET: x86_64-unknown-linux-musl
      # For some reason `-crt-static` is not working for clang without lld
      RUSTFLAGS: "-C link-arg=-fuse-ld=lld -C target-feature=-crt-static"
    container:
      image: rust:alpine
      volumes:
        - /usr/local/cargo/registry:/usr/local/cargo/registry

    steps:
      - name: Install dependencies
        run: apk add --no-cache git clang lld musl-dev nodejs npm openssl-dev pkgconfig g++

      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: ${{ env.FETCH_DEPTH }}

      - name: Dist
        run: cargo build --release --target-dir dist/

      # Same action version as the `dist` job (was the long-deprecated v1).
      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          name: dist-x86_64-unknown-linux-musl
          path: ./dist/release/code-scratchpads

# dist-arm64-unknown-linux-musl:
# name: dist (arm64-unknown-linux-musl)
# runs-on: ubuntu-latest

# steps:
# - name: Checkout repository
# uses: actions/checkout@v3
# with:
# fetch-depth: ${{ env.FETCH_DEPTH }}

# - uses: uraimo/run-on-arch-action@v2
# name: Build artifact
# id: build
# with:
# arch: aarch64
# distro: rust:alpine
# dockerRunArgs: |
# --volume "${PWD}:/data"
# env: |
# RUSTFLAGS: "-C link-arg=-fuse-ld=lld -C target-feature=-crt-static"
# shell: /bin/sh
# install: |
# apk add --no-cache git clang lld musl-dev nodejs npm openssl-dev pkgconfig g++
# run: |
# cd /data
# cargo build --release --target-dir dist/

# - name: Upload artifacts
# uses: actions/upload-artifact@v1
# with:
# name: dist-arm64-unknown-linux-musl
# path: ./dist/release/code-scratchpads
41 changes: 41 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Manifest for the `code-scratchpads` crate (Rust 2021 edition).
[package]
name = "code-scratchpads"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

# Active dependencies. Version strings are Cargo semver requirements,
# e.g. "0.14" accepts any compatible 0.14.x release.
[dependencies]
hyper = { version = "0.14", features = ["server", "stream"] }
reqwest = { version = "0.11", features = ["json", "stream"] }
tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "rt-multi-thread", "signal"] }
reqwest-eventsource = "0.4.0"
url = "2.4.1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tower = "0.4"
tower-lsp = "0.20"
tracing = "0.1"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
tokenizers = "0.13"
ropey = "1.6"
home = "0.5"
structopt = "0.3"
futures = "0.3"
futures-core = "0.3"
futures-util = "0.3"
async-stream = "0.3.5"
chrono = "0.4.31"
difference = "2.0.0"
regex = "1.9.5"

# Disabled dependencies: these entries are commented out, so Cargo ignores them.
#use = "0.0.0"
#async-trait = "0.1.73"
#route-recognizer = "0.3.1"
#bytes = "0.5"
#eventsource-client = "0.11.0"
#reqwest-streams = { version = "0.3.0", features = ["json"] }
#tokio-stream = "0.1.14"
#eventsource-stream = "0.2.3"

7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image that installs this repository as a Python package and runs the
# code_scratchpads HTTP server.
FROM python:3.8

# COPY instead of ADD: the source is a plain local directory, so ADD's extra
# behaviors (URL fetching, archive auto-extraction) are not wanted.
COPY . /opt/app
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -v /opt/app

EXPOSE 8001
# Shell form is required here: the command relies on `&&` and on expanding
# HUGGINGFACE_TOKEN from the container environment at start-up. The variable
# is quoted so a token with unusual characters is passed as a single argument.
CMD huggingface-cli login --token "$HUGGINGFACE_TOKEN" && python -m code_scratchpads.http_server
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Code Scratchpads

This code converts high-level code-completion calls into low-level prompts and converts the results back. It is useful as a common layer for IDE plugins (VS Code, JetBrains) that takes care of the low-level details.


## Usage

Simple example:

```
curl http://127.0.0.1:8001/v1/code-completion -k \
-H 'Content-Type: application/json' \
-d '{
"inputs": {
"sources": {"hello.py": "def hello_world():"},
"cursor": {
"file": "hello.py",
"line": 0,
"character": 18
},
"multiline": true
},
"model": "bigcode/starcoder",
"stream": false,
"parameters": {
"temperature": 0.1,
"max_new_tokens": 20
}
}'
```

Output is `[{"code_completion": "\n return \"Hello World!\"\n"}]`.

To check out more examples, look at [code_scratchpads/tests/test_api.py](code_scratchpads/tests/test_api.py).

5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from setuptools import setup
from setuptools import setup, find_packages


setup(
name="refact-lsp",
py_modules=["refact_lsp"],
version="0.0.1",
url="https://github.com/smallcloudai/refact_lsp",
summary="LSP server for Refact, suitable for Sublime Text, and other editors",
Expand Down
48 changes: 48 additions & 0 deletions src/cached_tokenizers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use reqwest::header::AUTHORIZATION;
use tracing::info;
use tokio::io::AsyncWriteExt;
use std::path::Path;


/// Error value made of a human-readable message and an optional JSON payload.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
    /// Description of what went wrong.
    pub message: String,
    /// Optional extra data attached to the error.
    pub data: Option<serde_json::Value>,
}

/// Downloads a tokenizer file from `http_path` and stores it at `to`.
///
/// If `to` already exists the function returns immediately without touching
/// the network, so the destination path acts as an on-disk cache.
///
/// # Arguments
/// * `http_client` - client used to issue the GET request.
/// * `http_path` - URL of the tokenizer file.
/// * `api_token` - sent as a `Bearer` authorization header when non-empty.
/// * `to` - destination path; missing parent directories are created.
///
/// # Errors
/// Returns a descriptive `String` when the destination has no parent, the
/// directory cannot be created, the request fails or returns an error status,
/// the body cannot be read, or the file cannot be written or moved into place.
pub async fn download_tokenizer_file(
    http_client: &reqwest::Client,
    http_path: &str,
    api_token: String,
    to: impl AsRef<Path>,
) -> Result<(), String> {
    let to = to.as_ref();
    if to.exists() {
        return Ok(());
    }
    info!("downloading tokenizer \"{}\" to {}...", http_path, to.display());

    let parent = to
        .parent()
        .ok_or_else(|| "tokenizer path has no parent".to_string())?;
    tokio::fs::create_dir_all(parent)
        .await
        .map_err(|e| format!("failed to create parent dir: {}", e))?;

    let mut req = http_client.get(http_path);
    if !api_token.is_empty() {
        req = req.header(AUTHORIZATION, format!("Bearer {api_token}"));
    }
    let res = req
        .send()
        .await
        .map_err(|e| format!("failed to get response: {}", e))?
        .error_for_status()
        .map_err(|e| format!("failed to get response: {}", e))?;

    // Fetch the whole body BEFORE creating anything on disk: a failed download
    // must not leave a file behind, because the `exists()` check above would
    // treat it as a valid cached tokenizer on every later call.
    let body = res
        .bytes()
        .await
        .map_err(|e| format!("failed to fetch bytes: {}", e))?;

    // Write to a sibling temporary file and rename it into place afterwards,
    // so `to` only ever appears once it holds the complete content.
    let mut tmp_name = to.as_os_str().to_owned();
    tmp_name.push(".tmp");
    let tmp_path = std::path::PathBuf::from(tmp_name);

    let mut file = tokio::fs::OpenOptions::new()
        .write(true)
        .create(true)
        // A stale temp file from an earlier interrupted run must not leak
        // trailing bytes into this download.
        .truncate(true)
        .open(&tmp_path)
        .await
        .map_err(|e| format!("failed to open file: {}", e))?;
    file.write_all(&body)
        .await
        .map_err(|e| format!("failed to write to file: {}", e))?;
    // tokio files perform writes in the background; flush so the data has
    // actually been written before the file is closed and renamed.
    file.flush()
        .await
        .map_err(|e| format!("failed to write to file: {}", e))?;
    // Close the handle before renaming.
    drop(file);

    tokio::fs::rename(&tmp_path, to)
        .await
        .map_err(|e| format!("failed to move file into place: {}", e))?;
    Ok(())
}
58 changes: 58 additions & 0 deletions src/call_validation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;


/// Cursor location inside one of the submitted source files.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct CursorPosition {
    /// Name of the file the cursor is in.
    // NOTE(review): presumably a key of `CodeCompletionInputs::sources` — confirm.
    pub file: String,
    /// Line of the cursor.
    pub line: i32,
    /// Character offset of the cursor within the line.
    pub character: i32,
}

/// The `inputs` part of a code-completion request.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct CodeCompletionInputs {
    /// Source texts, stored as a string-to-string map.
    // NOTE(review): keys look like file names and values like file contents — confirm.
    pub sources: HashMap<String, String>,
    /// Where the cursor is.
    pub cursor: CursorPosition,
    /// Multiline flag supplied by the caller.
    pub multiline: bool,
}

/// Sampling parameters that accompany a request.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct SamplingParameters {
    /// Falls back to `usize::default()` (0) when the field is absent.
    #[serde(default)]
    pub max_new_tokens: usize,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Optional `top_p` value.
    pub top_p: Option<f32>,
    /// Optional list of stop strings.
    pub stop: Option<Vec<String>>,
}

/// Deserialized body of a code-completion request.
///
/// Only `inputs` is mandatory; every other field falls back to its type's
/// default value when missing.
#[derive(Clone, Debug, Deserialize)]
pub struct CodeCompletionPost {
    /// Sources and cursor position to complete at.
    pub inputs: CodeCompletionInputs,
    /// Sampling parameters.
    #[serde(default)]
    pub parameters: SamplingParameters,
    /// Model name; empty string when not given.
    #[serde(default)]
    pub model: String,
    /// Scratchpad name; empty string when not given.
    #[serde(default)]
    pub scratchpad: String,
    /// Stream flag; `false` when not given.
    #[serde(default)]
    pub stream: bool,
}

/// One message of a chat request.
#[derive(Clone, Debug, Deserialize)]
pub struct ChatMessage {
    /// Role associated with the message.
    pub role: String,
    /// Text of the message.
    pub content: String,
}

/// Deserialized body of a chat request.
///
/// Only `messages` is mandatory. Unlike `CodeCompletionPost::stream`, the
/// `stream` field here is an `Option`, so "not given" (`None`) stays
/// distinguishable from an explicit `false`.
#[derive(Clone, Debug, Deserialize)]
pub struct ChatPost {
    /// Chat messages.
    pub messages: Vec<ChatMessage>,
    /// Sampling parameters.
    #[serde(default)]
    pub parameters: SamplingParameters,
    /// Model name; empty string when not given.
    #[serde(default)]
    pub model: String,
    /// Scratchpad name; empty string when not given.
    #[serde(default)]
    pub scratchpad: String,
    /// Stream flag; `None` when not given.
    pub stream: Option<bool>,
}
Loading

0 comments on commit 49a9e55

Please sign in to comment.