Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Ocaml support #166

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,551 changes: 781 additions & 770 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ members = [
"source/carton-utils",
"source/carton-runner-packager",
"source/carton-runner-interface",
"source/carton-bindings-py",
"source/carton-bindings-nodejs",
"source/carton-bindings-ocaml",
"source/carton-bindings-py",
"source/carton-bindings-wasm",
"source/carton-macros",
"source/carton-runner-noop",
Expand Down
15 changes: 15 additions & 0 deletions source/carton-bindings-ocaml/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[package]
name = "carton-bindings-ocaml"
version = "0.1.0"
edition = "2021"
publish = false

[lib]
crate-type = ["staticlib", "cdylib"]

[dependencies]
carton-core = { package = "carton", path = "../carton" }
tokio = { version = "1", features = ["full"] }
ocaml-rust = { git = "https://github.com/LaurentMazare/ocaml-rust", rev = "7487c126bfa0785bd32e72918bc37a16d4d7d203" }
ocaml-sys = "0.23.0"
ndarray = "0.15.6"
15 changes: 15 additions & 0 deletions source/carton-bindings-ocaml/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
#
# Builds the Rust half of the OCaml bindings, copies the resulting static and
# dynamic libraries into cartonml/lib under the names the dune library stanza
# expects, and then builds the OCaml package with dune.
#
# Usage: ./build.sh [cargo build arguments...]    e.g. ./build.sh --release

# Stop at the first failing command (including a failure inside a pipeline) so
# a broken cargo build never reaches the copy step or `dune build`.
set -eo pipefail

# The relative paths below assume the crate directory is the working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

# cargo prints one JSON message per line. The last "compiler-artifact" message
# describes the outputs of the crate built last (this one); selecting it by its
# `reason` is sturdier than counting lines from the end of the stream.
file_names=$(
    cargo build "$@" --message-format json \
        | jq -rs 'map(select(.reason == "compiler-artifact")) | last | .filenames[]'
)

# Read line by line so that paths containing spaces are kept intact.
while IFS= read -r name; do
    if [[ "$name" == *.a ]]; then
        cp "$name" "cartonml/lib/libcarton_bindings_ocaml.a"
    else
        cp "$name" "cartonml/lib/dllcarton_bindings_ocaml.so"
    fi
done <<< "$file_names"

cd cartonml
dune build
33 changes: 33 additions & 0 deletions source/carton-bindings-ocaml/cartonml/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
*.annot
*.cmo
*.cma
*.cmi
*.a
*.o
*.cmx
*.cmxs
*.cmxa

# ocamlbuild working directory
_build/

# ocamlbuild targets
*.byte
*.native

# oasis generated files
setup.data
setup.log

# Merlin configuring file for Vim and Emacs
.merlin

# Dune generated files
*.install

# Local OPAM switch
_opam/

# Dlls
lib/*.so
lib/*.a
4 changes: 4 additions & 0 deletions source/carton-bindings-ocaml/cartonml/.ocamlformat
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
profile = janestreet
break-cases = fit
margin = 77
wrap-comments = true
36 changes: 36 additions & 0 deletions source/carton-bindings-ocaml/cartonml/cartonml.opam
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "run any ML model from any programming language"
description: """

Carton is a library that lets you run any machine learning (ML) model from any programming language*.
It wraps existing models and provides a uniform interface for your application to use regardless of the framework the underlying model was implemented in.
See https://carton.run for an overview of how it works and how to get started.
"""
maintainer: ["Arthur Melton"]
authors: ["Vivek Panyam"]
license: "Apache-2.0"
tags: ["ML"]
homepage: "https://github.com/VivekPanyam/carton"
doc: "https://carton.run/quickstart"
bug-reports: "https://github.com/VivekPanyam/carton/issues"
depends: [
"ocaml"
"dune" {>= "3.10"}
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/VivekPanyam/carton.git"
29 changes: 29 additions & 0 deletions source/carton-bindings-ocaml/cartonml/dune-project
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
(lang dune 3.10)

(name cartonml)

(generate_opam_files true)

(source
(github VivekPanyam/carton))

(authors "Vivek Panyam")

(maintainers "Arthur Melton")

(license Apache-2.0)

(documentation https://carton.run/quickstart)

(package
(name cartonml)
(synopsis "run any ML model from any programming language")
(description "
Carton is a library that lets you run any machine learning (ML) model from any programming language*.
It wraps existing models and provides a uniform interface for your application to use regardless of the framework the underlying model was implemented in.
See https://carton.run for an overview of how it works and how to get started.
")
(depends ocaml dune)
(tags ("ML")))

; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project
45 changes: 45 additions & 0 deletions source/carton-bindings-ocaml/cartonml/lib/cartonml.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
open Option

(* Abstract handle types: no representation is given on the OCaml side, so
   values of these types can only be obtained from and passed back to the
   externals declared below. They presumably correspond to the Rust bridge
   types FFICarton and FFICartonError -- confirm against the generated code. *)
type f_f_i_carton
type f_f_i_carton_error

(* Tensor payload exchanged with the Rust side. The constructor names the
   element type; the data itself is a flat array with no shape attached.
   Every integer constructor carries [Int64.t] elements, whatever width its
   name suggests, and both [Float] and [Double] carry OCaml [float]s. *)
type tensor =
| Float of float array
| Double of float array
| I8 of Int64.t array
| I16 of Int64.t array
| I32 of Int64.t array
| I64 of Int64.t array
| U8 of Int64.t array
| U16 of Int64.t array
| U32 of Int64.t array
| U64 of Int64.t array
| String of string array
[@@boxed]

(* Raw binding to the foreign symbol [__ocaml_ffi_load]. Takes the model path
   followed by three optional strings, in the order the [load] wrapper below
   passes them: visible device, runner-name override, and required framework
   version override. Returns either a model handle or an error handle. *)
external _load
: string
-> string option
-> string option
-> string option
-> (f_f_i_carton, f_f_i_carton_error) Result.t
= "__ocaml_ffi_load"

(* Raw binding to the foreign symbol [__ocaml_ffi_infer]. Takes a model handle
   and an array of (name, tensor) pairs and returns an array of (name, tensor)
   pairs. The signature has no error channel. *)
external infer
: f_f_i_carton
-> (string * tensor) array
-> (string * tensor) array
= "__ocaml_ffi_infer"

(* [load ?visible_device ?override_runner_name
   ?override_required_framework_version path] hands its arguments straight to
   [_load]. Every optional argument is itself a [string option] and defaults
   to [None] when omitted. *)
let load
  ?(visible_device = None)
  ?(override_runner_name = None)
  ?(override_required_framework_version = None)
  path
  =
  _load path visible_device override_runner_name override_required_framework_version
;;
28 changes: 28 additions & 0 deletions source/carton-bindings-ocaml/cartonml/lib/cartonml.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
(** Abstract handle to a loaded model, as returned in the [Ok] case of
    {!load} and consumed by {!infer}. *)
type f_f_i_carton

(** Abstract handle to a load failure, as returned in the [Error] case of
    {!load}. This interface exposes no operations on it. *)
type f_f_i_carton_error

(** Tensor payload passed to and returned from {!infer}. The constructor names
    the element type; the data is a flat array with no shape attached. Every
    integer constructor carries [Int64.t] elements, whatever width its name
    suggests, and both [Float] and [Double] carry OCaml [float]s. *)
type tensor =
| Float of float array
| Double of float array
| I8 of Int64.t array
| I16 of Int64.t array
| I32 of Int64.t array
| I64 of Int64.t array
| U8 of Int64.t array
| U16 of Int64.t array
| U32 of Int64.t array
| U64 of Int64.t array
| String of string array

(** [infer model inputs] passes the (name, tensor) pairs in [inputs] to the
    foreign function [__ocaml_ffi_infer] and returns the (name, tensor) pairs
    it produces. The signature has no error channel. *)
external infer
: f_f_i_carton
-> (string * tensor) array
-> (string * tensor) array
= "__ocaml_ffi_infer"

(** [load ?visible_device ?override_runner_name
    ?override_required_framework_version path] loads the model at [path],
    returning a model handle on success or an error handle on failure.

    Each optional argument has type [string option], so an explicit value is
    supplied as, for example, [~override_runner_name:(Some "...")]. *)
val load
: ?visible_device:string option
-> ?override_runner_name:string option
-> ?override_required_framework_version:string option
-> string
-> (f_f_i_carton, f_f_i_carton_error) Result.t
5 changes: 5 additions & 0 deletions source/carton-bindings-ocaml/cartonml/lib/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
(library
(public_name cartonml)
(foreign_archives carton_bindings_ocaml)
(c_library_flags
(-llzma -lbz2)))
33 changes: 33 additions & 0 deletions source/carton-bindings-ocaml/src/export.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use crate::*;

#[ocaml_rust::bridge]
mod ffi {
// Bridge-side names for the model and error types, each wrapped in
// `ocaml_rust::Custom`. These are the types `load` returns to OCaml.
type FFICarton = Custom<Carton>;
type FFICartonError = Custom<CartonError>;

// Tensor payload exchanged across the bridge. The variant names the element
// type; the data is a flat `Vec` with no shape attached. Every integer
// variant carries `i64` elements regardless of the width in its name.
#[derive(Clone, Debug)]
pub enum Tensor {
Float(Vec<f32>),
Double(Vec<f64>),
I8(Vec<i64>),
I16(Vec<i64>),
I32(Vec<i64>),
I64(Vec<i64>),
U8(Vec<i64>),
U16(Vec<i64>),
U32(Vec<i64>),
U64(Vec<i64>),
String(Vec<String>),
}
Comment on lines +9 to +21
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to structure the OCaml interface so that it's possible for us to share numeric tensor data between Rust and OCaml (i.e. without having to make copies)?

I realize the fact that carton::types::Tensor is generic makes usage with tools like cxx or ocaml_rust tricky.

Conveniently, I should have a PR up tonight that changes Tensor to no longer be generic (for other reasons). I'll link it here when it's up. Hopefully that helps clean up some ffi code as well.

I'm aware that we currently also copy numeric tensor data in the Python bindings (only when going from Rust to Python), but the interface with Python is designed in a way where we can remove those copies without changing the interface. Can we do something similar for OCaml?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's the PR: #167

Copy link
Author

@arthurmelton arthurmelton Oct 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should actually help a lot implementing things.

The reason I am copying the data is that OCaml only supports Float, Double, int32, and int64, so we can't share by reference, as OCaml could not understand some of the smaller ints. There are libraries that add support for other types, but I believe that if we wanted to support all of the Rust types we would have to fork ocaml-rust.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will also update my enum to use an i32 for the narrower integer types if there is no way to avoid copying the data.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense. As I mentioned in my other comment, it's been a long time since I've written OCaml so I don't have a whole lot of context. Is there a popular ndarray library for OCaml that we could use in the interface?

For example:

  • Python has Numpy,
  • Rust has the ndarray crate
  • JavaScript has a few usable ones on top of TypedArrays
  • C lets us pass pointers to data (along with a deleter function to free the data when it's no longer needed)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me see if I understand what these libraries accomplish. Is the goal to have a chunk of memory with a fixed length, and the "basic" types of a language (the ones specified in the tensor enum)?

I believe that in OCaml, Base.Array is a vector (I believe that is what it's called when it's not a linked list), so all the data will be in the same block of memory. To add support for the different number types we can use stdint (which will give us support for the rest of the ints needed); then all the data would be laid out the same as a Rust vector.

If I am misunderstanding why these types are needed, then there does appear to be np for OCaml, which gives support for matrices and multidimensional arrays.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of these libraries is to make interacting with multidimensional arrays (i.e. ndarrays) easier.

For a numeric multidimensional array, technically all you need is a base data pointer along with the shape and strides of the array. That's not particularly convenient to use directly however, so several ndarray libraries implement helpful functionality on top of the raw data.

It looks like OCaml has BigArray built in which appears to support a lot of features we need. Importantly, it also lets us pass Tensors between OCaml and Rust without having to make a copy of the underlying data. I'm not sure if you can use this easily using ocaml-rust though.


// Signatures of the Rust functions made available to OCaml. The OCaml side
// binds symbols named `__ocaml_ffi_load` and `__ocaml_ffi_infer`, which the
// bridge macro presumably derives from these names -- confirm against the
// ocaml-rust documentation.
extern "Rust" {
// Loads a model from `path`; the three optional strings are the visible
// device, the runner-name override and the required framework version
// override, in that order.
fn load(
path: String,
visible_device: Option<String>,
override_runner_name: Option<String>,
override_required_framework_version: Option<String>,
) -> Result<FFICarton, FFICartonError>;

// Runs `model` on named input tensors and returns named output tensors.
// The signature has no error channel.
fn infer(model: FFICarton, tensors: Vec<(String, Tensor)>) -> Vec<(String, Tensor)>;
}
}
57 changes: 57 additions & 0 deletions source/carton-bindings-ocaml/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
mod export;
mod tensor;
mod utils;

use crate::export::Tensor;
use crate::utils::runtime;

use carton_core::error::CartonError;
use carton_core::types::{Device, LoadOpts, Tensor as CartonTensor};
use carton_core::Carton;
use ocaml_rust::Custom;

/// Load a carton model from `path`, blocking the calling thread until the
/// load has finished.
///
/// `visible_device` is parsed with `Device::maybe_from_str` when present and
/// falls back to `Device::default()` otherwise. The two `override_*` values
/// are forwarded to `LoadOpts` untouched, and `override_runner_opts` is always
/// `None`.
///
/// Both the loaded model and a load error are wrapped in `Custom` before being
/// returned.
///
/// NOTE(review): if `Device::maybe_from_str` fails, the `unwrap()` below
/// panics instead of yielding an `Err` -- confirm whether that is intended.
fn load(
    path: String,
    visible_device: Option<String>,
    override_runner_name: Option<String>,
    override_required_framework_version: Option<String>,
) -> Result<Custom<Carton>, Custom<CartonError>> {
    runtime().block_on(async {
        let visible_device = match visible_device {
            Some(spec) => Device::maybe_from_str(&spec).unwrap(),
            None => Device::default(),
        };

        let opts = LoadOpts {
            override_runner_name,
            override_required_framework_version,
            override_runner_opts: None,
            visible_device,
        };

        Carton::load(path, opts)
            .await
            .map(Custom::new)
            .map_err(Custom::new)
    })
}

/// Run inference on `model`, blocking the calling thread until it finishes.
///
/// Each named input is converted into a carton tensor, the model is invoked
/// through its lock, and every named output is converted back into the FFI
/// `Tensor` type.
///
/// # Panics
///
/// Panics through `unwrap()` if the lock cannot be acquired or if inference
/// returns an error.
fn infer(model: Custom<Carton>, tensors: Vec<(String, Tensor)>) -> Vec<(String, Tensor)> {
    runtime().block_on(async {
        let mut inputs: Vec<(_, CartonTensor<_>)> = Vec::with_capacity(tensors.len());
        for (name, tensor) in tensors {
            inputs.push((name, CartonTensor::from(tensor)));
        }

        model
            .inner()
            .lock()
            .unwrap()
            .infer(inputs)
            .await
            .unwrap()
            .into_iter()
            .map(|(name, tensor)| (name, tensor.into()))
            .collect()
    })
}
65 changes: 65 additions & 0 deletions source/carton-bindings-ocaml/src/tensor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use carton_core::types::{GenericStorage, Tensor, TensorStorage, TypedStorage};

use crate::Tensor as SupportedTensorType;

// Converts a carton integer tensor `$x` into the FFI variant `$over`,
// widening every element to `i64` with `as`.
//
// Elements are read through the view's iterator, which visits them in logical
// (row-major) order whatever the underlying strides are. The previous
// `into_owned().into_raw_vec()` exposed the buffer in memory order, which is
// a different order for non-standard layouts, and made an extra copy of the
// data before the widening pass.
//
// NOTE: `as` reinterprets `u64` values above `i64::MAX` as negative numbers.
macro_rules! from_tensor_int {
    ( $over:ident, $x:expr ) => {
        SupportedTensorType::$over($x.view().iter().map(|&v| v as i64).collect())
    };
}

// Converts the flat FFI vector `$x` into the carton tensor variant `$over`,
// casting every element to `$as` with `as`.
//
// The result is a one-dimensional tensor whose length equals the number of
// elements supplied. The shape used to be hard-coded to `[1]`, which made
// `from_shape_vec(..).unwrap()` panic for any input that did not contain
// exactly one element.
//
// NOTE: `as` narrows silently, so values outside the range of `$as` wrap or
// truncate rather than being reported.
macro_rules! to_tensor_int {
    ( $over:ident, $x:expr, $as:ident ) => {{
        let values: Vec<$as> = $x.into_iter().map(|x| x as $as).collect();
        Tensor::<GenericStorage>::$over(ndarray::Array1::from_vec(values).into_dyn())
    }};
}

/// Flattens a carton tensor into the FFI tensor type.
///
/// The tensor's shape is not carried over: the result is a flat `Vec` of the
/// elements in logical (row-major) order. Float, double and string elements
/// are copied as they are; integer elements are widened to `i64` by
/// `from_tensor_int!`.
///
/// Elements are read through the view's iterator rather than with
/// `into_owned().into_raw_vec()`, because the raw buffer is in memory order
/// and so differs from the logical order for non-standard layouts.
///
/// # Panics
///
/// Panics for `Tensor::NestedTensor`, which is not supported yet.
impl<T: TensorStorage> From<Tensor<T>> for SupportedTensorType {
    fn from(item: Tensor<T>) -> Self {
        match item {
            Tensor::Float(x) => SupportedTensorType::Float(x.view().iter().copied().collect()),
            Tensor::Double(x) => SupportedTensorType::Double(x.view().iter().copied().collect()),
            Tensor::I8(x) => from_tensor_int!(I8, x),
            Tensor::I16(x) => from_tensor_int!(I16, x),
            Tensor::I32(x) => from_tensor_int!(I32, x),
            Tensor::I64(x) => from_tensor_int!(I64, x),
            Tensor::U8(x) => from_tensor_int!(U8, x),
            Tensor::U16(x) => from_tensor_int!(U16, x),
            Tensor::U32(x) => from_tensor_int!(U32, x),
            Tensor::U64(x) => from_tensor_int!(U64, x),
            Tensor::String(x) => SupportedTensorType::String(x.view().iter().cloned().collect()),
            Tensor::NestedTensor(_) => panic!("Nested tensor output not implemented yet"),
        }
    }
}

/// Builds a carton tensor backed by `GenericStorage` from an FFI tensor.
///
/// Every variant, including the float and string ones, goes through
/// `to_tensor_int!`, which casts each element to the target element type
/// with `as`. Integer payloads arrive as `i64`, so the cast narrows them to
/// the width named by the variant.
impl From<SupportedTensorType> for Tensor<GenericStorage> {
    fn from(item: SupportedTensorType) -> Self {
        // Arms follow the declaration order of the FFI enum.
        match item {
            SupportedTensorType::Float(values) => to_tensor_int!(Float, values, f32),
            SupportedTensorType::Double(values) => to_tensor_int!(Double, values, f64),
            SupportedTensorType::I8(values) => to_tensor_int!(I8, values, i8),
            SupportedTensorType::I16(values) => to_tensor_int!(I16, values, i16),
            SupportedTensorType::I32(values) => to_tensor_int!(I32, values, i32),
            SupportedTensorType::I64(values) => to_tensor_int!(I64, values, i64),
            SupportedTensorType::U8(values) => to_tensor_int!(U8, values, u8),
            SupportedTensorType::U16(values) => to_tensor_int!(U16, values, u16),
            SupportedTensorType::U32(values) => to_tensor_int!(U32, values, u32),
            SupportedTensorType::U64(values) => to_tensor_int!(U64, values, u64),
            SupportedTensorType::String(values) => to_tensor_int!(String, values, String),
        }
    }
}
11 changes: 11 additions & 0 deletions source/carton-bindings-ocaml/src/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Stolen from carton-bindings-c/src/utils.rs

use std::sync::OnceLock;

use tokio::runtime::Runtime;

/// Returns the process-wide tokio runtime, creating it on first use.
///
/// The runtime lives in a function-local `OnceLock`, so every call after the
/// first returns the same instance.
///
/// # Panics
///
/// Panics if `Runtime::new()` fails when the runtime is first created.
pub(crate) fn runtime() -> &'static Runtime {
    static SHARED: OnceLock<Runtime> = OnceLock::new();

    /// Creates the runtime; invoked by `get_or_init` only while `SHARED` is empty.
    fn start() -> Runtime {
        Runtime::new().unwrap()
    }

    SHARED.get_or_init(start)
}