Skip to content

Commit

Permalink
limit usage of llm::
Browse files (browse the repository at this point in the history)
  • Loading branch information
mmoskal committed Feb 6, 2024
1 parent b0254f8 commit e8e32d1
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 18 deletions.
4 changes: 0 additions & 4 deletions rllm-cuda/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ cfg_if::cfg_if! {
} else {
pub mod llamacpp;
pub use llamacpp::BlockRef;
// pub use llamacpp as llm;
// pub use llm::{Device, DType, Tensor};
// pub(crate) use llamacpp::BlockRef;
// pub(crate) use llamacpp::blocks::CppBlockSpaceManager;
}
}

Expand Down
7 changes: 2 additions & 5 deletions rllm-cuda/src/llm/paged/batch_info.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
use super::super::{kernels::to_offsets, tmodel::TModel};
use super::cache_engine::CacheEngine;
use crate::{
config::RllmConfig,
llm::{kernels::to_offsets, tmodel::TModel},
seq::SchedulingPhase,
util::pad_to_multiple,
HashMap, SchedulerOutputs,
config::RllmConfig, seq::SchedulingPhase, util::pad_to_multiple, HashMap, SchedulerOutputs,
};
use aicirt::api::Token;
use std::{
Expand Down
2 changes: 1 addition & 1 deletion rllm-cuda/src/llm/paged/blocks.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::super::tmodel::TModel;
use super::cache_engine::CacheEngine;
use crate::{
config::RllmConfig,
llm::tmodel::TModel,
seq::{SchedulingPhase, Sequence, SequenceGroup},
BlockLocation, CacheSize, HashMap, SchedulerOutputs, TBlockSpaceManager,
};
Expand Down
14 changes: 6 additions & 8 deletions rllm-cuda/src/llm/paged/cache_engine.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
// based on https://github.com/vllm-project/vllm/blob/b9fe4616f98b77b4b9458bce203aa6544cb31ef2/vllm/worker/cache_engine.py

use super::super::{config::TchRllmConfig, kernels, tmodel::TModel};
use super::CacheIface;
use crate::{config::RllmConfig, CacheSize, HashMap};
use std::sync::Arc;
use tch::{Device, Tensor};

#[cfg(not(feature = "cuda"))]
use super::cuda_stub::{CudaEvent, CudaStream};
use tch::{Device, Tensor};
#[cfg(feature = "cuda")]
use tch_cuda::{CudaEvent, CudaStream};

use crate::{
config::RllmConfig, llm::{config::TchRllmConfig, kernels, tmodel::TModel}, CacheSize, HashMap
};
use std::sync::Arc;

use super::CacheIface;

type KVCache = (Tensor, Tensor);

pub struct CacheEngine {
Expand Down

0 comments on commit e8e32d1

Please sign in to comment.