Skip to content

Commit

Permalink
refactor turbomind (2/N) (#2818)
Browse files Browse the repository at this point in the history
  • Loading branch information
lzhangzz authored Nov 29, 2024
1 parent 0b6dd1f commit 4ede631
Show file tree
Hide file tree
Showing 13 changed files with 370 additions and 696 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ link_directories(

# add_subdirectory(3rdparty)
add_subdirectory(src)
- add_subdirectory(examples)
+ # add_subdirectory(examples)

if(BUILD_TEST)
add_subdirectory(tests/csrc)
Expand Down
8 changes: 2 additions & 6 deletions lmdeploy/turbomind/turbomind.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,12 +358,10 @@ def _forward_callback(self, result, ctx):
self.que.put((False, result))

def _forward_thread(self, inputs):
- instance_comm = self.tm_model.model_comm.create_instance_comm(
- self.gpu_count)

def _func():
try:
- output = self.model_inst.forward(inputs, instance_comm)
+ output = self.model_inst.forward(inputs)
except Exception as e:
logger.error(f'unhandled exception: {e}')
self.que.put((-1, None))
Expand All @@ -377,12 +375,10 @@ def _async_forward_callback(self, result, ctx, que: LifoQueue):
que.put((False, result))

def _async_forward_thread(self, inputs, que: LifoQueue):
- instance_comm = self.tm_model.model_comm.create_instance_comm(
- self.gpu_count)

def _func():
try:
- output = self.model_inst.forward(inputs, instance_comm)
+ output = self.model_inst.forward(inputs)
except Exception as e:
logger.error(f'unhandled exception: {e}')
que.put((-1, None))
Expand Down
4 changes: 1 addition & 3 deletions src/turbomind/models/llama/LlamaBatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "src/turbomind/utils/allocator.h"
#include "src/turbomind/utils/cublasMMWrapper.h"
#include "src/turbomind/utils/cuda_utils.h"
- #include "src/turbomind/utils/instance_comm.h"
#include <condition_variable>
#include <curand_kernel.h>
#include <mutex>
Expand All @@ -32,8 +31,7 @@ struct SharedState {
};

struct Control {
- AbstractInstanceComm* comm;
- Request::Callback callback;
+ Request::Callback callback;
};

struct BatchState {
Expand Down
6 changes: 3 additions & 3 deletions src/turbomind/models/llama/LlamaV2.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

#pragma once

+ #include <limits>
+ #include <unordered_map>
+
#include "src/turbomind/layers/DynamicDecodeLayer.h"
#include "src/turbomind/models/llama/Barrier.h"
#include "src/turbomind/models/llama/LlamaBatch.h"
Expand All @@ -31,10 +34,7 @@
#include "src/turbomind/models/llama/unified_decoder.h"
#include "src/turbomind/utils/allocator.h"
#include "src/turbomind/utils/cublasMMWrapper.h"
- #include "src/turbomind/utils/instance_comm.h"
#include "src/turbomind/utils/nccl_utils.h"
- #include <limits>
- #include <unordered_map>

namespace turbomind {

Expand Down
Loading

0 comments on commit 4ede631

Please sign in to comment.