Commit 7636e86
Merge pull request #30 from RICHARDNAN/csx-main-fix
Remove deprecated code
2 parents a9d1d49 + 48fdace commit 7636e86

4 files changed: +3 −53 lines

doc/zh/DeepseekR1_tutorial_zh_for_Ascend_NPU.md

Lines changed: 3 additions & 2 deletions
@@ -37,8 +37,9 @@ conda install -c conda-forge libstdcxx-ng # installs `GLIBCXX-3.4.32`
 apt install zlib1g-dev libtbb-dev libssl-dev libaio-dev libcurl4-openssl-dev
 pip3 install numpy==1.26.4 # to match torch/torch_npu
 pip3 install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu
-pip3 install packaging ninja transformers==4.43.2 fire protobuf attrs decorator cloudpickle ml-dtypes scipy tornado absl-py psutil
+pip3 install packaging ninja fire protobuf attrs decorator cloudpickle ml-dtypes scipy tornado absl-py psutil
 pip3 install sqlalchemy
+pip3 install transformers==4.57.1 # note: transformers 4.57.1 is required at runtime (other versions are untested)
 #pip3 install cpufeature # only for x86
 ```
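The key change above replaces the old blanket transformers==4.43.2 pin with a dedicated transformers==4.57.1 install, which the new comment says is required at runtime. A quick sanity check (my sketch, not part of the tutorial) that the environment matches the pin:

```python
# Hedged sanity check: confirm the installed transformers matches the 4.57.1
# pin introduced above; the diff notes other versions are untested.
import transformers

assert transformers.__version__ == "4.57.1", (
    f"expected transformers 4.57.1, found {transformers.__version__}"
)
```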

@@ -122,7 +123,7 @@ python ktransformers/server/main.py \
     --gguf_path /mnt/data/models/DeepSeek-R1-q4km-w8a8 \
     --model_name DeepSeekV3ForCausalLM \
     --cpu_infer 60 \
-    --optimize_config_path /home/huawei/ktransformers/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-300IA2-npu-serve.yaml \
+    --optimize_config_path ./ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-300IA2-npu-serve.yaml \
     --max_new_tokens 128 \
     --max_batch_size 4 \
     --use_cuda_graph \
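This hunk swaps a machine-specific absolute path for one relative to the repository root, so the launch command now only works when run from the ktransformers checkout. A hedged pre-flight check (my sketch, not from the tutorial):

```python
# Hedged pre-flight check: the relative path below is copied from the diff and
# only resolves when the server is launched from the ktransformers repo root.
import os

rule = "./ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-300IA2-npu-serve.yaml"
if not os.path.isfile(rule):
    raise FileNotFoundError(f"{rule} not found; run from the repo root")
```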

ktransformers/models/ascend/custom_ascend_modeling_deepseek_v3.py

Lines changed: 0 additions & 29 deletions
@@ -114,16 +114,6 @@ def print_callback(self, param):
         print("########################################")
         print("hidden_states is ", hidden_states)
         print("########################################")
-        # with torch.npu.stream(self.call_stream):
-        #     position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size = param
-        #     print("########################################")
-        #     print("position_ids is ", position_ids)
-        #     print("page_idx is ", page_idx)
-        #     print("page_offset is ", page_offset)
-        #     print("block_tables is ", block_tables)
-        #     print("hidden_states is ", hidden_states)
-        #     print("#########################################")
-

     def forward(
         self,
@@ -172,27 +162,8 @@ def forward(
         q_len_raw = None
         kv_len_raw = batch.minibatch.d_kv_len_list
         bsz_real = None
-        # if utils._USE_NPU_GRAPH:
-        #     from libgraph_capture import graph_capture_launch_callback
-        #     param = (position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size)
-        #     graph_capture_launch_callback(self.print_callback, param, 1, self.stream.npu_stream)
-        # else:
-        #     param = (position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size)
-        #     self.print_callback(param)
-

-        # with torch_npu.npu.stream(self.stream):
-        #     print_ex("####: before decode layer...")
         for i, decode_layer in enumerate(self.model.layers):
-            # if not is_prefill:
-            #     if utils._USE_NPU_GRAPH:
-            #         from libgraph_capture import graph_capture_launch_callback
-            #         param = (hidden_states, )
-            #         graph_capture_launch_callback(self.print_callback, param, 1, self.stream.npu_stream)
-            #     else:
-            #         param = (hidden_states, )
-            #         self.print_callback(param)
-            # attn
             residual = hidden_states
             hidden_states = decode_layer.input_layernorm(hidden_states)
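Everything deleted here was already commented out: debug hooks that dumped tensors either inline or through an NPU graph-capture callback. For reference, a hedged, self-contained toy of the pattern being removed, in which libgraph_capture and the NPU stream are mocked (the real module is Ascend-specific and not assumed here):

```python
# Toy reconstruction of the deleted debug path. USE_NPU_GRAPH stands in for
# utils._USE_NPU_GRAPH, and graph_capture_launch_callback is a mock; only the
# control flow mirrors the removed comments.
USE_NPU_GRAPH = False


def print_callback(param):
    (hidden_states,) = param
    print("hidden_states is ", hidden_states)


def graph_capture_launch_callback(callback, param, count, stream):
    # Mock: the real helper queues the callback onto the capture stream.
    callback(param)


def debug_dump(hidden_states, stream=None):
    param = (hidden_states,)
    if USE_NPU_GRAPH:
        graph_capture_launch_callback(print_callback, param, 1, stream)
    else:
        print_callback(param)


debug_dump([0.1, 0.2])  # prints: hidden_states is  [0.1, 0.2]
```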

ktransformers/server/backend/interfaces/ktransformers.py

Lines changed: 0 additions & 6 deletions
@@ -232,12 +232,6 @@ def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: Optional[f
         )
         self.seq_length = 1

-        # flat_prev_ids = self.generated_ids.flatten()
-        # for i in range(min(self.seq_length, flat_input_ids.shape[0]) - 1):
-        #     if flat_input_ids[i] == flat_prev_ids[i]:
-        #         same_prefix += 1
-        #     else:
-        #         break

         logger.debug(f"same prefix len: {same_prefix}")
         self.cache.remove_suffix(same_prefix)
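The deleted block (again, already commented out) was an earlier pure-Python computation of same_prefix, the length of the prefix shared between the new input ids and the previously generated ids, which prefill() passes to self.cache.remove_suffix(). A hedged, self-contained sketch of that computation; the function and sample tensors are illustrative, not the repository's API:

```python
# Hedged sketch of the deleted logic; tensor names mirror the removed
# comments, including the original loop's `- 1` bound.
import torch


def same_prefix_len(flat_input_ids: torch.Tensor, flat_prev_ids: torch.Tensor) -> int:
    same_prefix = 0
    for i in range(min(flat_input_ids.shape[0], flat_prev_ids.shape[0]) - 1):
        if flat_input_ids[i] == flat_prev_ids[i]:
            same_prefix += 1
        else:
            break
    return same_prefix


print(same_prefix_len(torch.tensor([1, 2, 3, 9]), torch.tensor([1, 2, 3, 4])))  # 3
```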

ktransformers/server/utils/serve_profiling.py

Lines changed: 0 additions & 16 deletions
@@ -106,19 +106,3 @@ def get_stat(self):

 PROF_TIME_STAT = ProfTimeStat()

-
-# j=0
-# start_time = PROF_TIME_STAT.record_start_time()
-# for i in range(500):
-#     j+=1
-# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
-
-# for i in range(500):
-#     j+=1
-# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
-
-# for i in range(500):
-#     j+=1
-# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
-
-# PROF_TIME_STAT.print_all()
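The removed lines were a commented-out smoke test for the profiling counters. For reference, a hedged reconstruction of how they exercised the module; every name comes from the removed comments, and the import path is an assumption, not verified:

```python
# Hedged reconstruction of the deleted smoke test. PROF_TIME_STAT,
# ProfStatKey.ExpertsSummitCurrLayer, record_start_time, add_time_stat, and
# print_all are taken from the removed comments; the import path is assumed.
from ktransformers.server.utils.serve_profiling import PROF_TIME_STAT, ProfStatKey

start_time = PROF_TIME_STAT.record_start_time()
j = 0
for i in range(500):
    j += 1  # trivial busy-work being timed
PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
PROF_TIME_STAT.print_all()
```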
