Fix core dump when session length is exceeded in chat and generate (#366)
AllentDan authored Sep 7, 2023
Parent: 71ade77 · Commit: ce21a31
Showing 2 changed files with 4 additions and 4 deletions.
lmdeploy/serve/async_engine.py (1 addition, 1 deletion)

```diff
@@ -112,7 +112,7 @@ async def generate(
         prompt = self.model.messages2prompt(messages, sequence_start)
         input_ids = self.tokenizer.encode(prompt)
         finish_reason = 'stop' if stop else None
-        if not sequence_end and self.steps[str(session_id)] + len(
+        if self.steps[str(session_id)] + len(
                 input_ids) >= self.tm_model.session_len:
             finish_reason = 'length'
             yield GenOut('', self.steps[str(session_id)], len(input_ids), 0,
```
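The one-line change above drops the `not sequence_end` condition, so the session-length guard now runs on every request; previously a request sent with `sequence_end=True` skipped the check, and an over-long input reached the engine. A minimal standalone sketch of the guard's logic, with hypothetical names (`GenOut`, `step`, and `session_len` mirror the attributes in the diff; this is not the actual lmdeploy implementation):

```python
# Minimal sketch of the session-length guard, assuming a GenOut-like
# record; illustrative only, not the actual lmdeploy code.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class GenOut:
    response: str
    history_len: int
    input_len: int
    generate_len: int
    finish_reason: Optional[str]


def guard_session_len(step: int, input_ids: List[int],
                      session_len: int) -> Optional[GenOut]:
    """Return a terminal GenOut if the request would overflow the
    session; None means it is safe to run inference."""
    if step + len(input_ids) >= session_len:
        # Report finish_reason='length' without touching the engine,
        # instead of forwarding an over-long request.
        return GenOut('', step, len(input_ids), 0, 'length')
    return None
```

The point of the fix is that this check now happens unconditionally, before any engine call, so an overflow surfaces as an ordinary `finish_reason='length'` output rather than a core dump.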
lmdeploy/turbomind/chat.py (3 additions, 3 deletions)

```diff
@@ -74,12 +74,12 @@ def main(model_path,
             seed = random.getrandbits(64)
         else:
             print(f'session {session_id}')
-            if step >= tm_model.session_len:
+            prompt = model.get_prompt(prompt, nth_round == 1)
+            input_ids = tokenizer.encode(prompt)
+            if step + len(input_ids) >= tm_model.session_len:
                 print('WARNING: exceed session max length.'
                       ' Please end the session.')
                 continue
-            prompt = model.get_prompt(prompt, nth_round == 1)
-            input_ids = tokenizer.encode(prompt)
             print(f'{prompt} ', end='', flush=True)
             response_size = 0
             for outputs in generator.stream_infer(
```
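In `chat.py` the fix is an ordering change: the prompt is rendered and tokenized before the length check, because the guard must account for the tokens the new turn adds, not just the accumulated `step`. A simplified sketch of the corrected ordering (the `model`, `tokenizer`, and `tm_model` parameters stand in for the real objects; `get_prompt`, `encode`, and `session_len` follow the diff):

```python
# Sketch of the corrected turn handling from chat.py: tokenize first,
# then compare step + len(input_ids) with the session limit.
def turn_fits(model, tokenizer, tm_model, prompt: str,
              step: int, nth_round: int) -> bool:
    """Return True if this turn may proceed to inference."""
    prompt = model.get_prompt(prompt, nth_round == 1)
    input_ids = tokenizer.encode(prompt)
    if step + len(input_ids) >= tm_model.session_len:
        # The old code compared only `step`, missing the new prompt's
        # tokens, so an over-long turn slipped through to the engine.
        print('WARNING: exceed session max length.'
              ' Please end the session.')
        return False
    return True
```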
