fix triton server backend
AllentDan committed Feb 2, 2024
1 parent 129dbc2 commit fbe3da1
Showing 3 changed files with 24 additions and 13 deletions.
13 changes: 7 additions & 6 deletions lmdeploy/serve/turbomind/chatbot.py
@@ -496,15 +496,16 @@ def _stream_infer(self,
         session.status = StatusCode.TRITON_SESSION_READY
 
         que = queue.Queue()
-        producer = threading.Thread(
-            target=self._stream_producer,
-            args=(self.tritonserver_addr, session, que, self.cfg, input_ids,
-                  input_lengths, request_output_len, sequence_start,
-                  sequence_end, preseq_length, cancel, skip_special_tokens))
+        producer = threading.Thread(target=self._stream_producer,
+                                    args=(self.tritonserver_addr, session, que,
+                                          self.cfg, input_ids, input_lengths,
+                                          request_output_len, sequence_start,
+                                          sequence_end, preseq_length, cancel))
         producer.start()
         for status, res, n_token in self.stream_consumer(
                 self.postprocess, que, session, input_tokens, preseq_length,
-                cancel, logger, self.display, self.eos_id):
+                cancel, logger, self.display, self.eos_id,
+                skip_special_tokens):
             yield status, res, n_token
 
         producer.join()
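Previously, skip_special_tokens rode along in the producer thread's args, i.e. into the Triton request itself; the fix applies it on the consumer side, where the returned token ids are actually decoded. A minimal sketch of that producer/consumer split, assuming a None end-of-stream sentinel on the queue; fake_postprocess, consume, and the special-id set {0, 2} are hypothetical stand-ins, not the real chatbot code:

    import queue
    import threading

    def fake_postprocess(token_ids, skip_special_tokens):
        """Toy stand-in for Chatbot.postprocess; ids 0 and 2 play bos/eos."""
        specials = {0, 2}
        if skip_special_tokens:
            token_ids = [t for t in token_ids if t not in specials]
        return ' '.join(f'tok{t}' for t in token_ids)

    def consume(que, postprocess, skip_special_tokens):
        """Decode on the consumer side, as stream_consumer now does."""
        while True:
            item = que.get()
            if item is None:  # end-of-stream sentinel from the producer
                return
            yield postprocess(item, skip_special_tokens=skip_special_tokens)

    que = queue.Queue()
    producer = threading.Thread(
        target=lambda: [que.put(x) for x in ([2, 5, 7], [2, 5, 7, 9], None)])
    producer.start()
    for text in consume(que, fake_postprocess, skip_special_tokens=True):
        print(text)  # 'tok5 tok7', then 'tok5 tok7 tok9'
    producer.join()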
@@ -125,11 +125,12 @@ def _postprocessing(self, tokens_batch, sequence_length,
                             skip_special_tokens):
         """decode token ids into texts."""
         outputs = []
-        for beam_tokens, beam_len in zip(tokens_batch, sequence_length,
-                                         skip_special_tokens):
-            for tokens, _len in zip(beam_tokens, beam_len):
+        for beam_tokens, beam_len, beam_skip_special in zip(
+                tokens_batch, sequence_length, skip_special_tokens):
+            for tokens, _len, skip_special in zip(beam_tokens, beam_len,
+                                                  beam_skip_special):
                 output = self.tokenizer.decode(
-                    tokens, _len, skip_special_tokens=skip_special)
+                    tokens, _len, skip_special_tokens=skip_special)
                 output = output.encode('utf8')
                 outputs.append(output)
         return outputs
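This hunk fixes a genuine runtime bug: the old loop zipped three sequences into two loop variables, which raises ValueError: too many values to unpack, and it passed the whole skip_special_tokens batch to a decode call that expects a single bool. The fixed pattern threads one flag per beam. A self-contained illustration, where toy_decode and its id threshold are stand-ins for self.tokenizer.decode, and the sample batch is made up:

    def toy_decode(tokens, _len, skip_special_tokens=True):
        """Toy stand-in for tokenizer.decode; ids < 3 act as specials."""
        kept = [t for t in tokens[:_len] if not (skip_special_tokens and t < 3)]
        return ' '.join(str(t) for t in kept)

    tokens_batch = [[[2, 11, 12], [2, 13, 14]]]  # one request, two beams
    sequence_length = [[3, 3]]
    skip_special_tokens = [[True, False]]        # one flag per beam

    outputs = []
    for beam_tokens, beam_len, beam_skip in zip(tokens_batch, sequence_length,
                                                skip_special_tokens):
        for tokens, _len, skip in zip(beam_tokens, beam_len, beam_skip):
            outputs.append(toy_decode(tokens, _len, skip_special_tokens=skip))
    print(outputs)  # ['11 12', '2 13 14']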
15 changes: 12 additions & 3 deletions lmdeploy/tokenizer.py
@@ -121,7 +121,11 @@ def encode(self, s: str, add_bos: bool = True, **kwargs):
         """
         return self.model.Encode(s, add_bos=add_bos, **kwargs)
 
-    def decode(self, t: Sequence[int], offset: Optional[int] = None):
+    def decode(self,
+               t: Sequence[int],
+               offset: Optional[int] = None,
+               *args,
+               **kwargs):
         """De-tokenize.
 
         Args:
@@ -517,7 +521,12 @@ def encode(self, s: str, add_bos: bool = True, **kwargs):
         """
         return self.model.encode(s, add_bos, **kwargs)
 
-    def decode(self, t: Sequence[int], offset: Optional[int] = None):
+    def decode(
+        self,
+        t: Sequence[int],
+        offset: Optional[int] = None,
+        skip_special_tokens: bool = True,
+    ):
         """De-tokenize.
 
         Args:
@@ -527,7 +536,7 @@ def decode(self, t: Sequence[int], offset: Optional[int] = None):
         Returns:
             str: text of decoding tokens
         """
-        return self.model.decode(t, offset)
+        return self.model.decode(t, offset, skip_special_tokens)
 
     def detokenize_incrementally(self,
                                  all_input_ids: Sequence[int],
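With the new parameter, callers can keep special tokens in the decoded text by passing skip_special_tokens=False; the default of True preserves the old behavior. Note that the SentencePiece-backed decode above only absorbs the flag via *args/**kwargs, while the HuggingFace-backed decode honors it. A usage sketch, assuming the Tokenizer constructor takes a model folder or hub id (the id below is only an example):

    from lmdeploy.tokenizer import Tokenizer

    tokenizer = Tokenizer('internlm/internlm-chat-7b')  # illustrative model id

    ids = tokenizer.encode('Hello, world')
    print(tokenizer.decode(ids))                             # specials stripped (default)
    print(tokenizer.decode(ids, skip_special_tokens=False))  # specials such as '<s>' kept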
