Skip to content

Commit

Permalink
Fixed embeddings
Browse files Browse the repository at this point in the history
  • Loading branch information
tgaddair committed Oct 18, 2024
1 parent fcfa679 commit df5ae30
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 14 deletions.
13 changes: 0 additions & 13 deletions server/lorax_server/models/flash_causal_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,19 +353,6 @@ def from_pb(
adapter_meta=None,
)

@classmethod
def from_pb_embed(
    cls,
    pb: generate_pb2.EmbedRequest,
    tokenizer: PreTrainedTokenizerBase,
    tokenizers: TokenizerManager,
    processor,
    config,
    dtype,
    device,
) -> "FlashCausalLMBatch":
    """Build a batch from an embedding request.

    Delegates to ``from_pb``, passing ``None`` for the two positional
    arguments that do not apply to embedding requests. ``processor`` and
    ``config`` are accepted for interface parity with other batch
    constructors but are unused here.

    Args:
        pb: The incoming embedding request protobuf.
        tokenizer: Tokenizer used to encode the request inputs.
        tokenizers: Manager holding per-adapter tokenizers.
        processor: Unused; kept for signature compatibility.
        config: Unused; kept for signature compatibility.
        dtype: Torch dtype for batch tensors.
        device: Torch device for batch tensors.

    Returns:
        A ``FlashCausalLMBatch`` constructed by ``from_pb``.
    """
    # NOTE: this is a classmethod, so the first parameter is the class
    # itself — named `cls` (the original mislabeled it `self`).
    return cls.from_pb(pb, tokenizer, tokenizers, None, None, dtype, device)

@tracer.start_as_current_span("filter")
def filter(self, request_ids: List[int]) -> "FlashCausalLMBatch":
if len(request_ids) == 0:
Expand Down
2 changes: 1 addition & 1 deletion server/lorax_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ async def Embed(self, request: generate_pb2.EmbedRequest, context):
if not self.model.supports_embeddings:
raise ValueError("Model does not support embeddings")

batch = self.model.batch_type.from_pb_embed(
batch = self.model.batch_type.from_pb(
request.batch,
self.model.tokenizer,
self.model.tokenizers,
Expand Down

0 comments on commit df5ae30

Please sign in to comment.