diff --git a/benchmarks/bench_processors.py b/benchmarks/bench_processors.py
deleted file mode 100644
index 5b490154..00000000
--- a/benchmarks/bench_processors.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import numpy as np
-import torch
-
-import outlines.models as models
-from outlines.processors import OutlinesLogitsProcessor, RegexLogitsProcessor
-
-try:
-    import mlx.core as mx
-except ImportError:
-    pass
-
-
-def is_mlx_lm_allowed():
-    try:
-        import mlx.core as mx
-    except ImportError:
-        return False
-    return mx.metal.is_available()
-
-
-def get_mock_processor_inputs(array_library, num_tokens=30000):
-    """
-    logits shape: (4, 30000), dtype=float
-    input_ids shape: (4, 2048), dtype=int
-    """
-    if array_library == "torch":
-        logits = torch.rand((4, num_tokens), dtype=torch.float)
-        input_ids = torch.randint(
-            low=0, high=num_tokens, size=(4, 2048), dtype=torch.int
-        )
-    elif array_library == "torch_cuda":
-        logits = torch.rand((4, num_tokens), dtype=torch.float, device="cuda")
-        input_ids = torch.randint(
-            low=0, high=num_tokens, size=(4, 2048), dtype=torch.int, device="cuda"
-        )
-    elif array_library == "numpy":
-        logits = np.random.rand(4, num_tokens).astype(np.float32)
-        input_ids = np.random.randint(low=0, high=num_tokens, size=(4, 2048))
-    elif array_library == "mlx":
-        logits = mx.random.uniform(
-            low=-1e9, high=1e9, shape=(4, num_tokens), dtype=mx.float32
-        )
-        input_ids = mx.random.randint(
-            low=0, high=num_tokens, shape=(4, 2048), dtype=mx.int32
-        )
-    else:
-        raise ValueError
-
-    return logits, input_ids
-
-
-class HalvingLogitsProcessor(OutlinesLogitsProcessor):
-    """Simply halve the passed logits."""
-
-    def process_logits(self, input_ids, logits):
-        return logits / 2
-
-
-class LogitsProcessorPassthroughBenchmark:
-    """
-    Benchmark the time it takes to convert between array frameworks.
-    This should be on the order of microseconds.
-    """
-
-    params = ["torch", "numpy"]
-    if is_mlx_lm_allowed():
-        params += ["mlx"]
-    if torch.cuda.is_available():
-        params += ["torch_cuda"]
-
-    def setup(self, array_library):
-        self.logits_processor = HalvingLogitsProcessor()
-
-        self.logits, self.input_ids = get_mock_processor_inputs(array_library)
-
-    def time_passthrough(self, *params):
-        self.logits_processor(self.input_ids, self.logits)
-
-
-class LogitsProcessorStructuredBenchmark:
-    """
-    Benchmark structured generation mask application for a single decoder pass.
-    """
-
-    array_libraries = ["torch", "numpy"]
-    if is_mlx_lm_allowed():
-        array_libraries += ["mlx"]
-    # PR TODO
-    if torch.cuda.is_available():
-        array_libraries += ["torch_cuda"]
-
-    # accept very many or very few tokens, respectively
-    patterns = [r"[^Z]*", "Z*"]
-
-    params = [array_libraries, patterns]
-    param_names = ["array_library", "pattern"]
-
-    def setup(self, array_library, pattern):
-        tokenizer = models.transformers("facebook/opt-125m", device="cpu").tokenizer
-
-        self.logits_processor = RegexLogitsProcessor(pattern, tokenizer)
-
-        self.logits, self.input_ids = get_mock_processor_inputs(
-            array_library, len(tokenizer.vocabulary)
-        )
-
-    def time_structured_generation(self, array_library, pattern):
-        self.logits_processor(self.input_ids, self.logits)
diff --git a/outlines/integrations/__init__.py b/outlines/integrations/__init__.py
deleted file mode 100644
index b0a90d5e..00000000
--- a/outlines/integrations/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Utility functions and classes used to integrate `outlines` into other packages."""
diff --git a/outlines/integrations/transformers.py b/outlines/integrations/transformers.py
deleted file mode 100644
index 7c1bafd2..00000000
--- a/outlines/integrations/transformers.py
+++ /dev/null
@@ -1,159 +0,0 @@
-"""Make Hugging Face transformers compatible with Outlines' structured generation.
-
- _______________________________
-/ Don't want to self-host?      \
-\ Try .json at http://dottxt.co /
- -------------------------------
-        \   ^__^
-         \  (oo)\_______
-            (__)\       )\/\
-                ||----w |
-                ||     ||
-
-Copyright 2024- the Outlines developers
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from collections import defaultdict
-from typing import DefaultDict, Iterable, Optional, Type, Union
-
-import torch
-from pydantic import BaseModel
-from transformers import Pipeline, PreTrainedTokenizerBase
-
-from outlines.fsm.guide import RegexGuide
-from outlines.fsm.json_schema import build_regex_from_schema
-from outlines.integrations.utils import adapt_tokenizer, convert_json_schema_to_str
-
-
-class RegexPrefixAllowedTokens:
-    """Bias transformers generation based on a regular expression.
-
-    Attributes
-    ----------
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(
-        self,
-        regex_string: str,
-        tokenizer_or_pipe: Union[PreTrainedTokenizerBase, Pipeline],
-    ):
-        """Compile the FSM that drives the regex-structured generation.
-
-        Parameters
-        ----------
-        regex_string
-            A string that represents a regular expression.
-        tokenizer_or_pipe
-            The tokenizer of the model, or the pipeline object.
-
-        Raises
-        ------
-        ValueError
-            If the `tokenizer_or_pipe` parameter is not a tokenizer or a pipeline.
-        """
-        if isinstance(tokenizer_or_pipe, Pipeline):
-            tokenizer = tokenizer_or_pipe.tokenizer
-        elif isinstance(tokenizer_or_pipe, PreTrainedTokenizerBase):
-            tokenizer = tokenizer_or_pipe
-        else:
-            raise ValueError(
-                "The tokenizer_or_pipe parameter must be a tokenizer or a pipeline."
-            )
-        assert isinstance(tokenizer, PreTrainedTokenizerBase)
-        tokenizer = adapt_tokenizer(tokenizer=tokenizer)
-        self.fsm = RegexGuide(regex_string=regex_string, tokenizer=tokenizer)
-        self._fsm_state: DefaultDict[int, int] = defaultdict(int)
-
-        # The generated text with `transformers` includes the input token IDs as
-        # well, so we use this attribute to keep track of the input token IDs.
-        # This allows us to reset the FSM state when the input token IDs change,
-        # as well as to only apply the FSM to the generated tokens.
-        self._prefix = [-1]
-
-    def __call__(self, batch_id: int, sent: torch.Tensor) -> Optional[Iterable[int]]:
-        """Use the FSM to bias the logits before sampling the next token.
-
-        Parameters
-        ----------
-        batch_id
-            The index of the current batch.
-        sent
-            The tokens of the current sentence.
-
-        Returns
-        -------
-        List[int]
-            The indices of the tokens that are allowed to be sampled next.
- """ - input_ids = sent.tolist() - - # If the prefix token IDs have changed we assume that we are dealing with a new - # sample and reset the FSM state - if input_ids[: len(self._prefix)] != self._prefix: - self._fsm_state = defaultdict(int) - self._prefix = input_ids - seq_id = hash(tuple([])) - - else: - # Remove the prefix token IDs from the input token IDs, as the FSM should - # only be applied to the generated tokens - input_ids = input_ids[len(self._prefix) :] - - last_token = input_ids[-1] - last_seq_id = hash(tuple(input_ids[:-1])) - seq_id = hash(tuple(input_ids)) - self._fsm_state[seq_id] = self.fsm.get_next_state( - state=self._fsm_state[last_seq_id], token_id=last_token - ) - - allowed_tokens = self.fsm.get_next_instruction( - state=self._fsm_state[seq_id] - ).tokens - return allowed_tokens - - -class JSONPrefixAllowedTokens(RegexPrefixAllowedTokens): - """Bias transformers generation based on a JSON schema. - - Attributes - ---------- - fsm - The finite state machine which is used to bias the logits. - """ - - def __init__( - self, - schema: Union[dict, Type[BaseModel], str], - tokenizer_or_pipe: Union[PreTrainedTokenizerBase, Pipeline], - whitespace_pattern: Optional[str] = None, - ): - """Compile the FSM that drives the JSON-guided generation. - - Parameters - ---------- - schema - A schema that encodes the structure we want the model to generate. - tokenizer_or_pipe - The tokenizer of the model, or the pipeline object. - whitespace_pattern - Pattern to use for JSON syntactic whitespace (doesn't impact string - literals). For example, to allow only a single space or newline with - `whitespace_pattern=r"[\n ]?"` - """ - schema_str = convert_json_schema_to_str(json_schema=schema) - regex_string = build_regex_from_schema(schema_str, whitespace_pattern) - super().__init__(regex_string=regex_string, tokenizer_or_pipe=tokenizer_or_pipe) diff --git a/outlines/integrations/utils.py b/outlines/integrations/utils.py index 9ac4e2a4..67c70685 100644 --- a/outlines/integrations/utils.py +++ b/outlines/integrations/utils.py @@ -1,34 +1,5 @@ -"""Utility functions used in integrations with other packages. +from typing import Union - _______________________________ -/ Don't want to self-host? \ -\\ Try .json at http://dottxt.co / - ------------------------------- - \\ ^__^ - \\ (oo)\\_______ - (__)\\ )\\/\ - ||----w | - || || - -Copyright 2024- the Outlines developers - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import json -from typing import Type, Union - -from pydantic import BaseModel from transformers import SPIECE_UNDERLINE, PreTrainedTokenizerBase @@ -68,36 +39,3 @@ def convert_token_to_string(token: Union[str, bytes]) -> str: tokenizer.convert_token_to_string = convert_token_to_string return tokenizer - - -def convert_json_schema_to_str(json_schema: Union[dict, str, Type[BaseModel]]) -> str: - """Convert a JSON schema to a string. - - Parameters - ---------- - json_schema - The JSON schema. - - Returns - ------- - str - The JSON schema converted to a string. 
-
-    Raises
-    ------
-    ValueError
-        If the schema is not a dictionary, a string or a Pydantic class.
-    """
-    if isinstance(json_schema, dict):
-        schema_str = json.dumps(json_schema)
-    elif isinstance(json_schema, str):
-        schema_str = json_schema
-    elif issubclass(json_schema, BaseModel):
-        schema_str = json.dumps(json_schema.model_json_schema())
-    else:
-        raise ValueError(
-            f"Cannot parse schema {json_schema}. The schema must be either "
-            + "a Pydantic class, a dictionary or a string that contains the JSON "
-            + "schema specification"
-        )
-    return schema_str
diff --git a/outlines/processors/__init__.py b/outlines/processors/__init__.py
deleted file mode 100644
index 22c10d90..00000000
--- a/outlines/processors/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from .structured import (
-    CFGLogitsProcessor,
-    FSMLogitsProcessor,
-    JSONLogitsProcessor,
-    OutlinesLogitsProcessor,
-    RegexLogitsProcessor,
-)
diff --git a/outlines/processors/base_logits_processor.py b/outlines/processors/base_logits_processor.py
deleted file mode 100644
index feedf525..00000000
--- a/outlines/processors/base_logits_processor.py
+++ /dev/null
@@ -1,135 +0,0 @@
-from abc import abstractmethod
-from typing import TYPE_CHECKING, List, Protocol, Type, Union
-
-import numpy as np
-import torch
-from numpy.typing import NDArray
-
-if TYPE_CHECKING:
-    import mlx.core as mx
-
-
-Array = Union[NDArray, torch.Tensor, List, "mx.array"]
-
-
-def is_mlx_array_type(array_type):
-    try:
-        import mlx.core as mx
-    except ImportError:
-        return False
-    return issubclass(array_type, mx.array)
-
-
-class OutlinesLogitsProcessor(Protocol):
-    """
-    Base class for logits processors which normalizes types of logits:
-    - ndarray (used by llama-cpp-python), converted to torch.Tensor
-    - mlx.core.array (used by mlx-lm), converted to torch.Tensor
-    - torch.Tensor (used by everything else)
-
-    Normalization of types and conversion to torch.Tensor
-    doesn't move memory, it just casts the type.
-
-    Normalizing the types allows all logits processors inheriting from this class
-    to implement a single method for all the business logic: `process_logits()`
-    """
-
-    @abstractmethod
-    def process_logits(
-        self, input_ids: List[List[int]], logits: torch.Tensor
-    ) -> torch.Tensor:
-        """
-        input_ids and logits are always 2D tensors for handling a batch of sequences.
-
-        - input_ids -> List[List[tokens]]
-        - logits -> 2D_Tensor[logit floats]
-
-        Important to keep in mind when designing universal logits processors:
-        - logits processors are only used once and never re-applied for a new sequence generator
-        - Some models only pass output_ids, some models such as llamacpp and transformers prefix with input_ids
-        - Some sampling methods, such as beam search, result in unstable sequence ordering in models like vLLM
-        """
-        pass
-
-    @torch.no_grad()
-    def __call__(
-        self,
-        input_ids: Array,
-        logits: Array,
-    ) -> Array:
-        """
-        Apply logits processor
-
-        1) Unify type
-            - convert input_ids: either ndarray, mlx array, List[int], or Tensor -> List[List[int]]
-            - convert logits: either ndarray, mlx array, or Tensor -> 2D float Tensor
-        2) Unify shape, ensure logits and input_ids are 2D
-        3) Call self.process_logits() to perform business logic
-        4) Cast logits back to original array library type
-        """
-        # ensure logits are torch Tensors
-        torch_logits = self._to_torch(logits)
-        input_ids = self._to_torch(input_ids)
-
-        assert torch_logits.shape[:-1] == input_ids.shape[:-1]
-
-        # Guarantee passed as 2D Tensors, then convert back to original (1D or 2D) shape
-        if len(torch_logits.shape) == 2:
-            processed_logits = self.process_logits(input_ids.tolist(), torch_logits)
-        elif len(torch_logits.shape) == 1:
-            processed_logits = self.process_logits(
-                [input_ids.tolist()], torch_logits.unsqueeze(0)
-            ).squeeze(0)
-
-        # return logits as the passed array type
-        return self._from_torch(processed_logits, type(logits))
-
-    @staticmethod
-    def _to_torch(tensor_like: Array) -> torch.Tensor:
-        """Convert various types to torch.Tensor."""
-        if isinstance(tensor_like, torch.Tensor):
-            return tensor_like
-
-        elif isinstance(tensor_like, np.ndarray):
-            return torch.from_numpy(tensor_like)
-
-        elif isinstance(tensor_like, (list, tuple)):
-            return torch.tensor(tensor_like)
-
-        elif is_mlx_array_type(type(tensor_like)):
-            # mlx -> torch -> mlx conversion docs:
-            # https://ml-explore.github.io/mlx/build/html/usage/numpy.html
-            return torch.from_dlpack(tensor_like)
-
-        else:
-            raise TypeError(
-                "LogitsProcessor must be called with either np.NDArray, "
-                "torch.Tensor, list, or mlx.core.array typed logits. "
-                f"Logits type: `{type(tensor_like)}`"
-            )
-
-    @staticmethod
-    def _from_torch(tensor: torch.Tensor, target_type: Type) -> Array:
-        """Convert torch.Tensor to the specified target type."""
-        if target_type == torch.Tensor:
-            return tensor
-
-        elif target_type == np.ndarray:
-            return tensor.detach().numpy()
-
-        elif target_type == list:
-            return tensor.detach().tolist()
-
-        elif target_type == tuple:
-            return tuple(tensor.detach().tolist())
-
-        elif is_mlx_array_type(target_type):
-            import mlx.core as mx
-
-            # numpy doesn't support bfloat16, mlx doesn't support direct conversion from torch
-            return mx.array(tensor.float().numpy())
-
-        else:
-            raise TypeError(
                f"Failed to convert torch tensors to target_type `{target_type}`"
-            )
diff --git a/outlines/processors/structured.py b/outlines/processors/structured.py
deleted file mode 100644
index 0966a90d..00000000
--- a/outlines/processors/structured.py
+++ /dev/null
@@ -1,195 +0,0 @@
-"""
- _______________________________
-/ Don't want to self-host?      \
-\ Try .json at http://dottxt.co /
- -------------------------------
-        \   ^__^
-         \  (oo)\_______
-            (__)\       )\/\
-                ||----w |
-                ||     ||
-
-Copyright 2024- the Outlines developers
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import math
-from typing import TYPE_CHECKING, Dict, List, Optional, Type, Union
-
-import torch
-from pydantic import BaseModel
-
-from outlines.fsm.guide import CFGGuide, Guide, RegexGuide
-from outlines.fsm.json_schema import build_regex_from_schema
-from outlines.integrations.utils import convert_json_schema_to_str
-
-from .base_logits_processor import OutlinesLogitsProcessor
-
-if TYPE_CHECKING:
-    from outlines.models.tokenizer import Tokenizer
-
-
-class FSMLogitsProcessor(OutlinesLogitsProcessor):
-    """Bias generation using a finite state machine.
-
-    Attributes
-    ----------
-    tokenizer
-        The tokenizer used to convert tokens to ids.
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(self, tokenizer: "Tokenizer", fsm: Guide):
-        """A FSM-based logits processor.
-
-        Parameters
-        ----------
-        tokenizer
-            The tokenizer used to convert tokens to ids.
-        fsm
-            The finite state machine which is used to bias the logits.
-        """
-        self.tokenizer = tokenizer
-        self._fsm_states: Dict[int, int] = {hash(tuple([])): 0}
-        self.fsm: Guide = fsm
-        self._seq_start_idx: Optional[int] = None
-
-    def process_logits(
-        self, input_ids: List[List[int]], logits: torch.Tensor
-    ) -> torch.Tensor:
-        """Use the FSM to bias the logits before sampling the next token.
-
-        Parameters
-        ----------
-        input_ids
-            The input token ids.
-        logits
-            The logits.
-
-        Returns
-        -------
-        torch.Tensor
-            The biased logits.
-        """
-        if self._seq_start_idx is None:
-            self._seq_start_idx = len(input_ids[0])
-
-        sequence_states: List[int] = []  # vector of states corresponding to `input_ids`
-
-        for seq_ids in input_ids:
-            gen_ids = seq_ids[self._seq_start_idx :]
-            curr_state_key = hash(tuple(gen_ids))
-
-            if curr_state_key not in self._fsm_states:
-                prev_state = self._fsm_states[hash(tuple(gen_ids[:-1]))]
-                curr_state = self.fsm.get_next_state(prev_state, gen_ids[-1])
-                self._fsm_states[curr_state_key] = curr_state
-
-            sequence_states.append(self._fsm_states[curr_state_key])
-
-        mask = torch.full_like(logits, -math.inf)
-        for i, fsm_state in enumerate(sequence_states):
-            allowed_tokens = self.fsm.get_next_instruction(fsm_state).tokens
-            mask[i, allowed_tokens] = logits[i, allowed_tokens]
-
-        return mask
-
-    def copy(self) -> "FSMLogitsProcessor":
-        """Return a copy of the logits processor."""
-        return FSMLogitsProcessor(tokenizer=self.tokenizer, fsm=self.fsm.copy())
-
-
-class RegexLogitsProcessor(FSMLogitsProcessor):
-    """Bias generation based on a regular expression.
-
-    Attributes
-    ----------
-    tokenizer
-        The tokenizer used to convert tokens to ids.
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(self, regex_string: str, tokenizer: "Tokenizer"):
-        """Compile the FSM that drives the regex-guided generation.
-
-        Parameters
-        ----------
-        regex_string
-            A string that represents a regular expression.
-        tokenizer
-            An Outlines tokenizer.
-        """
-        fsm = RegexGuide(regex_string, tokenizer)
-        super().__init__(tokenizer=tokenizer, fsm=fsm)
-
-
-class JSONLogitsProcessor(RegexLogitsProcessor):
-    """Bias generation based on a JSON schema.
-
-    Attributes
-    ----------
-    tokenizer
-        The tokenizer used to convert tokens to ids.
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(
-        self,
-        schema: Union[dict, Type[BaseModel], str],
-        tokenizer: "Tokenizer",
-        whitespace_pattern: Optional[str] = None,
-    ):
-        """Compile the FSM that drives the JSON-guided generation.
-
-        Parameters
-        ----------
-        schema
-            A JSON schema that encodes the structure we want the model to generate.
-        tokenizer
-            The tokenizer used to convert tokens to ids.
-        whitespace_pattern
-            Pattern to use for JSON syntactic whitespace (doesn't impact string
-            literals). For example, to allow only a single space or newline with
-            `whitespace_pattern=r"[\n ]?"`
-        """
-        schema_str = convert_json_schema_to_str(json_schema=schema)
-        regex_string = build_regex_from_schema(schema_str, whitespace_pattern)
-        super().__init__(regex_string=regex_string, tokenizer=tokenizer)
-
-
-class CFGLogitsProcessor(FSMLogitsProcessor):
-    """Bias generation based on a context-free grammar.
-
-    Attributes
-    ----------
-    tokenizer
-        The tokenizer used to convert tokens to ids.
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(self, cfg_str: str, tokenizer: "Tokenizer"):
-        """Compile the FSM that drives the CFG-guided generation.
-
-        Parameters
-        ----------
-        cfg_str
-            A string that represents a grammar.
-        tokenizer
-            The tokenizer used to convert tokens to ids.
-        """
-        cfg_automata = CFGGuide(cfg_string=cfg_str, tokenizer=tokenizer)
-        super().__init__(tokenizer=tokenizer, fsm=cfg_automata)
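
For reference, a minimal sketch of the call convention this diff removes, assembled only from the deleted `bench_processors.py` and the `OutlinesLogitsProcessor` docstrings above (the pattern, shapes, and model name are taken from the deleted benchmark; the snippet targets the pre-removal API and will not run against a tree with this diff applied):

```python
import torch

import outlines.models as models
from outlines.processors import RegexLogitsProcessor  # removed by this diff

# Same model and Outlines tokenizer the deleted benchmark used.
model = models.transformers("facebook/opt-125m", device="cpu")
tokenizer = model.tokenizer
processor = RegexLogitsProcessor(r"[^Z]*", tokenizer)

# Mock a batch of 4 sequences, mirroring get_mock_processor_inputs above.
vocab_size = len(tokenizer.vocabulary)
input_ids = torch.randint(low=0, high=vocab_size, size=(4, 2048), dtype=torch.int)
logits = torch.rand((4, vocab_size), dtype=torch.float)

# __call__ accepts torch/numpy/mlx/list inputs and returns the same type;
# tokens the regex FSM disallows are masked to -inf, allowed logits pass through.
biased_logits = processor(input_ids, logits)
```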