diff --git a/benchmarks/bench_processors.py b/benchmarks/bench_processors.py
deleted file mode 100644
index 5b490154..00000000
--- a/benchmarks/bench_processors.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import numpy as np
-import torch
-
-import outlines.models as models
-from outlines.processors import OutlinesLogitsProcessor, RegexLogitsProcessor
-
-try:
-    import mlx.core as mx
-except ImportError:
-    pass
-
-
-def is_mlx_lm_allowed():
-    try:
-        import mlx.core as mx
-    except ImportError:
-        return False
-    return mx.metal.is_available()
-
-
-def get_mock_processor_inputs(array_library, num_tokens=30000):
-    """
-    logits shape: (4, 30000), dtype=float
-    input_ids shape: (4, 2048), dtype=int
-    """
-    if array_library == "torch":
-        logits = torch.rand((4, num_tokens), dtype=torch.float)
-        input_ids = torch.randint(
-            low=0, high=num_tokens, size=(4, 2048), dtype=torch.int
-        )
-    elif array_library == "torch_cuda":
-        logits = torch.rand((4, num_tokens), dtype=torch.float, device="cuda")
-        input_ids = torch.randint(
-            low=0, high=num_tokens, size=(4, 2048), dtype=torch.int, device="cuda"
-        )
-    elif array_library == "numpy":
-        logits = np.random.rand(4, num_tokens).astype(np.float32)
-        input_ids = np.random.randint(low=0, high=num_tokens, size=(4, 2048))
-    elif array_library == "mlx":
-        logits = mx.random.uniform(
-            low=-1e9, high=1e9, shape=(4, num_tokens), dtype=mx.float32
-        )
-        input_ids = mx.random.randint(
-            low=0, high=num_tokens, shape=(4, 2048), dtype=mx.int32
-        )
-    else:
-        raise ValueError
-
-    return logits, input_ids
-
-
-class HalvingLogitsProcessor(OutlinesLogitsProcessor):
-    """Simply halve the passed logits."""
-
-    def process_logits(self, input_ids, logits):
-        return logits / 2
-
-
-class LogitsProcessorPassthroughBenchmark:
-    """
-    Benchmark the time it takes to convert between array frameworks.
-    This should be on the order of microseconds.
-    """
-
-    params = ["torch", "numpy"]
-    if is_mlx_lm_allowed():
-        params += ["mlx"]
-    if torch.cuda.is_available():
-        params += ["torch_cuda"]
-
-    def setup(self, array_library):
-        self.logits_processor = HalvingLogitsProcessor()
-
-        self.logits, self.input_ids = get_mock_processor_inputs(array_library)
-
-    def time_passthrough(self, *params):
-        self.logits_processor(self.input_ids, self.logits)
-
-
-class LogitsProcessorStructuredBenchmark:
-    """
-    Benchmark structured generation mask application for a single decoder pass.
-    """
-
-    array_libraries = ["torch", "numpy"]
-    if is_mlx_lm_allowed():
-        array_libraries += ["mlx"]
-    # PR TODO
-    if torch.cuda.is_available():
-        array_libraries += ["torch_cuda"]
-
-    # accept very many or very few tokens, respectively
-    patterns = [r"[^Z]*", "Z*"]
-
-    params = [array_libraries, patterns]
-    param_names = ["array_library", "pattern"]
-
-    def setup(self, array_library, pattern):
-        tokenizer = models.transformers("facebook/opt-125m", device="cpu").tokenizer
-
-        self.logits_processor = RegexLogitsProcessor(pattern, tokenizer)
-
-        self.logits, self.input_ids = get_mock_processor_inputs(
-            array_library, len(tokenizer.vocabulary)
-        )
-
-    def time_structured_generation(self, array_library, pattern):
-        self.logits_processor(self.input_ids, self.logits)
diff --git a/outlines/integrations/__init__.py b/outlines/integrations/__init__.py
deleted file mode 100644
index b0a90d5e..00000000
--- a/outlines/integrations/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Utility functions and classes used to integrate `outlines` into other packages."""
diff --git a/outlines/integrations/transformers.py b/outlines/integrations/transformers.py
deleted file mode 100644
index 7c1bafd2..00000000
--- a/outlines/integrations/transformers.py
+++ /dev/null
@@ -1,159 +0,0 @@
-"""Make Hugging Face transformers compatible with Outlines' structured generation.
-
- _______________________________
-/ Don't want to self-host?      \
-\ Try .json at http://dottxt.co /
- -------------------------------
-        \   ^__^
-         \  (oo)\_______
-            (__)\       )\/\
-                ||----w |
-                ||     ||
-
-Copyright 2024- the Outlines developers
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-from collections import defaultdict
-from typing import DefaultDict, Iterable, Optional, Type, Union
-
-import torch
-from pydantic import BaseModel
-from transformers import Pipeline, PreTrainedTokenizerBase
-
-from outlines.fsm.guide import RegexGuide
-from outlines.fsm.json_schema import build_regex_from_schema
-from outlines.integrations.utils import adapt_tokenizer, convert_json_schema_to_str
-
-
-class RegexPrefixAllowedTokens:
-    """Bias transformers generation based on a regular expression.
-
-    Attributes
-    ----------
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(
-        self,
-        regex_string: str,
-        tokenizer_or_pipe: Union[PreTrainedTokenizerBase, Pipeline],
-    ):
-        """Compile the FSM that drives the regex-structured generation.
-
-        Parameters
-        ----------
-        regex_string
-            A string that represents a regular expression.
-        tokenizer_or_pipe
-            The tokenizer of the model, or the pipeline object.
-
-        Raises
-        ------
-        ValueError
-            If the `tokenizer_or_pipe` parameter is not a tokenizer or a pipeline.
-        """
-        if isinstance(tokenizer_or_pipe, Pipeline):
-            tokenizer = tokenizer_or_pipe.tokenizer
-        elif isinstance(tokenizer_or_pipe, PreTrainedTokenizerBase):
-            tokenizer = tokenizer_or_pipe
-        else:
-            raise ValueError(
-                "The tokenizer_or_pipe parameter must be a tokenizer or a pipeline."
-            )
-        assert isinstance(tokenizer, PreTrainedTokenizerBase)
-        tokenizer = adapt_tokenizer(tokenizer=tokenizer)
-        self.fsm = RegexGuide(regex_string=regex_string, tokenizer=tokenizer)
-        self._fsm_state: DefaultDict[int, int] = defaultdict(int)
-
-        # The generated text with `transformers` includes the input token IDs as
-        # well, so we use this attribute to keep track of the input token IDs.
-        # This allows us to reset the FSM state when the input token IDs change,
-        # as well as to only apply the FSM to the generated tokens.
-        self._prefix = [-1]
-
-    def __call__(self, batch_id: int, sent: torch.Tensor) -> Optional[Iterable[int]]:
-        """Use the FSM to bias the logits before sampling the next token.
-
-        Parameters
-        ----------
-        batch_id
-            The index of the current batch.
-        sent
-            The tokens of the current sentence.
-
-        Returns
-        -------
-        List[int]
-            The indices of the tokens that are allowed to be sampled next.
- """ - input_ids = sent.tolist() - - # If the prefix token IDs have changed we assume that we are dealing with a new - # sample and reset the FSM state - if input_ids[: len(self._prefix)] != self._prefix: - self._fsm_state = defaultdict(int) - self._prefix = input_ids - seq_id = hash(tuple([])) - - else: - # Remove the prefix token IDs from the input token IDs, as the FSM should - # only be applied to the generated tokens - input_ids = input_ids[len(self._prefix) :] - - last_token = input_ids[-1] - last_seq_id = hash(tuple(input_ids[:-1])) - seq_id = hash(tuple(input_ids)) - self._fsm_state[seq_id] = self.fsm.get_next_state( - state=self._fsm_state[last_seq_id], token_id=last_token - ) - - allowed_tokens = self.fsm.get_next_instruction( - state=self._fsm_state[seq_id] - ).tokens - return allowed_tokens - - -class JSONPrefixAllowedTokens(RegexPrefixAllowedTokens): - """Bias transformers generation based on a JSON schema. - - Attributes - ---------- - fsm - The finite state machine which is used to bias the logits. - """ - - def __init__( - self, - schema: Union[dict, Type[BaseModel], str], - tokenizer_or_pipe: Union[PreTrainedTokenizerBase, Pipeline], - whitespace_pattern: Optional[str] = None, - ): - """Compile the FSM that drives the JSON-guided generation. - - Parameters - ---------- - schema - A schema that encodes the structure we want the model to generate. - tokenizer_or_pipe - The tokenizer of the model, or the pipeline object. - whitespace_pattern - Pattern to use for JSON syntactic whitespace (doesn't impact string - literals). For example, to allow only a single space or newline with - `whitespace_pattern=r"[\n ]?"` - """ - schema_str = convert_json_schema_to_str(json_schema=schema) - regex_string = build_regex_from_schema(schema_str, whitespace_pattern) - super().__init__(regex_string=regex_string, tokenizer_or_pipe=tokenizer_or_pipe) diff --git a/outlines/integrations/utils.py b/outlines/integrations/utils.py index 9ac4e2a4..67c70685 100644 --- a/outlines/integrations/utils.py +++ b/outlines/integrations/utils.py @@ -1,34 +1,5 @@ -"""Utility functions used in integrations with other packages. +from typing import Union - _______________________________ -/ Don't want to self-host? \ -\\ Try .json at http://dottxt.co / - ------------------------------- - \\ ^__^ - \\ (oo)\\_______ - (__)\\ )\\/\ - ||----w | - || || - -Copyright 2024- the Outlines developers - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import json -from typing import Type, Union - -from pydantic import BaseModel from transformers import SPIECE_UNDERLINE, PreTrainedTokenizerBase @@ -68,36 +39,3 @@ def convert_token_to_string(token: Union[str, bytes]) -> str: tokenizer.convert_token_to_string = convert_token_to_string return tokenizer - - -def convert_json_schema_to_str(json_schema: Union[dict, str, Type[BaseModel]]) -> str: - """Convert a JSON schema to a string. - - Parameters - ---------- - json_schema - The JSON schema. - - Returns - ------- - str - The JSON schema converted to a string. 
-
-    Raises
-    ------
-    ValueError
-        If the schema is not a dictionary, a string or a Pydantic class.
-    """
-    if isinstance(json_schema, dict):
-        schema_str = json.dumps(json_schema)
-    elif isinstance(json_schema, str):
-        schema_str = json_schema
-    elif issubclass(json_schema, BaseModel):
-        schema_str = json.dumps(json_schema.model_json_schema())
-    else:
-        raise ValueError(
-            f"Cannot parse schema {json_schema}. The schema must be either "
-            + "a Pydantic class, a dictionary or a string that contains the JSON "
-            + "schema specification"
-        )
-    return schema_str
diff --git a/outlines/processors/__init__.py b/outlines/processors/__init__.py
deleted file mode 100644
index 22c10d90..00000000
--- a/outlines/processors/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from .structured import (
-    CFGLogitsProcessor,
-    FSMLogitsProcessor,
-    JSONLogitsProcessor,
-    OutlinesLogitsProcessor,
-    RegexLogitsProcessor,
-)
diff --git a/outlines/processors/base_logits_processor.py b/outlines/processors/base_logits_processor.py
deleted file mode 100644
index feedf525..00000000
--- a/outlines/processors/base_logits_processor.py
+++ /dev/null
@@ -1,135 +0,0 @@
-from abc import abstractmethod
-from typing import TYPE_CHECKING, List, Protocol, Type, Union
-
-import numpy as np
-import torch
-from numpy.typing import NDArray
-
-if TYPE_CHECKING:
-    import mlx.core as mx
-
-
-Array = Union[NDArray, torch.Tensor, List, "mx.array"]
-
-
-def is_mlx_array_type(array_type):
-    try:
-        import mlx.core as mx
-    except ImportError:
-        return False
-    return issubclass(array_type, mx.array)
-
-
-class OutlinesLogitsProcessor(Protocol):
-    """
-    Base class for logits processors which normalizes types of logits:
-    - ndarray (used by llama-cpp-python), converted to torch.Tensor
-    - mlx.core.array (used by mlx-lm), converted to torch.Tensor
-    - torch.Tensor (used by everything else)
-
-    Normalization of types and conversion to torch.Tensor
-    doesn't move memory, it just casts the type.
-
-    Normalizing the types allows all logits processors inheriting from this class
-    to implement a single method for all the business logic: `process_logits()`
-    """
-
-    @abstractmethod
-    def process_logits(
-        self, input_ids: List[List[int]], logits: torch.Tensor
-    ) -> torch.Tensor:
-        """
-        input_ids and logits are always 2D tensors for handling a batch of sequences.
-
-        - input_ids -> List[List[tokens]]
-        - logits -> 2D_Tensor[logit floats]
-
-        Important to keep in mind when designing universal logits processors:
-        - logits processors are only used once and never re-applied for a new sequence generator
-        - Some models only pass output_ids, some models such as llamacpp and transformers prefix with input_ids
-        - Some sampling methods, such as beam search, result in unstable sequence ordering in models like vLLM
-        """
-        pass
-
-    @torch.no_grad()
-    def __call__(
-        self,
-        input_ids: Array,
-        logits: Array,
-    ) -> Array:
-        """
-        Apply logits processor
-
-        1) Unify type
-            - convert input_ids: either ndarray, mlx array, List[int], or Tensor -> List[List[int]]
-            - convert logits: either ndarray, mlx array, or Tensor -> 2D float Tensor
-        2) Unify shape, ensure logits and input_ids are 2D
-        3) Call self.process_logits() to perform business logic
-        4) Cast logits back to original array library type
-        """
-        # ensure logits are torch Tensors
-        torch_logits = self._to_torch(logits)
-        input_ids = self._to_torch(input_ids)
-
-        assert torch_logits.shape[:-1] == input_ids.shape[:-1]
-
-        # Guarantee passed as 2D Tensors, then convert back to original (1D or 2D) shape
-        if len(torch_logits.shape) == 2:
-            processed_logits = self.process_logits(input_ids.tolist(), torch_logits)
-        elif len(torch_logits.shape) == 1:
-            processed_logits = self.process_logits(
-                [input_ids.tolist()], torch_logits.unsqueeze(0)
-            ).squeeze(0)
-
-        # return logits as the passed array type
-        return self._from_torch(processed_logits, type(logits))
-
-    @staticmethod
-    def _to_torch(tensor_like: Array) -> torch.Tensor:
-        """Convert various types to torch.Tensor."""
-        if isinstance(tensor_like, torch.Tensor):
-            return tensor_like
-
-        elif isinstance(tensor_like, np.ndarray):
-            return torch.from_numpy(tensor_like)
-
-        elif isinstance(tensor_like, (list, tuple)):
-            return torch.tensor(tensor_like)
-
-        elif is_mlx_array_type(type(tensor_like)):
-            # mlx -> torch -> mlx conversion docs:
-            # https://ml-explore.github.io/mlx/build/html/usage/numpy.html
-            return torch.from_dlpack(tensor_like)
-
-        else:
-            raise TypeError(
-                "LogitsProcessor must be called with either np.NDArray, "
-                "torch.Tensor, list, or mlx.core.array typed logits. "
-                f"Logits type: `{type(tensor_like)}`"
-            )
-
-    @staticmethod
-    def _from_torch(tensor: torch.Tensor, target_type: Type) -> Array:
-        """Convert torch.Tensor to the specified target type."""
-        if target_type == torch.Tensor:
-            return tensor
-
-        elif target_type == np.ndarray:
-            return tensor.detach().numpy()
-
-        elif target_type == list:
-            return tensor.detach().tolist()
-
-        elif target_type == tuple:
-            return tuple(tensor.detach().tolist())
-
-        elif is_mlx_array_type(target_type):
-            import mlx.core as mx
-
-            # numpy doesn't support bfloat16, mlx doesn't support direct conversion from torch
-            return mx.array(tensor.float().numpy())
-
-        else:
-            raise TypeError(
                f"Failed to convert torch tensors to target_type `{target_type}`"
-            )
diff --git a/outlines/processors/structured.py b/outlines/processors/structured.py
deleted file mode 100644
index 0966a90d..00000000
--- a/outlines/processors/structured.py
+++ /dev/null
@@ -1,195 +0,0 @@
-"""
- _______________________________
-/ Don't want to self-host?      \
-\ Try .json at http://dottxt.co /
- -------------------------------
-        \   ^__^
-         \  (oo)\_______
-            (__)\       )\/\
-                ||----w |
-                ||     ||
-
-Copyright 2024- the Outlines developers
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-import math
-from typing import TYPE_CHECKING, Dict, List, Optional, Type, Union
-
-import torch
-from pydantic import BaseModel
-
-from outlines.fsm.guide import CFGGuide, Guide, RegexGuide
-from outlines.fsm.json_schema import build_regex_from_schema
-from outlines.integrations.utils import convert_json_schema_to_str
-
-from .base_logits_processor import OutlinesLogitsProcessor
-
-if TYPE_CHECKING:
-    from outlines.models.tokenizer import Tokenizer
-
-
-class FSMLogitsProcessor(OutlinesLogitsProcessor):
-    """Bias generation using a finite state machine.
-
-    Attributes
-    ----------
-    tokenizer
-        The tokenizer used to convert tokens to ids.
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(self, tokenizer: "Tokenizer", fsm: Guide):
-        """A FSM-based logits processor.
-
-        Parameters
-        ----------
-        tokenizer
-            The tokenizer used to convert tokens to ids.
-        fsm
-            The finite state machine which is used to bias the logits.
-        """
-        self.tokenizer = tokenizer
-        self._fsm_states: Dict[int, int] = {hash(tuple([])): 0}
-        self.fsm: Guide = fsm
-        self._seq_start_idx: Optional[int] = None
-
-    def process_logits(
-        self, input_ids: List[List[int]], logits: torch.Tensor
-    ) -> torch.Tensor:
-        """Use the FSM to bias the logits before sampling the next token.
-
-        Parameters
-        ----------
-        input_ids
-            The input token ids.
-        logits
-            The logits.
-
-        Returns
-        -------
-        torch.Tensor
-            The biased logits.
-        """
-        if self._seq_start_idx is None:
-            self._seq_start_idx = len(input_ids[0])
-
-        sequence_states: List[int] = []  # vector of states corresponding to `input_ids`
-
-        for seq_ids in input_ids:
-            gen_ids = seq_ids[self._seq_start_idx :]
-            curr_state_key = hash(tuple(gen_ids))
-
-            if curr_state_key not in self._fsm_states:
-                prev_state = self._fsm_states[hash(tuple(gen_ids[:-1]))]
-                curr_state = self.fsm.get_next_state(prev_state, gen_ids[-1])
-                self._fsm_states[curr_state_key] = curr_state
-
-            sequence_states.append(self._fsm_states[curr_state_key])
-
-        mask = torch.full_like(logits, -math.inf)
-        for i, fsm_state in enumerate(sequence_states):
-            allowed_tokens = self.fsm.get_next_instruction(fsm_state).tokens
-            mask[i, allowed_tokens] = logits[i, allowed_tokens]
-
-        return mask
-
-    def copy(self) -> "FSMLogitsProcessor":
-        """Return a copy of the logits processor."""
-        return FSMLogitsProcessor(tokenizer=self.tokenizer, fsm=self.fsm.copy())
-
-
-class RegexLogitsProcessor(FSMLogitsProcessor):
-    """Bias generation based on a regular expression.
-
-    Attributes
-    ----------
-    tokenizer
-        The tokenizer used to convert tokens to ids.
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(self, regex_string: str, tokenizer: "Tokenizer"):
-        """Compile the FSM that drives the regex-guided generation.
-
-        Parameters
-        ----------
-        regex_string
-            A string that represents a regular expression.
-        tokenizer
-            An Outlines tokenizer.
-        """
-        fsm = RegexGuide(regex_string, tokenizer)
-        super().__init__(tokenizer=tokenizer, fsm=fsm)
-
-
-class JSONLogitsProcessor(RegexLogitsProcessor):
-    """Bias generation based on a JSON schema.
-
-    Attributes
-    ----------
-    tokenizer
-        The tokenizer used to convert tokens to ids.
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(
-        self,
-        schema: Union[dict, Type[BaseModel], str],
-        tokenizer: "Tokenizer",
-        whitespace_pattern: Optional[str] = None,
-    ):
-        """Compile the FSM that drives the JSON-guided generation.
-
-        Parameters
-        ----------
-        schema
-            A JSON schema that encodes the structure we want the model to generate.
-        tokenizer
-            The tokenizer used to convert tokens to ids.
-        whitespace_pattern
-            Pattern to use for JSON syntactic whitespace (doesn't impact string
-            literals). For example, to allow only a single space or newline with
-            `whitespace_pattern=r"[\n ]?"`
-        """
-        schema_str = convert_json_schema_to_str(json_schema=schema)
-        regex_string = build_regex_from_schema(schema_str, whitespace_pattern)
-        super().__init__(regex_string=regex_string, tokenizer=tokenizer)
-
-
-class CFGLogitsProcessor(FSMLogitsProcessor):
-    """Bias generation based on a context-free grammar.
-
-    Attributes
-    ----------
-    tokenizer
-        The tokenizer used to convert tokens to ids.
-    fsm
-        The finite state machine which is used to bias the logits.
-    """
-
-    def __init__(self, cfg_str: str, tokenizer: "Tokenizer"):
-        """Compile the FSM that drives the CFG-guided generation.
-
-        Parameters
-        ----------
-        cfg_str
-            A string that represents a grammar.
-        tokenizer
-            The tokenizer used to convert tokens to ids.
-        """
-        cfg_automata = CFGGuide(cfg_string=cfg_str, tokenizer=tokenizer)
-        super().__init__(tokenizer=tokenizer, fsm=cfg_automata)
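
For reference, a minimal sketch of the call convention this diff removes, assembled only from the deleted `bench_processors.py` and the `OutlinesLogitsProcessor` docstrings above (the pattern, shapes, and model name are taken from the deleted benchmark; the snippet targets the pre-removal API and will not run against a tree with this diff applied):

```python
import torch

import outlines.models as models
from outlines.processors import RegexLogitsProcessor  # removed by this diff

# Same model and Outlines tokenizer the deleted benchmark used.
model = models.transformers("facebook/opt-125m", device="cpu")
tokenizer = model.tokenizer
processor = RegexLogitsProcessor(r"[^Z]*", tokenizer)

# Mock a batch of 4 sequences, mirroring get_mock_processor_inputs above.
vocab_size = len(tokenizer.vocabulary)
input_ids = torch.randint(low=0, high=vocab_size, size=(4, 2048), dtype=torch.int)
logits = torch.rand((4, vocab_size), dtype=torch.float)

# __call__ accepts torch/numpy/mlx/list inputs and returns the same type;
# tokens the regex FSM disallows are masked to -inf, allowed logits pass through.
biased_logits = processor(input_ids, logits)
```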