Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialized owlv2 model #889

Merged
merged 23 commits into from
Dec 20, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Include incremental processing of images
probicheaux committed Dec 18, 2024

Verified

This commit was signed with the committer’s verified signature.
khaneliman Austin Horstman
commit ed298367079c7749212acb03eb646024327d6578
53 changes: 40 additions & 13 deletions inference/models/owlv2/owlv2.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
import pickle
import weakref
from collections import defaultdict
from typing import Any, Dict, List, Literal, NewType, Tuple, Union
from typing import Any, Dict, List, Literal, NewType, Optional, Tuple, Union

import numpy as np
import torch
@@ -287,8 +287,6 @@ class OwlV2(RoboflowInferenceModel):
box_format = "xywh"

def __init__(self, *args, model_id=f"owlv2/{OWLV2_VERSION_ID}", **kwargs):
print(model_id)
print(kwargs)
super().__init__(*args, model_id=model_id, **kwargs)
hf_id = os.path.join("google", self.version_id)
processor = Owlv2Processor.from_pretrained(hf_id)
@@ -305,6 +303,8 @@ def __init__(self, *args, model_id=f"owlv2/{OWLV2_VERSION_ID}", **kwargs):
def reset_cache(self):
# each entry should be on the order of 300*4KB, so 1000 is 400MB of CUDA memory
self.image_embed_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
# no need for limit here, as we're only storing on CPU
self.cpu_image_embed_cache = dict()
# each entry should be on the order of 10 bytes, so 1000 is 10KB
self.image_size_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
# entry size will vary depending on the number of samples, but 10 should be safe
@@ -340,6 +340,16 @@ def download_weights(self) -> None:
# Download from huggingface
pass

def get_image_embeds(
    self, image_hash: Hash
) -> Optional[Tuple[torch.Tensor, ...]]:
    """Look up the cached embeddings for an image by its hash.

    The size-limited ``image_embed_cache`` is consulted first. If the hash
    is not there, ``cpu_image_embed_cache`` is consulted and every tensor
    of the stored entry is moved to ``DEVICE`` before being returned.

    Args:
        image_hash: Hash identifying the image whose embeddings are wanted.

    Returns:
        The cached tuple of tensors for the image, or ``None`` when the
        hash is present in neither cache. Callers in this file unpack the
        tuple into five elements.
    """
    if image_hash in self.image_embed_cache:
        return self.image_embed_cache[image_hash]

    if image_hash in self.cpu_image_embed_cache:
        cpu_tensors = self.cpu_image_embed_cache[image_hash]
        # The device copy is built on every call; it is not written back
        # into image_embed_cache here.
        return tuple(t.to(DEVICE) for t in cpu_tensors)

    return None

def compute_image_size(
self, image: Union[np.ndarray, LazyImageRetrievalWrapper]
) -> Tuple[int, int]:
@@ -359,7 +369,7 @@ def embed_image(self, image: Union[np.ndarray, LazyImageRetrievalWrapper]) -> Ha
else:
image_hash = hash_function(image.tobytes())

if image_hash in self.image_embed_cache:
if (image_embeds := self.get_image_embeds(image_hash)) is not None:
return image_hash

np_image = (
@@ -419,12 +429,10 @@ def get_query_embedding(
# NOTE: for now we're handling each image separately
query_embeds = []
for image_hash, query_boxes in query_spec.items():
try:
_objectness, image_boxes, image_class_embeds, _, _ = (
self.image_embed_cache[image_hash]
)
except KeyError as error:
raise KeyError("We didn't embed the image first!") from error
image_embeds = self.get_image_embeds(image_hash)
if image_embeds is None:
raise KeyError("We didn't embed the image first!")
_objectness, image_boxes, image_class_embeds, _, _ = image_embeds

query_boxes_tensor = torch.tensor(
query_boxes, dtype=image_boxes.dtype, device=image_boxes.device
@@ -455,7 +463,10 @@ def infer_from_embed(
confidence: float,
iou_threshold: float,
) -> List[Dict]:
_, image_boxes, image_class_embeds, _, _ = self.image_embed_cache[image_hash]
image_embeds = self.get_image_embeds(image_hash)
if image_embeds is None:
raise KeyError("We didn't embed the image first!")
_, image_boxes, image_class_embeds, _, _ = image_embeds
class_map, class_names = make_class_map(query_embeddings)
all_predicted_boxes, all_predicted_classes, all_predicted_scores = [], [], []
for class_name, pos_neg_embedding_dict in query_embeddings.items():
@@ -554,7 +565,10 @@ def infer_from_embedding_dict(
)

def make_class_embeddings_dict(
self, training_data: List[Any], iou_threshold: float
self,
training_data: List[Any],
iou_threshold: float,
return_image_embeds: bool = False,
) -> Dict[str, PosNegDictType]:
wrapped_training_data = [
{
@@ -575,9 +589,16 @@ def make_class_embeddings_dict(
class_embeddings_dict = defaultdict(lambda: {"positive": [], "negative": []})

bool_to_literal = {True: "positive", False: "negative"}
return_image_embeds_dict = dict()
for train_image in wrapped_training_data:
# grab and embed image
image_hash = self.embed_image(train_image["image"])
if return_image_embeds:
if (image_embeds := self.get_image_embeds(image_hash)) is None:
raise KeyError("We didn't embed the image first!")
return_image_embeds_dict[image_hash] = tuple(
t.to("cpu") for t in image_embeds
)

# grab and normalize box prompts for this image
image_size = self.compute_image_size(train_image["image"])
@@ -614,6 +635,8 @@ def make_class_embeddings_dict(
}

self.class_embeddings_cache[wrapped_training_data_hash] = class_embeddings_dict
if return_image_embeds:
return class_embeddings_dict, return_image_embeds_dict

return class_embeddings_dict

@@ -658,12 +681,15 @@ def serialize_training_data(
):
roboflow_id = hf_id.replace("google/", "owlv2/")
owlv2 = OwlV2(model_id=roboflow_id)
train_data_dict = owlv2.make_class_embeddings_dict(training_data, iou_threshold)
train_data_dict, image_embeds = owlv2.make_class_embeddings_dict(
training_data, iou_threshold, return_image_embeds=True
)
train_data_dict = {
"huggingface_id": hf_id,
"train_data_dict": train_data_dict,
"class_names": list(train_data_dict.keys()),
"roboflow_id": roboflow_id,
"image_embeds": image_embeds,
}
train_data_path = os.path.join(save_dir, "train_data.pt")
os.makedirs(save_dir, exist_ok=True)
@@ -718,6 +744,7 @@ def load_model_artifacts_from_cache(self):
self.roboflow_id = self.model_data["roboflow_id"]
# each model can have its own OwlV2 instance because we use a singleton
self.owlv2 = OwlV2(model_id=self.roboflow_id)
self.owlv2.cpu_image_embed_cache = self.model_data["image_embeds"]

@property
def weights_file(self):
10 changes: 9 additions & 1 deletion tests/inference/models_predictions_tests/test_owlv2.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@

from inference.core.entities.requests.inference import ObjectDetectionInferenceRequest
from inference.core.entities.requests.owlv2 import OwlV2InferenceRequest
from inference.models.owlv2.owlv2 import OwlV2, SerializedOwlV2, Owlv2Singleton
from inference.models.owlv2.owlv2 import OwlV2, SerializedOwlV2, Owlv2Singleton, LazyImageRetrievalWrapper
from inference.core.env import OWLV2_VERSION_ID
from inference.core.cache.model_artifacts import get_cache_file_path

@@ -97,7 +97,15 @@ def test_owlv2_serialized():
os.makedirs(os.path.dirname(pt_path), exist_ok=True)
os.rename(serialized_pt, pt_path)
serialized_owlv2 = SerializedOwlV2(model_id=model_id)

# Get the image hash before inference
image_wrapper = LazyImageRetrievalWrapper(request.image)
image_hash = image_wrapper.image_hash
assert image_hash in serialized_owlv2.owlv2.cpu_image_embed_cache

response = serialized_owlv2.infer_from_request(request)


assert len(response.predictions) == 5
posts = [p for p in response.predictions if p.class_name == "post"]
posts.sort(key=lambda x: x.x)