Skip to content

Commit

Permalink
Merge pull request #1091 from tensorlakeai/image-build-fixes
Browse files Browse the repository at this point in the history
This PR makes some fixes and improvements to image building. Specifically, it matches the Python version in built images with the version the builder is running on. This removes the need to keep track of the Python version: as long as the workflow and the images are built at the same time, everything should match.

It also adds the SDK version to the runtime information of graphs. This is related to some future internal work.
  • Loading branch information
j3m7 authored Dec 10, 2024
2 parents 5309853 + 474ae4e commit 40682fb
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 50 deletions.
13 changes: 10 additions & 3 deletions .github/workflows/publish_executor_containers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@ jobs:
password: ${{ secrets.DOCKERHUB_TOKEN }}
- run: |
pip install indexify -U
indexify-cli build-default-image
indexify-cli build-default-image --python-version 3.10
docker push tensorlake/indexify-executor-default:3.10
indexify-cli build-default-image --python-version 3.11
docker push tensorlake/indexify-executor-default:3.11
indexify-cli build-default-image --python-version 3.12
docker push tensorlake/indexify-executor-default:3.12
indexify-cli build-default-image --python-version 3.13
docker push tensorlake/indexify-executor-default:3.13
indexify-cli build-image examples/pdf_document_extraction/images.py
indexify-cli build-image examples/pdf_structured_extraction/workflow.py
indexify-cli build-image examples/tweetsgenerator/workflow.py
indexify-cli build-image examples/pdf_structured_extraction/document_ai_api_version_workflow.py
docker push tensorlake/indexify-executor-default:3.10
docker push tensorlake/indexify-executor-default:3.11
docker push tensorlake/pdf-blueprint-st
docker push tensorlake/pdf-blueprint-lancdb
docker push tensorlake/pdf-blueprint-pdf-parser-gpu
Expand Down
20 changes: 14 additions & 6 deletions python-sdk/indexify/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
from indexify.executor.agent import ExtractorAgent
from indexify.executor.function_worker import FunctionWorker
from indexify.functions_sdk.image import (
DEFAULT_IMAGE_3_10,
DEFAULT_IMAGE_3_11,
LOCAL_PYTHON_VERSION,
GetDefaultPythonImage,
Image,
)

Expand Down Expand Up @@ -129,6 +129,10 @@ def build_image(
image_names: Optional[List[str]] = None,
python_sdk_path: Optional[str] = None,
):
python_version: Optional[str] = (
typer.Option(LOCAL_PYTHON_VERSION, help="Version of the config file to build"),
)

globals_dict = {}

# Add the folder in the workflow file path to the current Python path
Expand All @@ -149,9 +153,14 @@ def build_image(


@app.command(help="Build default image for indexify")
def build_default_image():
_build_image(image=DEFAULT_IMAGE_3_10)
_build_image(image=DEFAULT_IMAGE_3_11)
def build_default_image(
python_version: Optional[str] = typer.Option(
f"{sys.version_info.major}.{sys.version_info.minor}",
help="Python version to use in the base image",
)
):

_build_image(image=GetDefaultPythonImage(python_version))

console.print(
Text(f"Built default indexify image", style="cyan"),
Expand Down Expand Up @@ -258,7 +267,6 @@ def _build_image(image: Image, python_sdk_path: Optional[str] = None):
WORKDIR /app
"""

run_strs = ["RUN " + i for i in image._run_strs]

docker_file += "\n".join(run_strs)
Expand Down
3 changes: 2 additions & 1 deletion python-sdk/indexify/functions_sdk/graph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import json
import importlib.metadata
import sys
from collections import defaultdict
from queue import deque
Expand Down Expand Up @@ -221,6 +221,7 @@ def definition(self) -> ComputeGraphMetadata:
runtime_information=RuntimeInformation(
major_version=sys.version_info.major,
minor_version=sys.version_info.minor,
sdk_version=importlib.metadata.version("indexify"),
),
)

Expand Down
2 changes: 2 additions & 0 deletions python-sdk/indexify/functions_sdk/graph_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class NodeMetadata(BaseModel):
class RuntimeInformation(BaseModel):
    """Python runtime and SDK version recorded with a compute graph definition."""

    # Major and minor components of the Python version (e.g. 3 and 11).
    major_version: int
    minor_version: int
    # Version string of the installed indexify SDK.
    sdk_version: str


class ComputeGraphMetadata(BaseModel):
Expand All @@ -50,6 +51,7 @@ class ComputeGraphMetadata(BaseModel):
accumulator_zero_values: Dict[str, bytes] = {}
runtime_information: RuntimeInformation
replaying: bool = False
version: Optional[int] = -1

def get_input_payload_serializer(self):
return get_serializer(self.start_node.compute_fn.input_encoder)
Expand Down
50 changes: 20 additions & 30 deletions python-sdk/indexify/functions_sdk/image.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,24 @@
from typing import List
import sys
from typing import List, Optional

from pydantic import BaseModel


def python_version_to_image(python_version):
    """Return the Docker base image for a supported Python version.

    Args:
        python_version: Version string such as "3.10" or "3.10.4"; only the
            leading "major.minor" prefix is inspected.

    Returns:
        The pinned ``python:<x.y.z>-bookworm`` image name for that version.

    Raises:
        ValueError: If the version does not start with a supported prefix.
    """
    # Prefix -> pinned base image. None of the prefixes is a prefix of
    # another, so the lookup order does not affect the result.
    supported_images = (
        ("3.9", "python:3.9.20-bookworm"),
        ("3.10", "python:3.10.15-bookworm"),
        ("3.11", "python:3.11.10-bookworm"),
    )
    for prefix, base_image in supported_images:
        if python_version.startswith(prefix):
            return base_image
    raise ValueError(f"unsupported Python version: {python_version}")


# Pydantic object for API
class ImageInformation(BaseModel):
    """Serializable description of an image, used as the API payload."""

    # Name and tag under which the image is built.
    image_name: str
    tag: str
    # Base image the build starts from.
    base_image: str
    # Commands to run while building the image.
    run_strs: List[str]
    # NOTE(review): the server-side struct names this field `image_uri`;
    # confirm the two names are meant to differ.
    image_url: Optional[str] = ""


class Image:
def __init__(self, python="3.10"):
def __init__(self):
self._image_name = None
self._tag = "latest"
self._base_image = python_version_to_image(python)
self._python_version = python
self._base_image = BASE_IMAGE_NAME
self._python_version = LOCAL_PYTHON_VERSION
self._run_strs = []

def name(self, image_name):
Expand Down Expand Up @@ -55,18 +46,17 @@ def to_image_information(self):
)


DEFAULT_IMAGE_3_10 = (
Image()
.name("tensorlake/indexify-executor-default")
.base_image("python:3.10.15-slim-bookworm")
.tag("3.10")
.run("pip install indexify")
)

DEFAULT_IMAGE_3_11 = (
Image()
.name("tensorlake/indexify-executor-default")
.base_image("python:3.11.10-slim-bookworm")
.tag("3.11")
.run("pip install indexify")
)
LOCAL_PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}"
BASE_IMAGE_NAME = f"python:{LOCAL_PYTHON_VERSION}-slim-bookworm"


def GetDefaultPythonImage(python_version: str):
    """Build the default indexify executor image for a Python version.

    Args:
        python_version: Version string (e.g. "3.11") used both to select the
            ``python:<version>-slim-bookworm`` base image and as the image tag.

    Returns:
        An ``Image`` named "tensorlake/indexify-executor-default".
    """
    return (
        Image()
        .name("tensorlake/indexify-executor-default")
        .base_image(f"python:{python_version}-slim-bookworm")
        .tag(python_version)
    )


DEFAULT_IMAGE = GetDefaultPythonImage(LOCAL_PYTHON_VERSION)
10 changes: 5 additions & 5 deletions python-sdk/indexify/functions_sdk/indexify_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pydantic import BaseModel, Field, PrivateAttr

from .data_objects import IndexifyData
from .image import DEFAULT_IMAGE_3_10, Image
from .image import DEFAULT_IMAGE, Image
from .object_serializer import get_serializer


Expand Down Expand Up @@ -80,7 +80,7 @@ class PlacementConstraints(BaseModel):
class IndexifyFunction:
name: str = ""
description: str = ""
image: Optional[Image] = DEFAULT_IMAGE_3_10
image: Optional[Image] = DEFAULT_IMAGE
placement_constraints: List[PlacementConstraints] = []
accumulate: Optional[Type[Any]] = None
input_encoder: Optional[str] = "cloudpickle"
Expand All @@ -103,7 +103,7 @@ def deserialize_output(cls, output: IndexifyData) -> Any:
class IndexifyRouter:
name: str = ""
description: str = ""
image: Optional[Image] = DEFAULT_IMAGE_3_10
image: Optional[Image] = DEFAULT_IMAGE
placement_constraints: List[PlacementConstraints] = []
input_encoder: Optional[str] = "cloudpickle"
output_encoder: Optional[str] = "cloudpickle"
Expand Down Expand Up @@ -141,7 +141,7 @@ def _process_dict_arg(dict_arg: dict, sig: inspect.Signature) -> Tuple[list, dic
def indexify_router(
name: Optional[str] = None,
description: Optional[str] = "",
image: Optional[Image] = DEFAULT_IMAGE_3_10,
image: Optional[Image] = DEFAULT_IMAGE,
placement_constraints: List[PlacementConstraints] = [],
input_encoder: Optional[str] = "cloudpickle",
output_encoder: Optional[str] = "cloudpickle",
Expand Down Expand Up @@ -188,7 +188,7 @@ def run(self, *args, **kwargs):
def indexify_function(
name: Optional[str] = None,
description: Optional[str] = "",
image: Optional[Image] = DEFAULT_IMAGE_3_10,
image: Optional[Image] = DEFAULT_IMAGE,
accumulate: Optional[Type[BaseModel]] = None,
input_encoder: Optional[str] = "cloudpickle",
output_encoder: Optional[str] = "cloudpickle",
Expand Down
10 changes: 7 additions & 3 deletions python-sdk/indexify/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,13 @@ def delete_compute_graph(
)
response.raise_for_status()

def graphs(self) -> List[str]:
response = self._get(f"graphs")
return response.json()["graphs"]
def graphs(self, namespace="default") -> List[ComputeGraphMetadata]:
    """List the compute graphs registered in a namespace.

    Args:
        namespace: Namespace to query.

    Returns:
        One ``ComputeGraphMetadata`` per entry in the "compute_graphs" list
        of the server response.
    """
    # NOTE(review): this defaults to the literal "default" namespace, while
    # `graph()` queries `self.namespace` -- confirm that is intended.
    response = self._get(f"namespaces/{namespace}/compute_graphs")
    return [
        ComputeGraphMetadata(**graph)
        for graph in response.json()["compute_graphs"]
    ]

def graph(self, name: str) -> ComputeGraphMetadata:
response = self._get(f"namespaces/{self.namespace}/compute_graphs/{name}")
Expand Down
7 changes: 7 additions & 0 deletions server/data_model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ pub struct ImageInformation {
pub run_strs: Vec<String>,
pub image_hash: String,
pub version: ImageVersion, // this gets updated when the hash changes
pub image_uri: String,
}

impl ImageInformation {
Expand All @@ -115,6 +116,7 @@ impl ImageInformation {
run_strs,
image_hash: format!("{:x}", image_hasher.finalize()),
version: ImageVersion::default(),
image_uri: "".to_string(),
}
}
}
Expand Down Expand Up @@ -337,6 +339,8 @@ impl Default for ImageVersion {
pub struct RuntimeInformation {
pub major_version: u8,
pub minor_version: u8,
#[serde(default)]
pub sdk_version: String,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
Expand Down Expand Up @@ -1077,6 +1081,7 @@ mod tests {
runtime_information: RuntimeInformation {
major_version: 3,
minor_version: 10,
sdk_version: "1.2.3".to_string(),
},
replaying: false,
};
Expand Down Expand Up @@ -1112,6 +1117,7 @@ mod tests {
runtime_information: RuntimeInformation {
major_version: 3,
minor_version: 12, // updated
sdk_version: "1.2.3".to_string(),
},
..graph.clone()
};
Expand Down Expand Up @@ -1164,6 +1170,7 @@ mod tests {
runtime_information: RuntimeInformation {
major_version: 0,
minor_version: 0,
sdk_version: "1.2.3".to_string(),
},
replaying: false,
}
Expand Down
4 changes: 4 additions & 0 deletions server/data_model/src/test_objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ pub mod tests {
runtime_information: RuntimeInformation {
major_version: 3,
minor_version: 10,
sdk_version: "1.2.3".to_string(),
},
replaying: false,
}
Expand All @@ -218,6 +219,7 @@ pub mod tests {
],
image_hash: "".to_string(),
version: Default::default(),
image_uri: "1234567890.dkr.ecr.us-east-1.amazonaws.com/test".to_string(),
},
};
let fn_b = test_compute_fn("fn_b", None);
Expand Down Expand Up @@ -248,6 +250,7 @@ pub mod tests {
runtime_information: RuntimeInformation {
major_version: 3,
minor_version: 10,
sdk_version: "1.2.3".to_string(),
},
replaying: false,
}
Expand Down Expand Up @@ -285,6 +288,7 @@ pub mod tests {
runtime_information: RuntimeInformation {
major_version: 3,
minor_version: 10,
sdk_version: "1.2.3".to_string(),
},
replaying: false,
}
Expand Down
Loading

0 comments on commit 40682fb

Please sign in to comment.