-
Notifications
You must be signed in to change notification settings - Fork 121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Image copy support #1122
Image copy support #1122
Changes from 16 commits
43e44b3
bbe1b00
3f3d8aa
636594b
b5abdac
e61afef
bb80832
e70fbfa
e895cc4
07b3ce4
a41c539
69b0b1e
88fdb53
10a8d02
b17d99a
a54bcb3
d7ff783
e59bb49
9af8b9b
4e51362
7eabab4
7d8cec2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,18 +2,19 @@ | |
|
||
configure_logging_early() | ||
|
||
|
||
import asyncio | ||
import os | ||
import shutil | ||
import signal | ||
import subprocess | ||
import sys | ||
import tempfile | ||
import threading | ||
import time | ||
from importlib.metadata import version | ||
from typing import Annotated, List, Optional | ||
|
||
import httpx | ||
import nanoid | ||
import structlog | ||
import typer | ||
|
@@ -27,11 +28,7 @@ | |
FunctionExecutorService, | ||
) | ||
from indexify.function_executor.server import Server as FunctionExecutorServer | ||
from indexify.functions_sdk.image import ( | ||
LOCAL_PYTHON_VERSION, | ||
GetDefaultPythonImage, | ||
Image, | ||
) | ||
from indexify.functions_sdk.image import Build, GetDefaultPythonImage, Image | ||
from indexify.http_client import IndexifyClient | ||
|
||
logger = structlog.get_logger(module=__name__) | ||
|
@@ -161,6 +158,32 @@ def build_image( | |
_create_image(obj, python_sdk_path) | ||
|
||
|
||
@app.command(help="Build platform images for function names") | ||
def build_platform_image( | ||
workflow_file_path: Annotated[str, typer.Argument()], | ||
image_names: Optional[List[str]] = None, | ||
build_service="https://api.tensorlake.ai/images/v1", | ||
): | ||
|
||
globals_dict = {} | ||
|
||
# Add the folder in the workflow file path to the current Python path | ||
folder_path = os.path.dirname(workflow_file_path) | ||
if folder_path not in sys.path: | ||
sys.path.append(folder_path) | ||
|
||
try: | ||
exec(open(workflow_file_path).read(), globals_dict) | ||
except FileNotFoundError as e: | ||
raise Exception( | ||
f"Could not find workflow file to execute at: " f"`{workflow_file_path}`" | ||
) | ||
for _, obj in globals_dict.items(): | ||
if type(obj) and isinstance(obj, Image): | ||
if image_names is None or obj._image_name in image_names: | ||
_create_platform_image(obj, build_service) | ||
|
||
|
||
@app.command(help="Build default image for indexify") | ||
def build_default_image( | ||
python_version: Optional[str] = typer.Option( | ||
|
@@ -230,7 +253,6 @@ def executor( | |
image_hash=image_hash, | ||
development_mode=dev, | ||
) | ||
|
||
try: | ||
asyncio.get_event_loop().run_until_complete(executor.run()) | ||
except asyncio.CancelledError: | ||
|
@@ -266,72 +288,82 @@ def function_executor( | |
).run() | ||
|
||
|
||
def _create_image(image: Image, python_sdk_path): | ||
console.print( | ||
Text("Creating container for ", style="cyan"), | ||
Text(f"`{image._image_name}`", style="cyan bold"), | ||
) | ||
_build_image(image=image, python_sdk_path=python_sdk_path) | ||
def _create_platform_image(image: Image, service_endpoint: str): | ||
fd, context_file = tempfile.mkstemp() | ||
image.build_context(context_file) | ||
client = httpx | ||
|
||
headers = {} | ||
api_key = os.getenv("TENSORLAKE_API_KEY") | ||
if api_key: | ||
headers["Authorization"] = f"Bearer {api_key}" | ||
|
||
def _build_image(image: Image, python_sdk_path: Optional[str] = None): | ||
image_hash = image.hash() | ||
|
||
try: | ||
import docker | ||
|
||
client = docker.from_env() | ||
client.ping() | ||
except Exception as e: | ||
console.print( | ||
Text("Unable to connect with docker: ", style="red bold"), | ||
Text(f"{e}", style="red"), | ||
) | ||
exit(-1) | ||
|
||
docker_contents = [ | ||
f"FROM {image._base_image}", | ||
"RUN mkdir -p ~/.indexify", | ||
"RUN touch ~/.indexify/image_name", | ||
f"RUN echo {image._image_name} > ~/.indexify/image_name", | ||
f"RUN echo {image.hash()} > ~/.indexify/image_hash", | ||
"WORKDIR /app", | ||
] | ||
|
||
docker_contents.extend(["RUN " + i for i in image._run_strs]) | ||
|
||
if python_sdk_path is not None: | ||
logging.info( | ||
f"Building image {image._image_name} with local version of the SDK" | ||
# Check if the image is built before pushing a new one | ||
builds_response = client.get( | ||
f"{service_endpoint}/builds", | ||
params={ | ||
"image_name": image._image_name, | ||
"image_hash": image_hash, | ||
}, | ||
headers=headers, | ||
) | ||
builds_response.raise_for_status() | ||
matching_builds = [Build.model_validate(b) for b in builds_response.json()] | ||
if not matching_builds: | ||
files = {"context": open(context_file, "rb")} | ||
|
||
data = {"name": image._image_name, "hash": image_hash} | ||
|
||
res = client.post( | ||
f"{service_endpoint}/builds", data=data, files=files, headers=headers | ||
) | ||
if not os.path.exists(python_sdk_path): | ||
print(f"error: {python_sdk_path} does not exist") | ||
os.exit(1) | ||
docker_contents.append(f"COPY {python_sdk_path} /app/python-sdk") | ||
docker_contents.append("RUN (cd /app/python-sdk && pip install .)") | ||
else: | ||
docker_contents.append(f"RUN pip install indexify=={image._sdk_version}") | ||
res.raise_for_status() | ||
|
||
docker_file = "\n".join(docker_contents) | ||
build = Build.model_validate(res.json()) | ||
else: | ||
build = matching_builds[0] | ||
|
||
match build.status: | ||
case "completed": | ||
print(f"image {build.image_name}:{build.image_hash} is already built") | ||
case "ready" | "building": | ||
print(f"waiting for {build.image_name} image to build") | ||
while build.status != "completed": | ||
res = client.get( | ||
f"{service_endpoint}/builds/{build.id}", headers=headers | ||
) | ||
build = Build.model_validate(res.json()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this where we'll be able to get the logs and display them? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After the build is completed another call will need to be made to retrieve the logs, that is not implemented at this time. |
||
time.sleep(5) | ||
j3m7 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
case _: | ||
raise ValueError(f"Unexpected build status {build.status}") | ||
|
||
match build.result: | ||
case "success": | ||
build_duration = build.push_completed_at - build.started_at | ||
print( | ||
f"Building completed in {build_duration}; image is stored in {build.uri}" | ||
) | ||
|
||
import docker.api.build | ||
case "failed": | ||
print(f"Building failed, please see logs for details") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the current expectation when this fails? Is there a way to get the logs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not for the client side, they are logged by the builder task. A subsequent feature would be to store them in the blob store and add some API tissue to retrieve them. |
||
|
||
docker.api.build.process_dockerfile = lambda dockerfile, path: ( | ||
"Dockerfile", | ||
dockerfile, | ||
) | ||
case _: | ||
raise ValueError(f"Unexpected build result {build.status}") | ||
|
||
console.print("Creating image using Dockerfile contents:", style="cyan bold") | ||
print(f"{docker_file}") | ||
|
||
client = docker.from_env() | ||
image_name = f"{image._image_name}:{image._tag}" | ||
(_image, generator) = client.images.build( | ||
path=".", | ||
dockerfile=docker_file, | ||
tag=image_name, | ||
rm=True, | ||
def _create_image(image: Image, python_sdk_path): | ||
console.print( | ||
Text("Creating container for ", style="cyan"), | ||
Text(f"`{image._image_name}`", style="cyan bold"), | ||
) | ||
for result in generator: | ||
print(result) | ||
_build_image(image=image, python_sdk_path=python_sdk_path) | ||
|
||
print(f"built image: {image_name}") | ||
|
||
def _build_image(image: Image, python_sdk_path: Optional[str] = None): | ||
built_image, output = image.build(python_sdk_path=python_sdk_path) | ||
for line in output: | ||
print(line) | ||
print(f"built image: {built_image.tags[0]}") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@j3m7 this needs to be in the Tensorlake cli, and wrapped in Tensorlake deploy
for OSS, we could translate .copy to COPY in the local build context
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed, it's final home will be in the tensorlake cli. added it here so we could start leveraging this functionality on our internal build pipelines before writing the tensorlake cli.
The .copy() calls are already rendered as COPY for both local and remote build contexts.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We discussed the approach of postponing creating the tensorlake CLI + SDK to later since it is not needed for document AI.
Since everything will be open source, having it live here for now is seen as a good way for us to make progress and not have to setup a new repo, new pipy, new CI.