diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageEditAutoConfiguration.java b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageEditAutoConfiguration.java
new file mode 100644
index 00000000000..48b5ff10200
--- /dev/null
+++ b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageEditAutoConfiguration.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2023-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.model.openai.autoconfigure;
+
+import static org.springframework.ai.model.openai.autoconfigure.OpenAIAutoConfigurationUtil.resolveConnectionProperties;
+
+import org.springframework.ai.image.observation.ImageModelObservationConvention;
+import org.springframework.ai.model.SimpleApiKey;
+import org.springframework.ai.model.SpringAIModelProperties;
+import org.springframework.ai.model.SpringAIModels;
+import org.springframework.ai.openai.OpenAiImageEditModel;
+import org.springframework.ai.openai.OpenAiImageModel;
+import org.springframework.ai.openai.api.OpenAiApi;
+import org.springframework.ai.openai.api.OpenAiImageApi;
+import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration;
+import org.springframework.beans.factory.ObjectProvider;
+import org.springframework.boot.autoconfigure.AutoConfiguration;
+import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration;
+import org.springframework.boot.autoconfigure.web.reactive.function.client.WebClientAutoConfiguration;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.retry.support.RetryTemplate;
+import org.springframework.web.client.ResponseErrorHandler;
+import org.springframework.web.client.RestClient;
+
+import io.micrometer.observation.ObservationRegistry;
+
+/**
+ * Image Edit {@link AutoConfiguration Auto-configuration} for OpenAI.
+ *
+ * @author Minsoo Nam
+ */
+@AutoConfiguration(after = { RestClientAutoConfiguration.class, WebClientAutoConfiguration.class,
+		SpringAiRetryAutoConfiguration.class })
+@ConditionalOnClass(OpenAiApi.class)
+@ConditionalOnProperty(name = SpringAIModelProperties.IMAGE_EDIT_MODEL, havingValue = SpringAIModels.OPENAI,
+		matchIfMissing = true)
+@EnableConfigurationProperties({ OpenAiConnectionProperties.class, OpenAiImageEditProperties.class })
+@ImportAutoConfiguration(classes = { SpringAiRetryAutoConfiguration.class, RestClientAutoConfiguration.class,
+		WebClientAutoConfiguration.class })
+public class OpenAiImageEditAutoConfiguration {
+
+	/**
+	 * Creates the {@link OpenAiImageEditModel} bean unless the application context
+	 * already contains one.
+	 * @param commonProperties common OpenAI connection properties
+	 * @param imageEditProperties image edit specific properties, including the default
+	 * options passed to the model
+	 * @param restClientBuilderProvider provider of the {@link RestClient.Builder}; falls
+	 * back to {@link RestClient#builder()} when no builder bean is available
+	 * @param retryTemplate retry template passed to the model
+	 * @param responseErrorHandler error handler set on the image API client
+	 * @return the configured {@link OpenAiImageEditModel}
+	 */
+	@Bean
+	@ConditionalOnMissingBean
+	public OpenAiImageEditModel openAiImageEditModel(OpenAiConnectionProperties commonProperties,
+			OpenAiImageEditProperties imageEditProperties,
+			ObjectProvider<RestClient.Builder> restClientBuilderProvider, RetryTemplate retryTemplate,
+			ResponseErrorHandler responseErrorHandler) {
+
+		OpenAIAutoConfigurationUtil.ResolvedConnectionProperties resolved = resolveConnectionProperties(
+				commonProperties, imageEditProperties, "image");
+
+		var openAiImageApi = OpenAiImageApi.builder()
+			.baseUrl(resolved.baseUrl())
+			.apiKey(new SimpleApiKey(resolved.apiKey()))
+			.headers(resolved.headers())
+			.restClientBuilder(restClientBuilderProvider.getIfAvailable(RestClient::builder))
+			.responseErrorHandler(responseErrorHandler)
+			.build();
+		return new OpenAiImageEditModel(openAiImageApi, imageEditProperties.getOptions(), retryTemplate);
+	}
+
+}
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageEditProperties.java b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageEditProperties.java
new file mode 100644
index 00000000000..e38f1a25e11
--- /dev/null
+++ 
b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageEditProperties.java @@ -0,0 +1,52 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.model.openai.autoconfigure; + +import org.springframework.ai.openai.OpenAiImageEditOptions; +import org.springframework.ai.openai.OpenAiImageOptions; +import org.springframework.ai.openai.api.OpenAiImageApi; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.boot.context.properties.NestedConfigurationProperty; + +/** + * OpenAI Image Edit autoconfiguration properties, bound under {@link #CONFIG_PREFIX}. + * + * @author Minsoo Nam + * @since 1.0.0 + */ +@ConfigurationProperties(OpenAiImageEditProperties.CONFIG_PREFIX) +public class OpenAiImageEditProperties extends OpenAiParentProperties { + + public static final String CONFIG_PREFIX = "spring.ai.openai.image.edit"; + + public static final String DEFAULT_IMAGE_MODEL = OpenAiImageApi.ImageModel.DALL_E_2.getValue(); + + /** + * Options for the OpenAI Image Edit API; the model is preset to {@link #DEFAULT_IMAGE_MODEL}. 
+ */ + @NestedConfigurationProperty + private OpenAiImageEditOptions options = OpenAiImageEditOptions.builder().model(DEFAULT_IMAGE_MODEL).build(); + + public OpenAiImageEditOptions getOptions() { + return this.options; + } + + public void setOptions(OpenAiImageEditOptions options) { + this.options = options; + } + +} diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiModelConfigurationTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiModelConfigurationTests.java index 19cf832fda8..9270cf0a63a 100644 --- a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiModelConfigurationTests.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiModelConfigurationTests.java @@ -22,6 +22,7 @@ import org.springframework.ai.openai.OpenAiAudioTranscriptionModel; import org.springframework.ai.openai.OpenAiChatModel; import org.springframework.ai.openai.OpenAiEmbeddingModel; +import org.springframework.ai.openai.OpenAiImageEditModel; import org.springframework.ai.openai.OpenAiImageModel; import org.springframework.ai.openai.OpenAiModerationModel; import org.springframework.boot.autoconfigure.AutoConfigurations; @@ -33,6 +34,7 @@ * Unit Tests for OpenAI auto configurations' conditional enabling of models. 
* * @author Ilayaperumal Gopinathan + * @author Minsoo Nam */ public class OpenAiModelConfigurationTests { @@ -171,6 +173,54 @@ void imageModelActivation() { }); } + @Test + void imageEditModelActivation() { + this.contextRunner.withConfiguration(AutoConfigurations.of(OpenAiImageEditAutoConfiguration.class)) + .run(context -> { + assertThat(context.getBeansOfType(OpenAiChatModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiEmbeddingModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiImageModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiImageEditModel.class)).isNotEmpty(); + assertThat(context.getBeansOfType(OpenAiAudioSpeechModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiAudioTranscriptionModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiModerationModel.class)).isEmpty(); + }); + + this.contextRunner.withConfiguration(AutoConfigurations.of(OpenAiImageEditAutoConfiguration.class)) + .withPropertyValues("spring.ai.model.image.edit=none") + .run(context -> { + assertThat(context.getBeansOfType(OpenAiImageEditProperties.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiImageEditModel.class)).isEmpty(); + }); + + this.contextRunner.withConfiguration(AutoConfigurations.of(OpenAiImageEditAutoConfiguration.class)) + .withPropertyValues("spring.ai.model.image.edit=openai") + .run(context -> { + assertThat(context.getBeansOfType(OpenAiImageEditProperties.class)).isNotEmpty(); + assertThat(context.getBeansOfType(OpenAiImageEditModel.class)).isNotEmpty(); + }); + + this.contextRunner + .withConfiguration( + AutoConfigurations.of(OpenAiChatAutoConfiguration.class, OpenAiEmbeddingAutoConfiguration.class, + OpenAiImageEditAutoConfiguration.class, OpenAiAudioSpeechAutoConfiguration.class, + OpenAiAudioTranscriptionAutoConfiguration.class, OpenAiModerationAutoConfiguration.class)) + .withPropertyValues("spring.ai.model.chat=none", "spring.ai.model.embedding=none", + 
"spring.ai.model.image=none", "spring.ai.model.image.edit=openai", + "spring.ai.model.audio.speech=none", "spring.ai.model.audio.transcription=none", + "spring.ai.model.moderation=none") + .withConfiguration(AutoConfigurations.of(OpenAiImageAutoConfiguration.class)) + .run(context -> { + assertThat(context.getBeansOfType(OpenAiChatModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiEmbeddingModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiImageModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiImageEditModel.class)).isNotEmpty(); + assertThat(context.getBeansOfType(OpenAiAudioSpeechModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiAudioTranscriptionModel.class)).isEmpty(); + assertThat(context.getBeansOfType(OpenAiModerationModel.class)).isEmpty(); + }); + } + @Test void audioSpeechModelActivation() { this.contextRunner.withConfiguration(AutoConfigurations.of(OpenAiAudioSpeechAutoConfiguration.class)) diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java index b1c547eeca5..7bbcb28e2e1 100644 --- a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java @@ -321,6 +321,31 @@ public void imageProperties() { }); } + @Test + public void imageEditProperties() { + new ApplicationContextRunner().withPropertyValues( + // @formatter:off + "spring.ai.openai.base-url=TEST_BASE_URL", + "spring.ai.openai.api-key=abc123", + "spring.ai.openai.image.edit.options.model=MODEL_XYZ", + 
"spring.ai.openai.image.edit.options.n=3") + // @formatter:on + .withConfiguration(AutoConfigurations.of(OpenAiImageEditAutoConfiguration.class)) + .run(context -> { + var imageEditProperties = context.getBean(OpenAiImageEditProperties.class); + var connectionProperties = context.getBean(OpenAiConnectionProperties.class); + + assertThat(connectionProperties.getApiKey()).isEqualTo("abc123"); + assertThat(connectionProperties.getBaseUrl()).isEqualTo("TEST_BASE_URL"); + + assertThat(imageEditProperties.getApiKey()).isNull(); + assertThat(imageEditProperties.getBaseUrl()).isNull(); + + assertThat(imageEditProperties.getOptions().getModel()).isEqualTo("MODEL_XYZ"); + assertThat(imageEditProperties.getOptions().getN()).isEqualTo(3); + }); + } + @Test public void imageOverrideConnectionProperties() { new ApplicationContextRunner().withPropertyValues( diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiImageOptions.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiImageOptions.java index 10e0d13f47e..537794d9043 100644 --- a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiImageOptions.java +++ b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiImageOptions.java @@ -232,6 +232,13 @@ public String toString() { public enum ImageModel { + /** + * GPT Image 1 is our new state-of-the-art image generation model. It is a + * natively multimodal language model that accepts both text and image inputs, and + * produces image outputs. + */ + GPT_IMAGE_1("gpt-image-1"), + /** * The latest DALL·E model released in Nov 2023. 
+ */
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageEditModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageEditModel.java
new file mode 100644
index 00000000000..e206ea6b68b
--- /dev/null
+++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageEditModel.java
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2023-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.openai;
+
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.content.Media;
+import org.springframework.ai.image.Image;
+import org.springframework.ai.image.ImageEditModel;
+import org.springframework.ai.image.ImageEditOptions;
+import org.springframework.ai.image.ImageEditPrompt;
+import org.springframework.ai.image.ImageGeneration;
+import org.springframework.ai.image.ImageResponse;
+import org.springframework.ai.image.ImageResponseMetadata;
+import org.springframework.ai.model.ModelOptionsUtils;
+import org.springframework.ai.openai.api.OpenAiImageApi;
+import org.springframework.ai.openai.api.OpenAiImageApi.ImageModel;
+import org.springframework.ai.openai.api.OpenAiImageApi.OpenAiImageEditRequest;
+import org.springframework.ai.openai.api.OpenAiImageApi.OpenAiImageResponse;
+import org.springframework.ai.openai.metadata.OpenAiImageGenerationMetadata;
+import org.springframework.ai.retry.RetryUtils;
+import org.springframework.http.ResponseEntity;
+import org.springframework.retry.support.RetryTemplate;
+import org.springframework.util.Assert;
+
+/**
+ * {@link ImageEditModel} implementation that provides a client for calling the OpenAI
+ * image edit API.
+ *
+ * @author Minsoo Nam
+ * @since 1.0.0
+ */
+public class OpenAiImageEditModel implements ImageEditModel {
+
+	private static final Logger logger = LoggerFactory.getLogger(OpenAiImageEditModel.class);
+
+	public static final String DEFAULT_IMAGE_EDIT_MODEL = ImageModel.DALL_E_2.getValue();
+
+	/**
+	 * The default options used for image edit requests.
+	 */
+	private final OpenAiImageEditOptions defaultOptions;
+
+	/**
+	 * The retry template used to retry the OpenAI Image API calls.
+	 */
+	private final RetryTemplate retryTemplate;
+
+	/**
+	 * Low-level access to the OpenAI Image API.
+	 */
+	private final OpenAiImageApi openAiImageApi;
+
+	/**
+	 * Creates an instance of the OpenAiImageEditModel.
+	 * @param openAiImageApi The OpenAiImageApi instance to be used for interacting with
+	 * the OpenAI Image API.
+	 * @throws IllegalArgumentException if openAiImageApi is null
+	 */
+	public OpenAiImageEditModel(OpenAiImageApi openAiImageApi) {
+		this(openAiImageApi, OpenAiImageEditOptions.builder().build(), RetryUtils.DEFAULT_RETRY_TEMPLATE);
+	}
+
+	/**
+	 * Initializes a new instance of the OpenAiImageEditModel.
+	 * @param openAiImageApi The OpenAiImageApi instance to be used for interacting with
+	 * the OpenAI Image API.
+	 * @param options The OpenAiImageEditOptions supplying the default request options.
+	 * @param retryTemplate The retry template.
+	 * @throws IllegalArgumentException if any argument is null
+	 */
+	public OpenAiImageEditModel(OpenAiImageApi openAiImageApi, OpenAiImageEditOptions options,
+			RetryTemplate retryTemplate) {
+		Assert.notNull(openAiImageApi, "OpenAiImageApi must not be null");
+		Assert.notNull(options, "options must not be null");
+		Assert.notNull(retryTemplate, "retryTemplate must not be null");
+		this.openAiImageApi = openAiImageApi;
+		this.defaultOptions = options;
+		this.retryTemplate = retryTemplate;
+	}
+
+	/**
+	 * Merges the prompt's runtime options with the defaults, sends the edit request
+	 * through the retry template and converts the API response.
+	 * @param imageEditPrompt the edit instructions plus optional runtime options
+	 * @return the image response; it is empty when the API response has no body
+	 */
+	@Override
+	public ImageResponse call(ImageEditPrompt imageEditPrompt) {
+		ImageEditPrompt requestImageEditPrompt = buildRequestImageEditPrompt(imageEditPrompt);
+		OpenAiImageEditRequest imageRequest = createRequest(requestImageEditPrompt);
+
+		ResponseEntity<OpenAiImageResponse> imageResponseEntity = this.retryTemplate
+			.execute(ctx -> this.openAiImageApi.createImageEdit(imageRequest));
+		return convertResponse(imageResponseEntity, imageRequest);
+	}
+
+	/**
+	 * Creates the API request from the prompt's images and text, then merges the prompt
+	 * options into it.
+	 */
+	private OpenAiImageEditRequest createRequest(ImageEditPrompt imageEditPrompt) {
+		List<byte[]> images = imageEditPrompt.getInstructions()
+			.getImage()
+			.stream()
+			.map(Media::getDataAsByteArray)
+			.toList();
+		String prompt = imageEditPrompt.getInstructions().getPrompt();
+		OpenAiImageEditOptions imageOptions = (OpenAiImageEditOptions) imageEditPrompt.getOptions();
+
+		OpenAiImageEditRequest imageRequest = new OpenAiImageEditRequest(images, prompt, DEFAULT_IMAGE_EDIT_MODEL);
+
+		return ModelOptionsUtils.merge(imageOptions, imageRequest, OpenAiImageEditRequest.class);
+	}
+
+	/**
+	 * Converts the API response into an {@link ImageResponse}; a missing body is logged
+	 * and mapped to an empty response.
+	 */
+	private ImageResponse convertResponse(ResponseEntity<OpenAiImageResponse> imageResponseEntity,
+			OpenAiImageEditRequest imageEditRequest) {
+		OpenAiImageResponse imageApiResponse = imageResponseEntity.getBody();
+		if (imageApiResponse == null) {
+			logger.warn("No image response returned for request: {}", imageEditRequest);
+			return new ImageResponse(List.of());
+		}
+
+		List<ImageGeneration> imageGenerationList = imageApiResponse.data()
+			.stream()
+			.map(entry -> new ImageGeneration(new Image(entry.url(), entry.b64Json()),
+					new OpenAiImageGenerationMetadata(entry.revisedPrompt())))
+			.toList();
+
+		ImageResponseMetadata openAiImageResponseMetadata = new ImageResponseMetadata(imageApiResponse.created());
+		return new ImageResponse(imageGenerationList, openAiImageResponseMetadata);
+	}
+
+	/**
+	 * Builds the prompt that is sent to the API: runtime options take precedence and the
+	 * default options fill in values the caller left unset.
+	 */
+	private ImageEditPrompt buildRequestImageEditPrompt(ImageEditPrompt imageEditPrompt) {
+		// Process runtime options
+		OpenAiImageEditOptions runtimeOptions = null;
+		if (imageEditPrompt.getOptions() != null) {
+			runtimeOptions = ModelOptionsUtils.copyToTarget(imageEditPrompt.getOptions(), ImageEditOptions.class,
+					OpenAiImageEditOptions.class);
+		}
+
+		OpenAiImageEditOptions requestOptions = (runtimeOptions != null) ? mergeWithDefaults(runtimeOptions)
+				: this.defaultOptions;
+
+		return new ImageEditPrompt(imageEditPrompt.getInstructions(), requestOptions);
+	}
+
+	/**
+	 * Returns new options in which each runtime value overrides the matching default.
+	 */
+	private OpenAiImageEditOptions mergeWithDefaults(OpenAiImageEditOptions runtimeOptions) {
+		OpenAiImageEditOptions merged = OpenAiImageEditOptions.builder()
+			// Handle portable image options
+			.model(ModelOptionsUtils.mergeOption(runtimeOptions.getModel(), this.defaultOptions.getModel()))
+			.mask(ModelOptionsUtils.mergeOption(runtimeOptions.getMask(), this.defaultOptions.getMask()))
+			.N(ModelOptionsUtils.mergeOption(runtimeOptions.getN(), this.defaultOptions.getN()))
+			.responseFormat(ModelOptionsUtils.mergeOption(runtimeOptions.getResponseFormat(),
+					this.defaultOptions.getResponseFormat()))
+			// Width and height used to be left out, which discarded the configured size
+			// whenever runtime options were supplied.
+			.width(ModelOptionsUtils.mergeOption(runtimeOptions.getWidth(), this.defaultOptions.getWidth()))
+			.height(ModelOptionsUtils.mergeOption(runtimeOptions.getHeight(), this.defaultOptions.getHeight()))
+			// Handle OpenAI specific image options
+			.quality(ModelOptionsUtils.mergeOption(runtimeOptions.getQuality(), this.defaultOptions.getQuality()))
+			.user(ModelOptionsUtils.mergeOption(runtimeOptions.getUser(), this.defaultOptions.getUser()))
+			.build();
+		// A size that is not "WxH" yields no width/height, so carry the raw size string over
+		// when the builder could not derive one.
+		if (merged.getSize() == null) {
+			merged.setSize(ModelOptionsUtils.mergeOption(runtimeOptions.getSize(), this.defaultOptions.getSize()));
+		}
+		return merged;
+	}
+
+}
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageEditOptions.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageEditOptions.java
new file mode 100644
index 00000000000..48ffe6d835d
--- /dev/null
+++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageEditOptions.java
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2023-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.openai;
+
+import java.util.Objects;
+
+import org.springframework.ai.image.ImageEditOptions;
+import org.springframework.ai.image.ImageOptions;
+import org.springframework.core.io.Resource;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Options for the OpenAI Image Edit API.
+ *
+ * @author Minsoo Nam
+ * @since 1.0.0
+ */
+@JsonInclude(JsonInclude.Include.NON_NULL)
+public class OpenAiImageEditOptions implements ImageEditOptions {
+
+	/**
+	 * An additional image whose fully transparent areas (e.g. where alpha is zero)
+	 * indicate where image should be edited. If there are multiple images provided, the
+	 * mask will be applied on the first image. Must be a valid PNG file, less than 4MB,
+	 * and have the same dimensions as image.
+	 */
+	@JsonProperty("mask")
+	private Resource mask;
+
+	/**
+	 * The model to use for image generation.
+	 */
+	@JsonProperty("model")
+	private String model;
+
+	/**
+	 * The number of images to generate. Must be between 1 and 10. For dall-e-3, only n=1
+	 * is supported.
+	 */
+	@JsonProperty("n")
+	private Integer n;
+
+	/**
+	 * The quality of the image that will be generated. hd creates images with finer
+	 * details and greater consistency across the image. This param is only supported for
+	 * dall-e-3.
+	 */
+	@JsonProperty("quality")
+	private String quality;
+
+	/**
+	 * The format in which the generated images are returned. Must be one of url or
+	 * b64_json.
+	 */
+	@JsonProperty("response_format")
+	private String responseFormat;
+
+	/**
+	 * The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for
+	 * dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models.
+	 * This property is automatically computed when both width and height are set,
+	 * following the format "widthxheight". When setting this property directly, it must
+	 * follow the format "WxH" where W and H are valid integers. Invalid formats will
+	 * result in null width and height values.
+	 */
+	@JsonProperty("size")
+	private String size;
+
+	/**
+	 * A unique identifier representing your end-user, which can help OpenAI to monitor
+	 * and detect abuse.
+	 */
+	@JsonProperty("user")
+	private String user;
+
+	/**
+	 * The width of the generated images. Must be one of 256, 512, or 1024 for dall-e-2.
+	 * This property is interconnected with the 'size' property - setting both width and
+	 * height will automatically compute and set the size in "widthxheight" format.
+	 * Conversely, setting a valid size string will parse and set the individual width and
+	 * height values.
+	 */
+	@JsonProperty("size_width")
+	private Integer width;
+
+	/**
+	 * The height of the generated images. Must be one of 256, 512, or 1024 for dall-e-2.
+	 * This property is interconnected with the 'size' property - setting both width and
+	 * height will automatically compute and set the size in "widthxheight" format.
+	 * Conversely, setting a valid size string will parse and set the individual width and
+	 * height values.
+	 */
+	@JsonProperty("size_height")
+	private Integer height;
+
+	public static Builder builder() {
+		return new Builder();
+	}
+
+	public Resource getMask() {
+		return this.mask;
+	}
+
+	public void setMask(Resource mask) {
+		this.mask = mask;
+	}
+
+	@Override
+	public Integer getN() {
+		return this.n;
+	}
+
+	public void setN(Integer n) {
+		this.n = n;
+	}
+
+	@Override
+	public String getModel() {
+		return this.model;
+	}
+
+	public void setModel(String model) {
+		this.model = model;
+	}
+
+	public String getQuality() {
+		return this.quality;
+	}
+
+	public void setQuality(String quality) {
+		this.quality = quality;
+	}
+
+	@Override
+	public String getResponseFormat() {
+		return this.responseFormat;
+	}
+
+	public void setResponseFormat(String responseFormat) {
+		this.responseFormat = responseFormat;
+	}
+
+	@Override
+	public Integer getWidth() {
+		if (this.width != null) {
+			return this.width;
+		}
+		else if (this.size != null) {
+			try {
+				String[] dimensions = this.size.split("x");
+				if (dimensions.length != 2) {
+					return null;
+				}
+				return Integer.parseInt(dimensions[0]);
+			}
+			catch (NumberFormatException ex) {
+				return null;
+			}
+		}
+		return null;
+	}
+
+	public void setWidth(Integer width) {
+		this.width = width;
+		if (this.width != null && this.height != null) {
+			this.size = this.width + "x" + this.height;
+		}
+	}
+
+	@Override
+	public Integer getHeight() {
+		if (this.height != null) {
+			return this.height;
+		}
+		else if (this.size != null) {
+			try {
+				String[] dimensions = this.size.split("x");
+				if (dimensions.length != 2) {
+					return null;
+				}
+				return Integer.parseInt(dimensions[1]);
+			}
+			catch (NumberFormatException ex) {
+				return null;
+			}
+		}
+		return null;
+	}
+
+	public void setHeight(Integer height) {
+		this.height = height;
+		if (this.width != null && this.height != null) {
+			this.size = this.width + "x" + this.height;
+		}
+	}
+
+	@Override
+	public String getStyle() {
+		return null;
+	}
+
+	public String getUser() {
+		return this.user;
+	}
+
+	public void setUser(String user) {
+		this.user = user;
+	}
+
+	public String getSize() {
+		if (this.size != null) {
+			return this.size;
+		}
+		return (this.width != null && this.height != null) ? this.width + "x" + this.height : null;
+	}
+
+	public void setSize(String size) {
+		this.size = size;
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		if (this == o) {
+			return true;
+		}
+		if (!(o instanceof OpenAiImageEditOptions that)) {
+			return false;
+		}
+		return Objects.equals(this.mask, that.mask) && Objects.equals(this.n, that.n)
+				&& Objects.equals(this.model, that.model) && Objects.equals(this.width, that.width)
+				&& Objects.equals(this.height, that.height) && Objects.equals(this.quality, that.quality)
+				&& Objects.equals(this.responseFormat, that.responseFormat) && Objects.equals(this.size, that.size)
+				&& Objects.equals(this.user, that.user);
+	}
+
+	@Override
+	public int hashCode() {
+		return Objects.hash(this.mask, this.n, this.model, this.width, this.height, this.quality, this.responseFormat,
+				this.size, this.user);
+	}
+
+	@Override
+	public String toString() {
+		return "OpenAiImageEditOptions{" + "mask=" + this.mask + ", n=" + this.n + ", model='" + this.model + '\''
+				+ ", width=" + this.width + ", height=" + this.height + ", quality='" + this.quality + '\''
+				+ ", responseFormat='" + this.responseFormat + '\'' + ", size='" + this.size + '\'' + ", style='" + '\''
+				+ ", user='" + this.user + '\'' + '}';
+	}
+
+	public static final class Builder {
+
+		private final OpenAiImageEditOptions options;
+
+		private Builder() {
+			this.options = new OpenAiImageEditOptions();
+		}
+
+		public Builder mask(Resource mask) {
+			this.options.setMask(mask);
+			return this;
+		}
+
+		public Builder N(Integer n) {
+			this.options.setN(n);
+			return this;
+		}
+
+		public Builder model(String model) {
+			this.options.setModel(model);
+			return this;
+		}
+
+		public Builder quality(String quality) {
+			this.options.setQuality(quality);
+			return this;
+		}
+
+		public Builder responseFormat(String responseFormat) {
+			this.options.setResponseFormat(responseFormat);
+			return this;
+		}
+
+		public Builder width(Integer width) {
+			this.options.setWidth(width);
+			return this;
+		}
+
+		public Builder height(Integer height) {
+			this.options.setHeight(height);
+			return this;
+		}
+
+		public Builder user(String user) {
+			this.options.setUser(user);
+			return this;
+		}
+
+		public Builder withN(Integer n) {
+			this.options.setN(n);
+			return this;
+		}
+
+		public Builder withModel(String model) {
+			this.options.setModel(model);
+			return this;
+		}
+
+		public Builder withQuality(String quality) {
+			this.options.setQuality(quality);
+			return this;
+		}
+
+		public Builder withResponseFormat(String responseFormat) {
+			this.options.setResponseFormat(responseFormat);
+			return this;
+		}
+
+		public Builder withWidth(Integer width) {
+			this.options.setWidth(width);
+			return this;
+		}
+
+		public Builder withHeight(Integer height) {
+			this.options.setHeight(height);
+			return this;
+		}
+
+		public Builder withUser(String user) {
+			this.options.setUser(user);
+			return this;
+		}
+
+		public OpenAiImageEditOptions build() {
+			return this.options;
+		}
+
+	}
+
+}
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java
index b09507528f8..c4b7f5d78af 100644
--- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java
+++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java
@@ -18,14 +18,13 @@ import java.util.List; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; - import org.springframework.ai.model.ApiKey; import org.springframework.ai.model.NoopApiKey; import org.springframework.ai.model.SimpleApiKey; import org.springframework.ai.openai.api.common.OpenAiApiConstants; import org.springframework.ai.retry.RetryUtils; 
+import org.springframework.core.io.ByteArrayResource; +import org.springframework.core.io.Resource; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.util.Assert; @@ -34,6 +33,9 @@ import org.springframework.web.client.ResponseErrorHandler; import org.springframework.web.client.RestClient; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; + /** * OpenAI Image API. * @@ -62,7 +64,6 @@ public OpenAiImageApi(String baseUrl, ApiKey apiKey, MultiValueMap createImage(OpenAiImageRequest openAi return this.restClient.post() .uri("v1/images/generations") .body(openAiImageRequest) + .contentType(MediaType.APPLICATION_JSON) + .retrieve() + .toEntity(OpenAiImageResponse.class); + } + + /** + * Sends the edit request as multipart form data to the {@code v1/images/edits} endpoint. + * @param openAiImageEditRequest the edit request; must be non-null and carry a prompt and at least one image + * @return the response entity wrapping the {@link OpenAiImageResponse} + * @throws IllegalArgumentException if the request is null, the prompt is empty or no image is given + */ + public ResponseEntity<OpenAiImageResponse> createImageEdit(OpenAiImageEditRequest openAiImageEditRequest) { + Assert.notNull(openAiImageEditRequest, "Image request cannot be null."); + Assert.hasLength(openAiImageEditRequest.prompt(), "Prompt cannot be empty."); + Assert.notEmpty(openAiImageEditRequest.image(), "Image cannot be empty."); + + MultiValueMap<String, Object> multipartBody = new LinkedMultiValueMap<>(); + openAiImageEditRequest.image().forEach(image -> { + // The anonymous subclass gives the in-memory bytes a file name for the multipart part. + Resource imageResource = new ByteArrayResource(image) { + @Override + public String getFilename() { + return "image.png"; + } + }; + multipartBody.add("image", imageResource); + }); + multipartBody.add("model", openAiImageEditRequest.model()); + multipartBody.add("prompt", openAiImageEditRequest.prompt()); + multipartBody.add("response_format", openAiImageEditRequest.responseFormat()); + multipartBody.add("n", openAiImageEditRequest.n()); + multipartBody.add("quality", openAiImageEditRequest.quality()); + multipartBody.add("size", openAiImageEditRequest.size()); + multipartBody.add("user", openAiImageEditRequest.user()); + if (openAiImageEditRequest.mask() != null) { + Resource maskResource = new 
ByteArrayResource(openAiImageEditRequest.mask()) { + @Override + public String getFilename() { + return "mask.png"; + } + }; + multipartBody.add("mask", maskResource); + } + + return this.restClient.post() + .uri("v1/images/edits") + .body(multipartBody) + .contentType(MediaType.MULTIPART_FORM_DATA) .retrieve() .toEntity(OpenAiImageResponse.class); } @@ -91,6 +133,13 @@ public static Builder builder() { */ public enum ImageModel { + /** + * GPT Image 1 is our new state-of-the-art image generation model. It is a + * natively multimodal language model that accepts both text and image inputs, and + * produces image outputs. + */ + GPT_IMAGE_1("gpt-image-1"), + /** * The latest DALL·E model released in Nov 2023. */ @@ -132,6 +181,27 @@ public OpenAiImageRequest(String prompt, String model) { } } + /** + * Request payload for the image edit endpoint; {@code image} holds the raw bytes of each input image. + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + public record OpenAiImageEditRequest( + @JsonProperty("image") List<byte[]> image, + @JsonProperty("prompt") String prompt, + @JsonProperty("model") String model, + @JsonProperty("mask") byte[] mask, + @JsonProperty("n") Integer n, + @JsonProperty("quality") String quality, + @JsonProperty("response_format") String responseFormat, + @JsonProperty("size") String size, + @JsonProperty("user") String user) { + + public OpenAiImageEditRequest(List<byte[]> images, String prompt, String model) { + this(images, prompt, model, null, null, null, null, null, null); + } + + public OpenAiImageEditRequest(byte[] image, String prompt, String model) { + this(List.of(image), prompt, model); + } + } + @JsonInclude(JsonInclude.Include.NON_NULL) public record OpenAiImageResponse( @JsonProperty("created") Long created, diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/OpenAiImageEditModelTest.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/OpenAiImageEditModelTest.java new file mode 100644 index 00000000000..47df1b16601 --- /dev/null +++ 
b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/OpenAiImageEditModelTest.java @@ -0,0 +1,110 @@ +package org.springframework.ai.openai.image; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.springframework.test.web.client.match.MockRestRequestMatchers.header; +import static org.springframework.test.web.client.match.MockRestRequestMatchers.method; +import static org.springframework.test.web.client.match.MockRestRequestMatchers.requestTo; +import static org.springframework.test.web.client.response.MockRestResponseCreators.withSuccess; + +import java.util.List; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.springframework.ai.content.Media; +import org.springframework.ai.image.ImageEditMessage; +import org.springframework.ai.image.ImageEditPrompt; +import org.springframework.ai.image.ImageResponse; +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.ai.openai.OpenAiImageEditModel; +import org.springframework.ai.openai.OpenAiImageEditOptions; +import org.springframework.ai.openai.api.OpenAiImageApi; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.test.autoconfigure.web.client.RestClientTest; +import org.springframework.context.annotation.Bean; +import org.springframework.core.io.ClassPathResource; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpMethod; +import org.springframework.http.MediaType; +import org.springframework.test.web.client.MockRestServiceServer; +import org.springframework.util.MimeTypeUtils; +import org.springframework.web.client.RestClient; + +@RestClientTest(OpenAiImageEditModelTest.Config.class) +@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+") 
+public class OpenAiImageEditModelTest { + + @Autowired + private OpenAiImageEditModel openAiImageEditModel; + + @Autowired + private MockRestServiceServer server; + + @BeforeEach + void setup() { + // Setup code if needed + } + + @AfterEach + void resetMockServer() { + this.server.reset(); + } + + @Test + void imageEditTest() { + prepareMock(); + + Media image = new Media(MimeTypeUtils.IMAGE_PNG, new ClassPathResource("test.png")); + ImageEditPrompt prompt = new ImageEditPrompt(new ImageEditMessage(List.of(image), "Add a sunset background."), + OpenAiImageEditOptions.builder().build()); + + ImageResponse response = this.openAiImageEditModel.call(prompt); + + assertThat(response).isNotNull(); + assertThat(response.getResults()).hasSize(1); + } + + private void prepareMock() { + HttpHeaders httpHeaders = new HttpHeaders(); + httpHeaders.set("Some-Header", "SomeValue"); + + this.server.expect(requestTo("https://api.openai.com/v1/images/edits")) + .andExpect(method(HttpMethod.POST)) + .andExpect(header(HttpHeaders.AUTHORIZATION, "Bearer OPENAI_API_KEY")) + .andRespond(withSuccess(getJson(), MediaType.APPLICATION_JSON).headers(httpHeaders)); + } + + private String getJson() { + return """ + { + "created": 1589478378, + "data": [ + { + "url": "https://example.com/edited-image.jpg" + } + ] + } + """; + } + + @SpringBootConfiguration + static class Config { + + @Bean + public OpenAiImageApi imageApi(RestClient.Builder builder) { + return OpenAiImageApi.builder() + .apiKey(new SimpleApiKey("OPENAI_API_KEY")) + .restClientBuilder(builder) + .build(); + } + + @Bean + public OpenAiImageEditModel openAiImageEditModel(OpenAiImageApi openAiImageApi) { + return new OpenAiImageEditModel(openAiImageApi); + } + + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditMessage.java b/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditMessage.java new file mode 100644 index 00000000000..b3244095e11 --- /dev/null +++ 
b/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditMessage.java @@ -0,0 +1,42 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.image; + +import java.util.List; + +import org.springframework.ai.content.Media; + +public class ImageEditMessage { + + private List image; + + private String prompt; + + public ImageEditMessage(List image, String prompt) { + this.image = image; + this.prompt = prompt; + } + + public List getImage() { + return image; + } + + public String getPrompt() { + return prompt; + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditModel.java b/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditModel.java new file mode 100644 index 00000000000..d3ddf6cfc1f --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditModel.java @@ -0,0 +1,26 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.image; + +import org.springframework.ai.model.Model; + +@FunctionalInterface +public interface ImageEditModel extends Model { + + ImageResponse call(ImageEditPrompt request); + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditOptions.java b/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditOptions.java new file mode 100644 index 00000000000..629dd288aed --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditOptions.java @@ -0,0 +1,36 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.image; + +import org.springframework.core.io.Resource; +import org.springframework.lang.Nullable; + +public interface ImageEditOptions extends ImageOptions { + + @Nullable + Resource getMask(); + + @Nullable + String getQuality(); + + @Nullable + String getSize(); + + @Nullable + String getUser(); + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditPrompt.java b/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditPrompt.java new file mode 100644 index 00000000000..9261f6feb3b --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/image/ImageEditPrompt.java @@ -0,0 +1,62 @@ +/* + * Copyright 2023-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.image; + +import java.util.Objects; + +import org.springframework.ai.model.ModelRequest; + +public class ImageEditPrompt implements ModelRequest { + + private final ImageEditMessage message; + + private ImageEditOptions imageEditModelOptions; + + public ImageEditPrompt(ImageEditMessage message, ImageEditOptions imageEditModelOptions) { + this.message = message; + this.imageEditModelOptions = imageEditModelOptions; + } + + @Override + public ImageEditMessage getInstructions() { + return message; + } + + @Override + public ImageEditOptions getOptions() { + return this.imageEditModelOptions; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ImageEditPrompt that = (ImageEditPrompt) o; + return Objects.equals(message, that.message) + && Objects.equals(imageEditModelOptions, that.imageEditModelOptions); + } + + @Override + public int hashCode() { + return Objects.hash(message, imageEditModelOptions); + } + +} diff --git a/spring-ai-model/src/main/java/org/springframework/ai/model/SpringAIModelProperties.java b/spring-ai-model/src/main/java/org/springframework/ai/model/SpringAIModelProperties.java index 77aa39ca263..ac00e1537dc 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/model/SpringAIModelProperties.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/model/SpringAIModelProperties.java @@ -34,6 +34,8 @@ private SpringAIModelProperties() { public static final String IMAGE_MODEL = MODEL_PREFIX + ".image"; + public static final String IMAGE_EDIT_MODEL = MODEL_PREFIX + ".image.edit"; + public static final String AUDIO_TRANSCRIPTION_MODEL = MODEL_PREFIX + ".audio.transcription"; public static final String AUDIO_SPEECH_MODEL = MODEL_PREFIX + ".audio.speech";