From 6a9743dc07fa3aefe72618b5eb4cf0f2665e00b5 Mon Sep 17 00:00:00 2001 From: Himanich <41684337+Himanich@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:41:44 +0530 Subject: [PATCH] Handle UUID conflicts (#3) * Resolve conflicts Co-authored-by: Himani Chauhan --- oak-upgrade/pom.xml | 19 ++- .../oak/upgrade/ImageEmbeddingComparison.java | 111 ++++++++++-------- .../oak/upgrade/UUIDConflictDetector.java | 49 ++++---- 3 files changed, 98 insertions(+), 81 deletions(-) diff --git a/oak-upgrade/pom.xml b/oak-upgrade/pom.xml index 5e4a83d6ec0..0f95f8ea43d 100644 --- a/oak-upgrade/pom.xml +++ b/oak-upgrade/pom.xml @@ -246,20 +246,29 @@ ${project.version} test + ai.djl api - 0.19.0 + 0.22.0 + ai.djl.tensorflow tensorflow-engine 0.22.0 + - ai.djl.pytorch - pytorch-engine - 0.23.0 + ai.djl.tensorflow + tensorflow-model-zoo + 0.22.0 + + + + ai.djl + basicdataset + 0.22.0 @@ -288,5 +297,7 @@ 1.1.1 test + + diff --git a/oak-upgrade/src/main/java/org/apache/jackrabbit/oak/upgrade/ImageEmbeddingComparison.java b/oak-upgrade/src/main/java/org/apache/jackrabbit/oak/upgrade/ImageEmbeddingComparison.java index a879ab3ddee..ca41ddcbdf4 100644 --- a/oak-upgrade/src/main/java/org/apache/jackrabbit/oak/upgrade/ImageEmbeddingComparison.java +++ b/oak-upgrade/src/main/java/org/apache/jackrabbit/oak/upgrade/ImageEmbeddingComparison.java @@ -1,84 +1,91 @@ package org.apache.jackrabbit.oak.upgrade; -import ai.djl.Application; import ai.djl.Model; -import ai.djl.ModelException; import ai.djl.inference.Predictor; +import ai.djl.modality.Classifications; import ai.djl.modality.cv.Image; import ai.djl.modality.cv.ImageFactory; +import ai.djl.modality.cv.transform.Normalize; +import ai.djl.modality.cv.transform.Resize; +import ai.djl.modality.cv.transform.ToTensor; import ai.djl.ndarray.NDArray; import ai.djl.ndarray.NDList; +import ai.djl.ndarray.types.DataType; +import ai.djl.ndarray.types.Shape; +import ai.djl.repository.zoo.Criteria; +import ai.djl.repository.zoo.ModelZoo; import ai.djl.translate.Batchifier; import ai.djl.translate.TranslateException; import ai.djl.translate.Translator; import ai.djl.translate.TranslatorContext; -import ai.djl.translate.TranslatorFactory; -import java.io.IOException; +import java.io.FileInputStream; import java.io.InputStream; public class ImageEmbeddingComparison { - public static void compareImages(InputStream sourceStream, InputStream targetStream) throws IOException, ModelException, TranslateException { - // Load the source and target streams (replace with actual streams) -// InputStream sourceStream = getSourceStream(); // Replace with your actual stream -// InputStream targetStream = getTargetStream(); // Replace with your actual stream + public static void compareImages(InputStream inputStream1, InputStream inputStream2) throws Exception { + // Paths to your images +// InputStream inputStream1 = new FileInputStream("path_to_image1.jpg"); +// InputStream inputStream2 = new FileInputStream("path_to_image2.jpg"); - // Pre-process the images from input streams - Image sourceImage = ImageFactory.getInstance().fromInputStream(sourceStream); - Image targetImage = ImageFactory.getInstance().fromInputStream(targetStream); + // Load images using DJL ImageFactory + Image img1 = ImageFactory.getInstance().fromInputStream(inputStream1); + Image img2 = ImageFactory.getInstance().fromInputStream(inputStream2); - // Load the pre-trained CLIP model - String modelPath = "huggingface/clip-vit-base-patch16"; // Replace with actual Hugging Face model path - // Load the model using PyTorch engine - Model model = Model.newInstance(modelPath); // Set application to IMAGE_CLASSIFICATION - // Extract embeddings from both images using CLIP - float[] sourceEmbedding = extractImageEmbedding(model, sourceImage); - float[] targetEmbedding = extractImageEmbedding(model, targetImage); + // Load the pre-trained ResNet model for feature extraction (ResNet50 or ResNet18, etc.) + Criteria criteria = Criteria.builder() + .setTypes(Image.class, NDArray.class) + .optTranslator(new FeatureExtractionTranslator()) + .build(); - // Calculate cosine similarity between the image embeddings - double similarity = cosineSimilarity(sourceEmbedding, targetEmbedding); - System.out.println("Cosine Similarity: " + similarity); - } + try (Model model = ModelZoo.loadModel(criteria)) { + // Create a predictor for feature extraction + try (Predictor predictor = model.newPredictor(new FeatureExtractionTranslator())) { + // Extract features for both images + NDArray feature1 = predictor.predict(img1); + NDArray feature2 = predictor.predict(img2); - private static float[] extractImageEmbedding(Model model, Image image) throws ModelException, TranslateException { - Translator translator = new Translator() { - @Override - public NDList processInput(TranslatorContext ctx, Image input) { - NDArray array = input.toNDArray(ctx.getNDManager()); - return new NDList(array); + // Compute cosine similarity + float similarity = computeCosineSimilarity(feature1, feature2); + System.out.println("Cosine Similarity: " + similarity); } + } + } - @Override - public float[] processOutput(TranslatorContext ctx, NDList list) { - return list.singletonOrThrow().toFloatArray(); - } + // Helper function to compute cosine similarity + private static float computeCosineSimilarity(NDArray a, NDArray b) { + float dotProduct = a.dot(b).getFloat(); + float normA = a.norm().getFloat(); + float normB = b.norm().getFloat(); + return dotProduct / (normA * normB); + } - @Override - public Batchifier getBatchifier() { - return null; // No batching needed - } - }; + // Translator for feature extraction using ResNet + private static class FeatureExtractionTranslator implements Translator { - // Use the predictor to extract image embedding - try (Predictor predictor = model.newPredictor(translator)) { - return predictor.predict(image); + @Override + public NDList processInput(TranslatorContext ctx, Image input) { + // Resize, convert to tensor, and normalize the image + NDArray array = input.toNDArray(ctx.getNDManager(), Image.Flag.COLOR); + array = array.toType(DataType.FLOAT32, false); + array = array.div(255f); // Normalize pixel values to [0, 1] + // array = Resize.resize(array, new Shape(224, 224, 3)); + //array = Normalize.normalize(array, new float[]{0.485f, 0.456f, 0.406f}, + // new float[]{0.229f, 0.224f, 0.225f}); + array = array.transpose(2, 0, 1); // Convert to CHW format for PyTorch + return new NDList(array.expandDims(0)); // Add batch dimension } - } - // Method to calculate cosine similarity - private static double cosineSimilarity(float[] vectorA, float[] vectorB) { - double dotProduct = 0.0; - double normA = 0.0; - double normB = 0.0; - - for (int i = 0; i < vectorA.length; i++) { - dotProduct += vectorA[i] * vectorB[i]; - normA += Math.pow(vectorA[i], 2); - normB += Math.pow(vectorB[i], 2); + @Override + public NDArray processOutput(TranslatorContext ctx, NDList list) { + return list.singletonOrThrow(); // Extract the model output } - return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB)); + @Override + public Batchifier getBatchifier() { + return null; // No batching needed + } } } diff --git a/oak-upgrade/src/main/java/org/apache/jackrabbit/oak/upgrade/UUIDConflictDetector.java b/oak-upgrade/src/main/java/org/apache/jackrabbit/oak/upgrade/UUIDConflictDetector.java index 003dc122cf2..61ef0b2edc9 100644 --- a/oak-upgrade/src/main/java/org/apache/jackrabbit/oak/upgrade/UUIDConflictDetector.java +++ b/oak-upgrade/src/main/java/org/apache/jackrabbit/oak/upgrade/UUIDConflictDetector.java @@ -254,31 +254,30 @@ private void resolveConflict(String sourcePath, String targetPath) throws IOExce boolean isMetadataMatch = compareMetadata(sourcePath, targetPath); log.info("metadata match for source path: {}, target path: {} isMetadataMatch: {}", sourcePath, targetPath, isMetadataMatch); if (isMetadataMatch) { - // proceed with Binary Comparison - NodeState sourceNode = getNodeAtPath(sourceStore.getRoot(), sourcePath); - NodeState targetNode = getNodeAtPath(targetStore.getRoot(), targetPath); - - if (isImage(sourceNode) && isImage(targetNode)) { - try (InputStream sourceStream = getBinaryContent(sourceNode); - InputStream targetStream = getBinaryContent(targetNode)) { - log.info("source stream: {}, target stream: {}", sourceStream, targetStream); - if (sourceStream != null && targetStream != null) { - // Send to image comparison service/ - ImageEmbeddingComparison.compareImages(sourceStream, targetStream); - log.info("Image comparison completed for source and target images"); - } else { - log.warn("Failed to fetch InputStream for source or target image. SourceStream: {}, TargetStream: {}", sourceStream, targetStream); - } - } catch (ModelException e) { - log.warn("Failed to fetch InputStream for source or target image. SourceStream"); - } catch (TranslateException e) { - log.warn("Failed to fetch InputStream for source or target image. SourceStream"); - } - } - - } else { - log.info("metadata mismatch for source path: {}, target path: {}", sourcePath, targetPath); - } + // proceed with Binary Comparison + NodeState sourceNode = getNodeAtPath(sourceStore.getRoot(), sourcePath); + NodeState targetNode = getNodeAtPath(targetStore.getRoot(), targetPath); + + if (isImage(sourceNode) && isImage(targetNode)) { + try (InputStream sourceStream = getBinaryContent(sourceNode); + InputStream targetStream = getBinaryContent(targetNode)) { + log.info("source stream: {}, target stream: {}", sourceStream, targetStream); + if (sourceStream != null && targetStream != null) { + // Send to image comparison service/ + ImageEmbeddingComparison.compareImages(sourceStream, targetStream); + log.info("Image comparison completed for source and target images"); + } else { + log.warn("Failed to fetch InputStream for source or target image. SourceStream: {}, TargetStream: {}", sourceStream, targetStream); + } + } catch (Exception e) { + log.warn(e.getMessage()); + log.warn("Failed to fetch InputStream for source or target image. SourceStream"); + } + } + + } else { + log.info("metadata mismatch for source path: {}, target path: {}", sourcePath, targetPath); + } } private boolean compareMetadata(String sourcePath, String targetPath) {