diff --git a/docs/js/inject-api-links.js b/docs/js/inject-api-links.js index 6c8a4a3b3..89082c67d 100644 --- a/docs/js/inject-api-links.js +++ b/docs/js/inject-api-links.js @@ -1,18 +1,17 @@ window.addEventListener("DOMContentLoaded", function () { var windowPathNameSplits = window.location.pathname.split("/"); - var majorVersionRegex = new RegExp("(\\d+[.]\\d+)") + var majorVersionRegex = new RegExp("(\\d+[.]\\d+)"); var latestRegex = new RegExp("latest"); - if (majorVersionRegex.test(windowPathNameSplits[1])) { // On landing page docs.hopsworks.api/3.0 - URL contains major version + if (majorVersionRegex.test(windowPathNameSplits[1])) { // On landing page docs.hopsworks.api/4.0 - URL contains major version // Version API dropdown document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + windowPathNameSplits[1] + "/generated/api/login/"; - document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + windowPathNameSplits[1] + "/generated/api/connection_api/"; - document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + windowPathNameSplits[1] + "/generated/connection_api/"; - } else { // on docs.hopsworks.api/feature-store-api/3.0 / docs.hopsworks.api/hopsworks-api/3.0 / docs.hopsworks.api/machine-learning-api/3.0 + document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + windowPathNameSplits[1] + "/javadoc"; + } else { // on / docs.hopsworks.api/hopsworks-api/4.0 if (latestRegex.test(windowPathNameSplits[2]) || latestRegex.test(windowPathNameSplits[1])) { - var majorVersion = "latest"; + var majorVersion = "latest"; } else { - var apiVersion = windowPathNameSplits[2]; - var majorVersion = apiVersion.match(majorVersionRegex)[0]; + var apiVersion = windowPathNameSplits[2]; + var majorVersion = apiVersion.match(majorVersionRegex)[0]; } // Version main navigation document.getElementsByClassName("md-tabs__link")[0].href = "https://docs.hopsworks.ai/" + majorVersion; @@ -24,8 +23,6 @@ window.addEventListener("DOMContentLoaded", function () { document.getElementsByClassName("md-tabs__link")[6].href = "https://docs.hopsworks.ai/" + majorVersion + "/admin/"; // Version API dropdown document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + majorVersion + "/generated/api/login/"; - document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/generated/api/connection_api/"; - document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/javadoc"; - document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + majorVersion + "/generated/connection_api/"; + document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + majorVersion + "/javadoc"; } }); diff --git a/java/beam/pom.xml b/java/beam/pom.xml index b240612d9..c90394fa5 100644 --- a/java/beam/pom.xml +++ b/java/beam/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 4.1.0-SNAPSHOT + 4.2.0-SNAPSHOT 4.0.0 diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java b/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java index c059520f7..db01f295a 100644 --- a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java +++ b/java/beam/src/main/java/com/logicalclocks/hsfs/beam/FeatureStore.java @@ -160,7 +160,7 @@ public 
StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver @Override public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer version, String description, List primaryKeys, List partitionKeys, String hudiPrecombineKey, boolean onlineEnabled, - StatisticsConfig statisticsConfig, String eventTime, OnlineConfig onlineConfig) + TimeTravelFormat timeTravelFormat, StatisticsConfig statisticsConfig, String eventTime, OnlineConfig onlineConfig) throws IOException, FeatureStoreException { throw new UnsupportedOperationException("Not supported for Beam"); } diff --git a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java b/java/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java index 9d3c41ee6..e74b51ade 100644 --- a/java/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java +++ b/java/beam/src/main/java/com/logicalclocks/hsfs/beam/StreamFeatureGroup.java @@ -17,6 +17,14 @@ package com.logicalclocks.hsfs.beam; +import java.io.IOException; +import java.text.ParseException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.beam.sdk.values.PCollection; + import com.logicalclocks.hsfs.Feature; import com.logicalclocks.hsfs.FeatureGroupBase; import com.logicalclocks.hsfs.FeatureStoreException; @@ -26,19 +34,14 @@ import com.logicalclocks.hsfs.StatisticsConfig; import com.logicalclocks.hsfs.Storage; import com.logicalclocks.hsfs.StorageConnector; -import com.logicalclocks.hsfs.beam.engine.FeatureGroupEngine; +import com.logicalclocks.hsfs.TimeTravelFormat; import com.logicalclocks.hsfs.beam.engine.BeamProducer; +import com.logicalclocks.hsfs.beam.engine.FeatureGroupEngine; import com.logicalclocks.hsfs.constructor.QueryBase; import com.logicalclocks.hsfs.metadata.Statistics; + import lombok.Builder; import lombok.NonNull; -import org.apache.beam.sdk.values.PCollection; - -import java.io.IOException; -import java.text.ParseException; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; public class StreamFeatureGroup extends FeatureGroupBase> { @@ -48,8 +51,9 @@ public class StreamFeatureGroup extends FeatureGroupBase> { @Builder public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String description, List primaryKeys, List partitionKeys, String hudiPrecombineKey, - boolean onlineEnabled, List features, StatisticsConfig statisticsConfig, String onlineTopicName, - String eventTime, OnlineConfig onlineConfig, StorageConnector storageConnector, String path) { + boolean onlineEnabled, TimeTravelFormat timeTravelFormat, List features, + StatisticsConfig statisticsConfig, String onlineTopicName, String eventTime, + OnlineConfig onlineConfig, StorageConnector storageConnector, String path) { this(); this.featureStore = featureStore; this.name = name; @@ -61,6 +65,7 @@ public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integ ? partitionKeys.stream().map(String::toLowerCase).collect(Collectors.toList()) : null; this.hudiPrecombineKey = hudiPrecombineKey != null ? hudiPrecombineKey.toLowerCase() : null; this.onlineEnabled = onlineEnabled; + this.timeTravelFormat = timeTravelFormat != null ? timeTravelFormat : TimeTravelFormat.HUDI; this.features = features; this.statisticsConfig = statisticsConfig != null ? 
statisticsConfig : new StatisticsConfig(); this.onlineTopicName = onlineTopicName; diff --git a/java/flink/pom.xml b/java/flink/pom.xml index 7e39ece2a..11564004f 100644 --- a/java/flink/pom.xml +++ b/java/flink/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 4.1.0-SNAPSHOT + 4.2.0-SNAPSHOT 4.0.0 diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java b/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java index b6314bad4..60dcbaeb6 100644 --- a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java +++ b/java/flink/src/main/java/com/logicalclocks/hsfs/flink/FeatureStore.java @@ -165,8 +165,9 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer version, String description, List primaryKeys, List partitionKeys, String hudiPrecombineKey, boolean onlineEnabled, - StatisticsConfig statisticsConfig, String eventTime, - OnlineConfig onlineConfig) + TimeTravelFormat timeTravelFormat, + StatisticsConfig statisticsConfig, + String eventTime, OnlineConfig onlineConfig) throws IOException, FeatureStoreException { throw new UnsupportedOperationException("Not supported for Flink"); } diff --git a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java b/java/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java index c3cd6cbd0..0fa821fb3 100644 --- a/java/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java +++ b/java/flink/src/main/java/com/logicalclocks/hsfs/flink/StreamFeatureGroup.java @@ -17,6 +17,15 @@ package com.logicalclocks.hsfs.flink; +import java.io.IOException; +import java.text.ParseException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.DataStreamSink; + import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.logicalclocks.hsfs.Feature; import com.logicalclocks.hsfs.FeatureGroupBase; @@ -27,22 +36,14 @@ import com.logicalclocks.hsfs.StatisticsConfig; import com.logicalclocks.hsfs.Storage; import com.logicalclocks.hsfs.StorageConnector; +import com.logicalclocks.hsfs.TimeTravelFormat; import com.logicalclocks.hsfs.constructor.QueryBase; - +import com.logicalclocks.hsfs.flink.engine.FeatureGroupEngine; import com.logicalclocks.hsfs.metadata.Statistics; -import com.logicalclocks.hsfs.flink.engine.FeatureGroupEngine; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.NonNull; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.DataStreamSink; - -import java.io.IOException; -import java.text.ParseException; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) @@ -53,9 +54,9 @@ public class StreamFeatureGroup extends FeatureGroupBase> { @Builder public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String description, List primaryKeys, List partitionKeys, String hudiPrecombineKey, - boolean onlineEnabled, List features, StatisticsConfig statisticsConfig, - String onlineTopicName, String topicName, String notificationTopicName, String eventTime, - OnlineConfig onlineConfig, StorageConnector storageConnector, String path) { + boolean onlineEnabled, TimeTravelFormat timeTravelFormat, 
List features, + StatisticsConfig statisticsConfig, String onlineTopicName, String topicName, String notificationTopicName, + String eventTime, OnlineConfig onlineConfig, StorageConnector storageConnector, String path) { this(); this.featureStore = featureStore; this.name = name; @@ -67,6 +68,7 @@ public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integ ? partitionKeys.stream().map(String::toLowerCase).collect(Collectors.toList()) : null; this.hudiPrecombineKey = hudiPrecombineKey != null ? hudiPrecombineKey.toLowerCase() : null; this.onlineEnabled = onlineEnabled; + this.timeTravelFormat = timeTravelFormat != null ? timeTravelFormat : TimeTravelFormat.HUDI; this.features = features; this.statisticsConfig = statisticsConfig != null ? statisticsConfig : new StatisticsConfig(); this.onlineTopicName = onlineTopicName; diff --git a/java/hsfs/pom.xml b/java/hsfs/pom.xml index c56061427..b7bd606c2 100644 --- a/java/hsfs/pom.xml +++ b/java/hsfs/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 4.1.0-SNAPSHOT + 4.2.0-SNAPSHOT 4.0.0 diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java b/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java index ad391ef90..057838cad 100644 --- a/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java +++ b/java/hsfs/src/main/java/com/logicalclocks/hsfs/FeatureStoreBase.java @@ -122,8 +122,8 @@ public abstract Object getOrCreateStreamFeatureGroup(String name, Integer versio public abstract Object getOrCreateStreamFeatureGroup(String name, Integer version, String description, List primaryKeys, List partitionKeys, String hudiPrecombineKey, boolean onlineEnabled, - StatisticsConfig statisticsConfig, String eventTime, - OnlineConfig onlineConfig) + TimeTravelFormat timeTravelFormat, StatisticsConfig statisticsConfig, + String eventTime, OnlineConfig onlineConfig) throws IOException, FeatureStoreException; public abstract Object createExternalFeatureGroup(); diff --git a/java/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java b/java/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java index 4e0fb0419..d6c3d0b2e 100644 --- a/java/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java +++ b/java/hsfs/src/main/java/com/logicalclocks/hsfs/TimeTravelFormat.java @@ -19,5 +19,6 @@ public enum TimeTravelFormat { NONE, - HUDI + HUDI, + DELTA } diff --git a/java/pom.xml b/java/pom.xml index cc3dd776c..0a5cc707f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -7,7 +7,7 @@ com.logicalclocks hsfs-parent pom - 4.1.0-SNAPSHOT + 4.2.0-SNAPSHOT hsfs spark diff --git a/java/spark/pom.xml b/java/spark/pom.xml index 185da5d20..4c2d188fb 100644 --- a/java/spark/pom.xml +++ b/java/spark/pom.xml @@ -22,7 +22,7 @@ hsfs-parent com.logicalclocks - 4.1.0-SNAPSHOT + 4.2.0-SNAPSHOT 4.0.0 diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java b/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java index 33e3b6058..65dbc66d7 100644 --- a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java +++ b/java/spark/src/main/java/com/logicalclocks/hsfs/spark/FeatureStore.java @@ -404,7 +404,7 @@ public StreamFeatureGroup.StreamFeatureGroupBuilder createStreamFeatureGroup() { public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer version) throws IOException, FeatureStoreException { return featureGroupEngine.getOrCreateStreamFeatureGroup(this, name, version, null, - null, null, null, false, null, null, 
null); + null, null, null, false, TimeTravelFormat.HUDI, null, null, null); } /** @@ -438,7 +438,7 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver boolean onlineEnabled, String eventTime) throws IOException, FeatureStoreException { return featureGroupEngine.getOrCreateStreamFeatureGroup(this, name, version, null, - primaryKeys, null, null, onlineEnabled, null, eventTime, null); + primaryKeys, null, null, onlineEnabled, TimeTravelFormat.HUDI, null, eventTime, null); } /** @@ -477,7 +477,7 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver return featureGroupEngine.getOrCreateStreamFeatureGroup(this, name, version, null, - primaryKeys, partitionKeys, null, onlineEnabled, null, eventTime, null); + primaryKeys, partitionKeys, null, onlineEnabled, TimeTravelFormat.HUDI, null, eventTime, null); } /** @@ -506,6 +506,7 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver * the first primary key of the feature group will be used as hudi precombine key. * @param onlineEnabled Define whether the feature group should be made available also in the online feature store * for low latency access. + * @param timeTravelFormat Format used for time travel, defaults to `"HUDI"`. * @param statisticsConfig A configuration object, to generally enable descriptive statistics computation for * this feature group, `"correlations`" to turn on feature correlation computation, * `"histograms"` to compute feature value frequencies and `"exact_uniqueness"` to compute @@ -523,13 +524,14 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer version, String description, List primaryKeys, List partitionKeys, String hudiPrecombineKey, boolean onlineEnabled, - StatisticsConfig statisticsConfig, String eventTime, - OnlineConfig onlineConfig) + TimeTravelFormat timeTravelFormat, + StatisticsConfig statisticsConfig, + String eventTime, OnlineConfig onlineConfig) throws IOException, FeatureStoreException { return featureGroupEngine.getOrCreateStreamFeatureGroup(this, name, version, description, - primaryKeys, partitionKeys, hudiPrecombineKey, onlineEnabled, statisticsConfig, eventTime, - onlineConfig); + primaryKeys, partitionKeys, hudiPrecombineKey, onlineEnabled, timeTravelFormat, + statisticsConfig, eventTime, onlineConfig); } /** diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java b/java/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java index 0c8b9bae3..4f423e8f3 100644 --- a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java +++ b/java/spark/src/main/java/com/logicalclocks/hsfs/spark/StreamFeatureGroup.java @@ -17,13 +17,23 @@ package com.logicalclocks.hsfs.spark; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.io.IOException; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; -import com.logicalclocks.hsfs.spark.constructor.Query; -import com.logicalclocks.hsfs.spark.engine.FeatureGroupEngine; -import com.logicalclocks.hsfs.spark.engine.StatisticsEngine; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.streaming.StreamingQuery; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; 
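The Java changes above thread a `TimeTravelFormat` argument (with the new `DELTA` value) through the `StreamFeatureGroup` builders and the `getOrCreateStreamFeatureGroup` overloads, falling back to `HUDI` when it is null. A minimal sketch of choosing the format from the Python client, assuming it exposes the matching `time_travel_format` option; the feature group name and primary key below are placeholders:

```python
import hopsworks

# Hedged sketch: assumes the Python client mirrors the Java change via the
# `time_travel_format` string option ("HUDI" default, "DELTA" newly allowed).
project = hopsworks.login()          # credentials resolved from the environment
fs = project.get_feature_store()

fg = fs.get_or_create_feature_group(
    name="transactions_fg",          # hypothetical feature group
    version=1,
    primary_key=["tx_id"],           # hypothetical primary key
    online_enabled=True,
    stream=True,
    time_travel_format="DELTA",      # pick Delta instead of the Hudi default
)
```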
import com.logicalclocks.hsfs.EntityEndpointType; import com.logicalclocks.hsfs.Feature; +import com.logicalclocks.hsfs.FeatureGroupBase; import com.logicalclocks.hsfs.FeatureStoreException; import com.logicalclocks.hsfs.HudiOperationType; import com.logicalclocks.hsfs.JobConfiguration; @@ -31,26 +41,16 @@ import com.logicalclocks.hsfs.StatisticsConfig; import com.logicalclocks.hsfs.Storage; import com.logicalclocks.hsfs.StorageConnector; -import com.logicalclocks.hsfs.FeatureGroupBase; +import com.logicalclocks.hsfs.TimeTravelFormat; import com.logicalclocks.hsfs.metadata.Statistics; +import com.logicalclocks.hsfs.spark.constructor.Query; +import com.logicalclocks.hsfs.spark.engine.FeatureGroupEngine; +import com.logicalclocks.hsfs.spark.engine.StatisticsEngine; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.NonNull; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.streaming.StreamingQuery; - -import java.io.IOException; -import java.text.ParseException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public class StreamFeatureGroup extends FeatureGroupBase> { @@ -61,9 +61,10 @@ public class StreamFeatureGroup extends FeatureGroupBase> { @Builder public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String description, List primaryKeys, List partitionKeys, String hudiPrecombineKey, - boolean onlineEnabled, List features, StatisticsConfig statisticsConfig, - String onlineTopicName, String topicName, String notificationTopicName, String eventTime, - OnlineConfig onlineConfig, StorageConnector storageConnector, String path) { + boolean onlineEnabled, TimeTravelFormat timeTravelFormat, List features, + StatisticsConfig statisticsConfig, String onlineTopicName, String topicName, + String notificationTopicName, String eventTime, OnlineConfig onlineConfig, + StorageConnector storageConnector, String path) { this(); this.featureStore = featureStore; this.name = name; @@ -75,6 +76,7 @@ public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integ ? partitionKeys.stream().map(String::toLowerCase).collect(Collectors.toList()) : null; this.hudiPrecombineKey = hudiPrecombineKey != null ? hudiPrecombineKey.toLowerCase() : null; this.onlineEnabled = onlineEnabled; + this.timeTravelFormat = timeTravelFormat != null ? timeTravelFormat : TimeTravelFormat.HUDI; this.features = features; this.statisticsConfig = statisticsConfig != null ? 
statisticsConfig : new StatisticsConfig(); this.onlineTopicName = onlineTopicName; diff --git a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java b/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java index 96ddfd5f2..f791d8bcd 100644 --- a/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java +++ b/java/spark/src/main/java/com/logicalclocks/hsfs/spark/engine/FeatureGroupEngine.java @@ -364,7 +364,8 @@ public List getFeatureGroups(FeatureStore featureStore, String fgN public StreamFeatureGroup getOrCreateStreamFeatureGroup(FeatureStore featureStore, String name, Integer version, String description, List primaryKeys, List partitionKeys, String hudiPrecombineKey, - boolean onlineEnabled, StatisticsConfig statisticsConfig, + boolean onlineEnabled, TimeTravelFormat timeTravelFormat, + StatisticsConfig statisticsConfig, String eventTime, OnlineConfig onlineConfig) throws IOException, FeatureStoreException { StreamFeatureGroup featureGroup; @@ -381,6 +382,7 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(FeatureStore featureStor .partitionKeys(partitionKeys) .hudiPrecombineKey(hudiPrecombineKey) .onlineEnabled(onlineEnabled) + .timeTravelFormat(timeTravelFormat) .statisticsConfig(statisticsConfig) .eventTime(eventTime) .onlineConfig(onlineConfig) diff --git a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java b/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java index bedd9716e..86a85bbdc 100644 --- a/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java +++ b/java/spark/src/test/java/com/logicalclocks/hsfs/spark/TestFeatureGroup.java @@ -20,6 +20,7 @@ import com.logicalclocks.hsfs.Feature; import com.logicalclocks.hsfs.FeatureStoreException; import com.logicalclocks.hsfs.Project; +import com.logicalclocks.hsfs.TimeTravelFormat; import com.logicalclocks.hsfs.metadata.FeatureGroupApi; import com.logicalclocks.hsfs.FeatureGroupBase; import com.logicalclocks.hsfs.metadata.HopsworksClient; @@ -67,7 +68,7 @@ public void testFeatureGroupPrimaryKey() { StreamFeatureGroup featureGroup = new StreamFeatureGroup(featureStore, "fgName", 1, "description", Collections.singletonList("primaryKey"), Collections.singletonList("partitionKey"), "hudiPrecombineKey", - true, features, null, "onlineTopicName", null, null, null, null, null, null); + true, TimeTravelFormat.HUDI, features, null, "onlineTopicName", null, null, null, null, null, null); Exception pkException = assertThrows(FeatureStoreException.class, () -> { featureGroupEngine.saveFeatureGroupMetaData(featureGroup, @@ -93,7 +94,7 @@ public void testFeatureGroupEventTimeFeature() { StreamFeatureGroup featureGroup = new StreamFeatureGroup(featureStore, "fgName", 1, "description", Collections.singletonList("featureA"), null, null, - true, features, null, "onlineTopicName", null, null, "eventTime", null, null, null); + true, TimeTravelFormat.HUDI, features, null, "onlineTopicName", null, null, "eventTime", null, null, null); Exception eventTimeException = assertThrows(FeatureStoreException.class, () -> { streamFeatureGroupEngine.saveFeatureGroupMetaData(featureGroup, @@ -119,7 +120,7 @@ public void testFeatureGroupPartitionPrecombineKeys() { StreamFeatureGroup featureGroup = new StreamFeatureGroup(featureStore, "fgName", 1, "description", Collections.singletonList("featureA"), Collections.singletonList("partitionKey"), "hudiPrecombineKey", - true, features, null, "onlineTopicName", 
null, null, null, null, null, null); + true, TimeTravelFormat.HUDI, features, null, "onlineTopicName", null, null, null, null, null, null); Exception partitionException = assertThrows(FeatureStoreException.class, () -> { streamFeatureGroupEngine.saveFeatureGroupMetaData(featureGroup, @@ -164,7 +165,7 @@ public void testFeatureGroupAppendFeaturesResetSubject() throws FeatureStoreExce StreamFeatureGroup featureGroup = new StreamFeatureGroup(featureStore, "fgName", 1, "description", Collections.singletonList("featureA"), null, null, - true, features, null, "onlineTopicName", null, null, "eventTime", null, null, null); + true, TimeTravelFormat.HUDI, features, null, "onlineTopicName", null, null, "eventTime", null, null, null); featureGroup.featureGroupEngine = featureGroupEngine; // Act diff --git a/locust_benchmark/Dockerfile b/locust_benchmark/Dockerfile index e437ab9b2..47ef44106 100644 --- a/locust_benchmark/Dockerfile +++ b/locust_benchmark/Dockerfile @@ -1,4 +1,4 @@ -FROM locustio/locust:2.17.0 +FROM locustio/locust:2.23.1 USER root diff --git a/locust_benchmark/Jenkinsfile b/locust_benchmark/Jenkinsfile new file mode 100644 index 000000000..9d4465e97 --- /dev/null +++ b/locust_benchmark/Jenkinsfile @@ -0,0 +1,20 @@ +@Library("jenkins-library@main") + +import com.logicalclocks.jenkins.k8s.ImageBuilder + + +node("local") { + stage('Clone repository') { + checkout scm + } + + stage('Build and push image(s)') { + version = readFile "${env.WORKSPACE}/locust_benchmark/KUBE_IMAGE_VERSION" + withEnv(["VERSION=${version.trim()}"]) { + + def builder = new ImageBuilder(this) + m = readFile "${env.WORKSPACE}/locust_benchmark/build-manifest.json" + builder.run(m) + } + } +} \ No newline at end of file diff --git a/locust_benchmark/KUBE_IMAGE_VERSION b/locust_benchmark/KUBE_IMAGE_VERSION new file mode 100644 index 000000000..8b25206ff --- /dev/null +++ b/locust_benchmark/KUBE_IMAGE_VERSION @@ -0,0 +1 @@ +master \ No newline at end of file diff --git a/locust_benchmark/README.md b/locust_benchmark/README.md index c390b39db..eda8b440e 100644 --- a/locust_benchmark/README.md +++ b/locust_benchmark/README.md @@ -87,6 +87,7 @@ echo "[YOUR KEY]" > .api_key - `schema_repetitions`: This controls the number of features for the lookup. One schema repetition will result in 10 features plus primary key. Five repetitions will result in 50 features plus primary key. - `recreate_feature_group`: This controls if the previous feature group should be dropped and recreated. Set this to true when rerunning the benchmark with different size of rows or schema repetitions. - `batch_size`: This is relevant for the actual benchmark and controls how many feature vectors are looked up in the batch benchmark. +- `tablespace`: (Optional) If set creates a feature group using on-disk data. 3. 
Create the feature group diff --git a/locust_benchmark/build-manifest.json b/locust_benchmark/build-manifest.json new file mode 100644 index 000000000..48599c6dc --- /dev/null +++ b/locust_benchmark/build-manifest.json @@ -0,0 +1,8 @@ +[ + { + "name": "hopsworks/locust-hsfs", + "version": "env:VERSION", + "dockerFile": "locust_benchmark/Dockerfile", + "canUseCache": "true" + } +] diff --git a/locust_benchmark/common/hopsworks_client.py b/locust_benchmark/common/hopsworks_client.py index b9fbcae04..d82409892 100644 --- a/locust_benchmark/common/hopsworks_client.py +++ b/locust_benchmark/common/hopsworks_client.py @@ -7,10 +7,8 @@ import pandas as pd from locust.runners import MasterRunner, LocalRunner -import hsfs -from hsfs import client -from hsfs.client.exceptions import RestAPIError +import hopsworks class HopsworksClient: @@ -21,14 +19,14 @@ def __init__(self, environment=None): environment.runner, (MasterRunner, LocalRunner) ): print(self.hopsworks_config) - self.connection = hsfs.connection( + self.project = hopsworks.login( project=self.hopsworks_config.get("project", "test"), host=self.hopsworks_config.get("host", "localhost"), port=self.hopsworks_config.get("port", 443), api_key_file=".api_key", engine="python", ) - self.fs = self.connection.get_feature_store() + self.fs = self.project.get_feature_store() # test settings self.external = self.hopsworks_config.get("external", False) @@ -38,6 +36,7 @@ def __init__(self, environment=None): "recreate_feature_group", False ) self.batch_size = self.hopsworks_config.get("batch_size", 100) + self.tablespace = self.hopsworks_config.get("tablespace", None) def get_or_create_fg(self): locust_fg = self.fs.get_or_create_feature_group( @@ -46,6 +45,7 @@ def get_or_create_fg(self): primary_key=["ip"], online_enabled=True, stream=True, + online_config={'table_space': self.tablespace} if self.tablespace else None ) return locust_fg @@ -59,18 +59,15 @@ def insert_data(self, locust_fg): return locust_fg def get_or_create_fv(self, fg=None): - try: - return self.fs.get_feature_view("locust_fv", version=1) - except RestAPIError: - return self.fs.create_feature_view( - name="locust_fv", - query=fg.select_all(), - version=1, - ) + if fg is None: + fg = self.get_or_create_fg() + return self.fs.get_or_create_feature_view( + name="locust_fv", version=1, query=fg.select_all() + ) def close(self): - if client._client is not None: - self.connection.close() + if self.project is not None: + hopsworks.logout() def generate_insert_df(self, rows, schema_repetitions): data = {"ip": range(0, rows)} diff --git a/locust_benchmark/create_feature_group.py b/locust_benchmark/create_feature_group.py index 2ac6cf568..dbc237e27 100644 --- a/locust_benchmark/create_feature_group.py +++ b/locust_benchmark/create_feature_group.py @@ -4,4 +4,5 @@ hopsworks_client = HopsworksClient() fg = hopsworks_client.get_or_create_fg() hopsworks_client.insert_data(fg) + hopsworks_client.get_or_create_fv() hopsworks_client.close() diff --git a/locust_benchmark/hopsworks_config.json b/locust_benchmark/hopsworks_config.json index 6a8e60862..6e92b6739 100644 --- a/locust_benchmark/hopsworks_config.json +++ b/locust_benchmark/hopsworks_config.json @@ -1,10 +1,11 @@ { - "host": "localhost", + "host": "mercury.hops.works", "port": 443, - "project": "test", + "project": "fabio_demo", "external": true, - "rows": 100000, + "rows": 1000, "schema_repetitions": 1, "recreate_feature_group": true, - "batch_size": 100 + "batch_size": 100, + "tablespace": "ts1" } diff --git a/locust_benchmark/locustfile.py 
b/locust_benchmark/locustfile.py index d2d3ff933..105d80abd 100644 --- a/locust_benchmark/locustfile.py +++ b/locust_benchmark/locustfile.py @@ -3,7 +3,7 @@ from common.hopsworks_client import HopsworksClient from common.stop_watch import stopwatch from locust import HttpUser, User, task, constant, events -from locust.runners import MasterRunner, LocalRunner +from locust.runners import MasterRunner from urllib3 import PoolManager import nest_asyncio @@ -11,12 +11,8 @@ @events.init.add_listener def on_locust_init(environment, **kwargs): print("Locust process init") - - if isinstance(environment.runner, (MasterRunner, LocalRunner)): - # create feature view - environment.hopsworks_client = HopsworksClient(environment) - fg = environment.hopsworks_client.get_or_create_fg() - environment.hopsworks_client.get_or_create_fv(fg) + environment.hopsworks_client = HopsworksClient(environment) + environment.hopsworks_client.get_or_create_fg() @events.quitting.add_listener @@ -61,27 +57,21 @@ def get_feature_vector(self): class MySQLFeatureVectorLookup(User): - wait_time = constant(0) - weight = 5 - # fixed_count = 1 + wait_time = constant(0.001) + weight = 2 def __init__(self, environment): super().__init__(environment) - self.env = environment - self.client = HopsworksClient(environment) - self.fv = self.client.get_or_create_fv() + self.client = environment.hopsworks_client def on_start(self): - print("Init user") + self.fv = self.client.get_or_create_fv() self.fv.init_serving(external=self.client.external) nest_asyncio.apply() - def on_stop(self): - print("Closing user") - @task def get_feature_vector(self): - self._get_feature_vector({"ip": random.randint(0, self.client.rows - 1)}) + return self._get_feature_vector({"ip": random.randint(0, self.client.rows - 1)}) @stopwatch def _get_feature_vector(self, pk): @@ -89,14 +79,12 @@ def _get_feature_vector(self, pk): class MySQLFeatureVectorBatchLookup(User): - wait_time = constant(0) + wait_time = constant(0.001) weight = 1 - # fixed_count = 1 def __init__(self, environment): super().__init__(environment) - self.env = environment - self.client = HopsworksClient(environment) + self.client = environment.hopsworks_client self.fv = self.client.get_or_create_fv() def on_start(self): @@ -104,16 +92,13 @@ def on_start(self): self.fv.init_serving(external=self.client.external) nest_asyncio.apply() - def on_stop(self): - print("Closing user") - @task def get_feature_vector_batch(self): pks = [ {"ip": random.randint(0, self.client.rows - 1)} for i in range(self.client.batch_size) ] - self._get_feature_vectors(pks) + return self._get_feature_vectors(pks) @stopwatch def _get_feature_vectors(self, pk): diff --git a/locust_benchmark/requirements.txt b/locust_benchmark/requirements.txt index 2eef53a7f..d992f8066 100644 --- a/locust_benchmark/requirements.txt +++ b/locust_benchmark/requirements.txt @@ -1,3 +1,4 @@ markupsafe==2.0.1 -locust==2.17.0 +locust==2.23.1 +nest_asyncio==1.6.0 git+https://github.com/logicalclocks/hopsworks-api@main#egg=hopsworks[python]&subdirectory=python \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 823e3c8f2..2341c5ae1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -17,69 +17,69 @@ nav: - Setup and Installation: https://docs.hopsworks.ai/ - Administration: https://docs.hopsworks.ai/ - API: + - Login: generated/api/login.md - Platform API: - - Login: generated/api/login.md - Connection: generated/api/connection.md - - Projects: generated/api/projects.md - - Jobs: generated/api/jobs.md + - Datasets: 
generated/api/datasets.md + - Environment: generated/api/environment.md - Executions: generated/api/executions.md - FlinkCluster: generated/api/flink_cluster.md - - Environment: generated/api/environment.md - - GitRepo: generated/api/git_repo.md - GitProvider: generated/api/git_provider.md - GitRemote: generated/api/git_remote.md - - Datasets: generated/api/datasets.md - - KafkaTopic: generated/api/kafka_topic.md + - GitRepo: generated/api/git_repo.md + - Jobs: generated/api/jobs.md - KafkaSchema: generated/api/kafka_schema.md - - Secrets: generated/api/secrets.md + - KafkaTopic: generated/api/kafka_topic.md - OpenSearch: generated/api/opensearch.md + - Projects: generated/api/projects.md + - Secrets: generated/api/secrets.md - Feature Store API: + - Embedding: + - EmbeddingFeature: generated/api/embedding_feature_api.md + - EmbeddingIndex: generated/api/embedding_index_api.md + - SimilarityFunctionType: generated/api/similarity_function_type_api.md - ExpectationSuite: generated/api/expectation_suite_api.md - - FeatureStore: generated/api/feature_store_api.md - - FeatureGroup: generated/api/feature_group_api.md - ExternalFeatureGroup: generated/api/external_feature_group_api.md - - SpineGroup: generated/api/spine_group_api.md - - FeatureView: generated/api/feature_view_api.md - - TrainingDataset: generated/api/training_dataset_api.md - - Storage Connector: generated/api/storage_connector_api.md - Feature: generated/api/feature_api.md + - Feature Monitoring: + - Configuration: generated/api/feature_monitoring_config_api.md + - Result: generated/api/feature_monitoring_result_api.md + - Window: generated/api/feature_monitoring_window_config_api.md + - FeatureGroup: generated/api/feature_group_api.md + - FeatureStore: generated/api/feature_store_api.md + - FeatureView: generated/api/feature_view_api.md + - Provenance Links: generated/api/links.md - Query: generated/api/query_api.md + - SpineGroup: generated/api/spine_group_api.md + - Statistics: + - Feature descriptive statistics: generated/api/feature_descriptive_statistics_api.md + - Split Statistics: generated/api/split_statistics_api.md + - Statistics: generated/api/statistics_api.md + - Storage Connector: generated/api/storage_connector_api.md + - TrainingDataset: generated/api/training_dataset_api.md - Transformation Functions: - - UDF: generated/api/udf.md - HopsworksUDF: generated/api/hopsworks_udf.md - - TransformationFunction: generated/api/transformation_functions_api.md - Transformation Statistics: - - TransformationStatistics: generated/api/transformation_statistics.md - FeatureTransformationStatistics: generated/api/feature_transformation_statistics.md + - TransformationStatistics: generated/api/transformation_statistics.md + - TransformationFunction: generated/api/transformation_functions_api.md + - UDF: generated/api/udf.md - ValidationReport: generated/api/validation_report_api.md - - Provenance Links: generated/api/links.md - - Statistics: - - Statistics: generated/api/statistics_api.md - - Split Statistics: generated/api/split_statistics_api.md - - Feature descriptive statistics: generated/api/feature_descriptive_statistics_api.md - - Feature Monitoring: - - Configuration: generated/api/feature_monitoring_config_api.md - - Result: generated/api/feature_monitoring_result_api.md - - Window: generated/api/feature_monitoring_window_config_api.md - - Embedding: - - EmbeddingIndex: generated/api/embedding_index_api.md - - EmbeddingFeature: generated/api/embedding_feature_api.md - - SimilarityFunctionType: 
generated/api/similarity_function_type_api.md - Machine Learning API: - Model Registry: - - Model Registry: generated/model-registry/model_registry_api.md - Model: generated/model-registry/model_api.md + - Model Registry: generated/model-registry/model_registry_api.md - Model Schema: generated/model-registry/model_schema_api.md - Model Serving: - - Model Serving: generated/model-serving/model_serving_api.md - Deployment: generated/model-serving/deployment_api.md - Deployment state: generated/model-serving/predictor_state_api.md - Deployment state condition: generated/model-serving/predictor_state_condition_api.md - - Predictor: generated/model-serving/predictor_api.md - - Transformer: generated/model-serving/transformer_api.md - - Inference Logger: generated/model-serving/inference_logger_api.md - Inference Batcher: generated/model-serving/inference_batcher_api.md + - Inference Logger: generated/model-serving/inference_logger_api.md + - Model Serving: generated/model-serving/model_serving_api.md + - Predictor: generated/model-serving/predictor_api.md - Resources: generated/model-serving/resources_api.md + - Transformer: generated/model-serving/transformer_api.md # Added to allow navigation using the side drawer - Feature Store JavaDoc: https://docs.hopsworks.ai/feature-store-javadoc/latest/ - Contributing: CONTRIBUTING.md diff --git a/python/hopsworks/__init__.py b/python/hopsworks/__init__.py index 79d500769..220dcadb8 100644 --- a/python/hopsworks/__init__.py +++ b/python/hopsworks/__init__.py @@ -22,6 +22,7 @@ import tempfile import warnings from pathlib import Path +from typing import Literal, Union from hopsworks import client, constants, project, version from hopsworks.client.exceptions import ( @@ -83,6 +84,7 @@ def login( api_key_file: str = None, hostname_verification: bool = False, trust_store_path: str = None, + engine: Union[None, Literal["spark"], Literal["python"], Literal["training"]] = None, ) -> project.Project: """Connect to [Serverless Hopsworks](https://app.hopsworks.ai) by calling the `hopsworks.login()` function with no arguments. @@ -122,6 +124,13 @@ def login( api_key_file: Path to file wih Api Key hostname_verification: Whether to verify Hopsworks' certificate trust_store_path: Path on the file system containing the Hopsworks certificates + engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, + which initializes the engine to Spark if the environment provides Spark, for + example on Hopsworks and Databricks, or falls back to Python if Spark is not + available, e.g. on local Python environments or AWS SageMaker. This option + allows you to override this behaviour. `"training"` engine is useful when only + feature store metadata is needed, for example training dataset location and label + information when Hopsworks training experiment is conducted. 
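A minimal sketch of the new `engine` override described above; the host and API key file are placeholders:

```python
import hopsworks

# Force the Python engine even when Spark is available in the environment;
# the host and API-key file below are placeholders.
project = hopsworks.login(
    host="my-instance.cloud.hopsworks.ai",
    port=443,
    api_key_file=".api_key",
    engine="python",
)
fs = project.get_feature_store()
```

Leaving `engine=None` keeps the previous behaviour: Spark is used when the environment provides it, otherwise the client falls back to Python.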
# Returns `Project`: The Project object to perform operations on # Raises @@ -138,7 +147,7 @@ def login( # If inside hopsworks, just return the current project for now if "REST_ENDPOINT" in os.environ: - _hw_connection = _hw_connection(hostname_verification=hostname_verification) + _hw_connection = _hw_connection(hostname_verification=hostname_verification, engine=engine) _connected_project = _hw_connection.get_project() _initialize_module_apis() print("\nLogged in to project, explore it here " + _connected_project.get_url()) @@ -207,6 +216,7 @@ def login( _hw_connection = _hw_connection( host=host, port=port, + engine=engine, api_key_file=api_key_path, hostname_verification=hostname_verification, trust_store_path=trust_store_path, @@ -246,6 +256,7 @@ def login( _hw_connection = _hw_connection( host=host, port=port, + engine=engine, api_key_value=api_key, hostname_verification=hostname_verification, trust_store_path=trust_store_path, diff --git a/python/hopsworks_common/client/online_store_rest_client.py b/python/hopsworks_common/client/online_store_rest_client.py index 9ad05e9a3..b66897b09 100644 --- a/python/hopsworks_common/client/online_store_rest_client.py +++ b/python/hopsworks_common/client/online_store_rest_client.py @@ -305,7 +305,7 @@ def _check_hopsworks_connection(self) -> None: assert ( client.get_instance() is not None and client.get_instance()._connected ), """Hopsworks Client is not connected. Please connect to Hopsworks cluster - via hopsworks.login or hsfs.connection before initialising the Online Store REST Client. + via hopsworks.login before initialising the Online Store REST Client. """ _logger.debug("Hopsworks connection is active.") diff --git a/python/hopsworks_common/connection.py b/python/hopsworks_common/connection.py index 6972b6a85..43a64bc76 100644 --- a/python/hopsworks_common/connection.py +++ b/python/hopsworks_common/connection.py @@ -100,7 +100,7 @@ class Connection: Defaults to `None`. engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, which initializes the engine to Spark if the environment provides Spark, for - example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not + example on Hopsworks and Databricks, or falls back to Python if Spark is not available, e.g. on local Python environments or AWS SageMaker. This option allows you to override this behaviour. `"training"` engine is useful when only feature store metadata is needed, for example training dataset location and label @@ -151,7 +151,6 @@ def __init__( def get_feature_store( self, name: Optional[str] = None, - engine: Optional[str] = None, ): # -> feature_store.FeatureStore # the typing is commented out due to circular dependency, it breaks auto_doc.py """Get a reference to a feature store to perform operations on. @@ -161,25 +160,10 @@ def get_feature_store( # Arguments name: The name of the feature store, defaults to `None`. - engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, - which initializes the engine to Spark if the environment provides Spark, for - example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not - available, e.g. on local Python environments or AWS SageMaker. This option - allows you to override this behaviour. `"training"` engine is useful when only - feature store metadata is needed, for example training dataset location and label - information when Hopsworks training experiment is conducted. # Returns `FeatureStore`. 
A feature store handle object to perform operations on. """ - # Ensure the engine is initialized and of right type - from hsfs import engine as hsfs_engine - - if engine: - global _hsfs_engine_type - _hsfs_engine_type = engine - hsfs_engine.get_instance() - if not name: name = client.get_instance()._project_name return self._feature_store_api.get(util.append_feature_store_suffix(name)) @@ -484,7 +468,74 @@ def connection( api_key_file: Optional[str] = None, api_key_value: Optional[str] = None, ) -> Connection: - """Connection factory method, accessible through `hopsworks.connection()`.""" + """Connection factory method, accessible through `hopsworks.connection()`. + + This class provides convenience classmethods accessible from the `hopsworks`-module: + + !!! example "Connection factory" + For convenience, `hopsworks` provides a factory method, accessible from the top level + module, so you don't have to import the `Connection` class manually: + + ```python + import hopsworks + conn = hopsworks.connection() + ``` + + !!! hint "Save API Key as File" + To get started quickly, you can simply create a file with the previously + created Hopsworks API Key and place it on the environment from which you + wish to connect to Hopsworks. + + You can then connect by simply passing the path to the key file when + instantiating a connection: + + ```python hl_lines="6" + import hopsworks + conn = hopsworks.connection( + 'my_instance', # DNS of your Hopsworks instance + 443, # Port to reach your Hopsworks instance, defaults to 443 + api_key_file='hopsworks.key', # The file containing the API key generated above + hostname_verification=True # Disable for self-signed certificates + ) + project = conn.get_project("my_project") + ``` + + Clients in external clusters need to connect to Hopsworks using an + API key. The API key is generated inside the Hopsworks platform, and requires at + least the "project" scope to be able to access a project. + For more information, see the [integration guides](../setup.md). + + # Arguments + host: The hostname of the Hopsworks instance in the form of `[UUID].cloud.hopsworks.ai`, + defaults to `None`. Do **not** use the URL including `https://` when connecting + programmatically. + port: The port on which the Hopsworks instance can be reached, + defaults to `443`. + project: The name of the project to connect to. When running on Hopsworks, this + defaults to the project from where the client is run from. + Defaults to `None`. + engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, + which initializes the engine to Spark if the environment provides Spark, for + example on Hopsworks and Databricks, or falls back to Python if Spark is not + available, e.g. on local Python environments or AWS SageMaker. This option + allows you to override this behaviour. `"training"` engine is useful when only + feature store metadata is needed, for example training dataset location and label + information when Hopsworks training experiment is conducted. + hostname_verification: Whether or not to verify Hopsworks' certificate, defaults + to `True`. + trust_store_path: Path on the file system containing the Hopsworks certificates, + defaults to `None`. + cert_folder: The directory to store retrieved HopsFS certificates, defaults to + `"/tmp"`. Only required when running without a Spark environment. + api_key_file: Path to a file containing the API Key, defaults to `None`. 
+ api_key_value: API Key as string, if provided, `api_key_file` will be ignored, + however, this should be used with care, especially if the used notebook or + job script is accessible by multiple parties. Defaults to `None`. + + # Returns + `Connection`. Connection handle to perform operations on a + Hopsworks project. + """ return cls( host, port, diff --git a/python/hopsworks_common/constants.py b/python/hopsworks_common/constants.py index 72672dae8..b98ed8497 100644 --- a/python/hopsworks_common/constants.py +++ b/python/hopsworks_common/constants.py @@ -158,14 +158,17 @@ class MODEL: FRAMEWORK_TORCH = "TORCH" FRAMEWORK_PYTHON = "PYTHON" FRAMEWORK_SKLEARN = "SKLEARN" + FRAMEWORK_LLM = "LLM" class MODEL_REGISTRY: HOPSFS_MOUNT_PREFIX = "/hopsfs/" + MODEL_FILES_DIR_NAME = "Files" class MODEL_SERVING: MODELS_DATASET = "Models" + ARTIFACTS_DIR_NAME = "Artifacts" class ARTIFACT_VERSION: @@ -210,6 +213,7 @@ class PREDICTOR: # model server MODEL_SERVER_PYTHON = "PYTHON" MODEL_SERVER_TF_SERVING = "TENSORFLOW_SERVING" + MODEL_SERVER_VLLM = "VLLM" # serving tool SERVING_TOOL_DEFAULT = "DEFAULT" SERVING_TOOL_KSERVE = "KSERVE" diff --git a/python/hopsworks_common/core/dataset_api.py b/python/hopsworks_common/core/dataset_api.py index dc85dd263..f7ce40743 100644 --- a/python/hopsworks_common/core/dataset_api.py +++ b/python/hopsworks_common/core/dataset_api.py @@ -46,6 +46,7 @@ def __init__(self): DEFAULT_UPLOAD_FLOW_CHUNK_SIZE = 10 * 1024 * 1024 DEFAULT_UPLOAD_SIMULTANEOUS_UPLOADS = 3 + DEFAULT_UPLOAD_SIMULTANEOUS_CHUNKS = 3 DEFAULT_UPLOAD_MAX_CHUNK_RETRIES = 1 DEFAULT_DOWNLOAD_FLOW_CHUNK_SIZE = 1024 * 1024 @@ -159,10 +160,11 @@ def upload( overwrite: bool = False, chunk_size: int = DEFAULT_UPLOAD_FLOW_CHUNK_SIZE, simultaneous_uploads: int = DEFAULT_UPLOAD_SIMULTANEOUS_UPLOADS, + simultaneous_chunks: int = DEFAULT_UPLOAD_SIMULTANEOUS_CHUNKS, max_chunk_retries: int = DEFAULT_UPLOAD_MAX_CHUNK_RETRIES, chunk_retry_interval: int = 1, ): - """Upload a file to the Hopsworks filesystem. + """Upload a file or directory to the Hopsworks filesystem. ```python @@ -172,44 +174,93 @@ def upload( dataset_api = project.get_dataset_api() + # upload a file to Resources dataset uploaded_file_path = dataset_api.upload("my_local_file.txt", "Resources") + # upload a directory to Resources dataset + uploaded_file_path = dataset_api.upload("my_dir", "Resources") + ``` # Arguments - local_path: local path to file to upload + local_path: local path to file or directory to upload, can be relative or absolute upload_path: path to directory where to upload the file in Hopsworks Filesystem - overwrite: overwrite file if exists + overwrite: overwrite file or directory if exists chunk_size: upload chunk size in bytes. Default 10 MB - simultaneous_uploads: number of simultaneous chunks to upload. Default 3 + simultaneous_chunks: number of simultaneous chunks to upload for each file upload. Default 3 + simultaneous_uploads: number of simultaneous files to be uploaded for directories. Default 3 max_chunk_retries: maximum retry for a chunk. Default is 1 chunk_retry_interval: chunk retry interval in seconds. 
Default is 1sec # Returns - `str`: Path to uploaded file + `str`: Path to uploaded file or directory # Raises - `RestAPIError`: If unable to upload the file + `RestAPIError`: If unable to upload the file or directory """ + # local path could be absolute or relative, if not os.path.isabs(local_path) and os.path.exists( os.path.join(os.getcwd(), local_path) ): local_path = os.path.join(os.getcwd(), local_path) - file_size = os.path.getsize(local_path) - _, file_name = os.path.split(local_path) destination_path = upload_path + "/" + file_name if self.exists(destination_path): if overwrite: - self.remove(destination_path) + if 'datasetType' in self._get(destination_path): + raise DatasetException("overwrite=True not supported on a top-level dataset") + else: + self.remove(destination_path) else: raise DatasetException( "{} already exists, set overwrite=True to overwrite it".format( - local_path + destination_path ) ) + if os.path.isdir(local_path): + self.mkdir(destination_path) + + if os.path.isdir(local_path): + with ThreadPoolExecutor(simultaneous_uploads) as executor: + # if path is a dir, upload files and folders iteratively + for root, dirs, files in os.walk(local_path): + # os.walk(local_model_path), where local_model_path is expected to be an absolute path + # - root is the absolute path of the directory being walked + # - dirs is the list of directory names present in the root dir + # - files is the list of file names present in the root dir + # we need to replace the local path prefix with the hdfs path prefix (i.e., /srv/hops/....../root with /Projects/.../) + remote_base_path = root.replace( + local_path, destination_path + ).replace(os.sep, "/") + for d_name in dirs: + self.mkdir(remote_base_path + "/" + d_name) + + # uploading files in the same folder is done concurrently + futures = [ + executor.submit( + self._upload_file, f_name, root + os.sep + f_name, remote_base_path, chunk_size, simultaneous_chunks, max_chunk_retries, chunk_retry_interval + ) + for f_name in files + ] + + # wait for all upload tasks to complete + _, _ = wait(futures) + try: + _ = [future.result() for future in futures] + except Exception as e: + raise e + else: + self._upload_file(file_name, local_path, upload_path, chunk_size, simultaneous_chunks, max_chunk_retries, chunk_retry_interval) + + return upload_path + "/" + os.path.basename(local_path) + + + def _upload_file(self, file_name, local_path, upload_path, chunk_size, simultaneous_chunks, max_chunk_retries, chunk_retry_interval): + + file_size = os.path.getsize(local_path) + num_chunks = math.ceil(file_size / chunk_size) base_params = self._get_flow_base_params( @@ -223,15 +274,15 @@ def upload( pbar = tqdm( total=file_size, bar_format="{desc}: {percentage:.3f}%|{bar}| {n_fmt}/{total_fmt} elapsed<{elapsed} remaining<{remaining}", - desc="Uploading", + desc="Uploading {}".format(local_path), ) except Exception: self._log.exception("Failed to initialize progress bar.") self._log.info("Starting upload") - with ThreadPoolExecutor(simultaneous_uploads) as executor: + with ThreadPoolExecutor(simultaneous_chunks) as executor: while True: chunks = [] - for _ in range(simultaneous_uploads): + for _ in range(simultaneous_chunks): chunk = f.read(chunk_size) if not chunk: break @@ -269,8 +320,6 @@ def upload( else: self._log.info("Upload finished") - return upload_path + "/" + os.path.basename(local_path) - def _upload_chunk( self, base_params, @@ -459,7 +508,10 @@ def copy(self, source_path: str, destination_path: str, overwrite: bool = False) """ if 
self.exists(destination_path): if overwrite: - self.remove(destination_path) + if 'datasetType' in self._get(destination_path): + raise DatasetException("overwrite=True not supported on a top-level dataset") + else: + self.remove(destination_path) else: raise DatasetException( "{} already exists, set overwrite=True to overwrite it".format( @@ -497,10 +549,12 @@ def move(self, source_path: str, destination_path: str, overwrite: bool = False) # Raises `RestAPIError`: If unable to perform the move """ - if self.exists(destination_path): if overwrite: - self.remove(destination_path) + if 'datasetType' in self._get(destination_path): + raise DatasetException("overwrite=True not supported on a top-level dataset") + else: + self.remove(destination_path) else: raise DatasetException( "{} already exists, set overwrite=True to overwrite it".format( diff --git a/python/hopsworks_common/project.py b/python/hopsworks_common/project.py index df82b3f79..b35cac288 100644 --- a/python/hopsworks_common/project.py +++ b/python/hopsworks_common/project.py @@ -109,7 +109,7 @@ def project_namespace(self): return self._project_namespace def get_feature_store( - self, name: Optional[str] = None, engine: Optional[str] = None + self, name: Optional[str] = None ): # -> hsfs.feature_store.FeatureStore """Connect to Project's Feature Store. @@ -127,15 +127,12 @@ def get_feature_store( # Arguments name: Project name of the feature store. - engine: Which engine to use, `"spark"`, `"python"` or `"training"`. - Defaults to `"python"` when connected to [Serverless Hopsworks](https://app.hopsworks.ai). - See hsfs.Connection.connection documentation for more information. # Returns `hsfs.feature_store.FeatureStore`: The Feature Store API # Raises `RestAPIError`: If unable to connect """ - return client.get_connection().get_feature_store(name, engine) + return client.get_connection().get_feature_store(name) def get_model_registry(self): """Connect to Project's Model Registry API. 
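A short sketch of the directory upload added to `DatasetApi.upload` above; the local directory name is a placeholder:

```python
import hopsworks

project = hopsworks.login()                # credentials resolved from the environment
dataset_api = project.get_dataset_api()

# Upload a whole directory; sub-directories are recreated remotely and the files
# inside each folder are uploaded concurrently.
remote_path = dataset_api.upload(
    "my_dir",                              # hypothetical local directory
    "Resources",
    overwrite=True,                        # replaces Resources/my_dir if it already exists
    simultaneous_uploads=3,                # files uploaded in parallel per directory
    simultaneous_chunks=3,                 # chunks uploaded in parallel per file
)
print(remote_path)                         # "Resources/my_dir"
```

As in the `copy` and `move` changes above, `overwrite=True` is rejected with a `DatasetException` when the destination resolves to a top-level dataset rather than a file or sub-directory inside one.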
diff --git a/python/hopsworks_common/util.py b/python/hopsworks_common/util.py index 5d14c70af..27a3ff8eb 100644 --- a/python/hopsworks_common/util.py +++ b/python/hopsworks_common/util.py @@ -465,6 +465,7 @@ def is_interactive(): def set_model_class(model): + from hsml.llm.model import Model as LLMModel from hsml.model import Model as BaseModel from hsml.python.model import Model as PyModel from hsml.sklearn.model import Model as SkLearnModel @@ -490,6 +491,8 @@ def set_model_class(model): return SkLearnModel(**model) elif framework == MODEL.FRAMEWORK_PYTHON: return PyModel(**model) + elif framework == MODEL.FRAMEWORK_LLM: + return LLMModel(**model) else: raise ValueError( "framework {} is not a supported framework".format(str(framework)) @@ -606,6 +609,8 @@ def validate_metrics(metrics): def get_predictor_for_model(model, **kwargs): + from hsml.llm.model import Model as LLMModel + from hsml.llm.predictor import Predictor as vLLMPredictor from hsml.model import Model as BaseModel from hsml.predictor import Predictor as BasePredictor from hsml.python.model import Model as PyModel @@ -632,6 +637,8 @@ def get_predictor_for_model(model, **kwargs): return SkLearnPredictor(**kwargs) if type(model) is PyModel: return PyPredictor(**kwargs) + if type(model) is LLMModel: + return vLLMPredictor(**kwargs) if type(model) is BaseModel: return BasePredictor( # python as default framework and model server model_framework=MODEL.FRAMEWORK_PYTHON, diff --git a/python/hopsworks_common/version.py b/python/hopsworks_common/version.py index 52cd363fc..82beef4ab 100644 --- a/python/hopsworks_common/version.py +++ b/python/hopsworks_common/version.py @@ -14,4 +14,4 @@ # limitations under the License. # -__version__ = "4.1.0.dev1" +__version__ = "4.2.0.dev1" diff --git a/python/hsfs/builtin_transformations.py b/python/hsfs/builtin_transformations.py index 1fc2ce670..4426268cc 100644 --- a/python/hsfs/builtin_transformations.py +++ b/python/hsfs/builtin_transformations.py @@ -43,7 +43,7 @@ def robust_scaler(feature: pd.Series, statistics=feature_statistics) -> pd.Serie ) -@udf(int, drop=["feature"]) +@udf(int, drop=["feature"], mode="pandas") def label_encoder(feature: pd.Series, statistics=feature_statistics) -> pd.Series: unique_data = sorted([value for value in statistics.feature.unique_values]) value_to_index = {value: index for index, value in enumerate(unique_data)} @@ -56,7 +56,7 @@ def label_encoder(feature: pd.Series, statistics=feature_statistics) -> pd.Serie ) -@udf(bool, drop=["feature"]) +@udf(bool, drop=["feature"], mode="pandas") def one_hot_encoder(feature: pd.Series, statistics=feature_statistics) -> pd.Series: unique_data = [value for value in statistics.feature.unique_values] diff --git a/python/hsfs/core/feature_group_api.py b/python/hsfs/core/feature_group_api.py index ab05fb9b5..037228c73 100644 --- a/python/hsfs/core/feature_group_api.py +++ b/python/hsfs/core/feature_group_api.py @@ -21,7 +21,12 @@ from hopsworks_common import client from hsfs import feature_group as fg_mod from hsfs import feature_group_commit, util -from hsfs.core import explicit_provenance, ingestion_job, ingestion_job_conf +from hsfs.core import ( + explicit_provenance, + ingestion_job, + ingestion_job_conf, + job, +) class FeatureGroupApi: @@ -416,6 +421,36 @@ def ingestion( ), ) + def update_table_schema( + self, + feature_group_instance: fg_mod.FeatureGroup, + ) -> job.Job: + """ + Setup a Hopsworks job to update table schema + Args: + feature_group_instance: FeatureGroup, required + metadata object of feature group. 
+ job_conf: the configuration for the job application + """ + + _client = client.get_instance() + path_params = [ + "project", + _client._project_id, + "featurestores", + feature_group_instance.feature_store_id, + "featuregroups", + feature_group_instance.id, + "updatetableschema", + ] + + headers = {"content-type": "application/json"} + return job.Job.from_response_json( + _client._send_request( + "POST", path_params, headers=headers + ), + ) + def get_parent_feature_groups( self, feature_group_instance: Union[ diff --git a/python/hsfs/core/feature_group_engine.py b/python/hsfs/core/feature_group_engine.py index f00a044e1..0eb5c441a 100644 --- a/python/hsfs/core/feature_group_engine.py +++ b/python/hsfs/core/feature_group_engine.py @@ -15,7 +15,7 @@ from __future__ import annotations import warnings -from typing import List +from typing import List, Union from hsfs import engine, feature, util from hsfs import feature_group as fg @@ -67,7 +67,7 @@ def _update_feature_group_schema_on_demand_transformations( def save( self, - feature_group, + feature_group: Union[fg.FeatureGroup, fg.ExternalFeatureGroup], feature_dataframe, write_options, validation_options: dict = None, @@ -80,6 +80,21 @@ def save( feature_group=feature_group, features=dataframe_features ) ) + + # Currently on-demand transformation functions not supported in external feature groups. + if feature_group.transformation_functions: + if not isinstance(feature_group, fg.ExternalFeatureGroup): + feature_dataframe = ( + engine.get_instance()._apply_transformation_function( + feature_group.transformation_functions, feature_dataframe + ) + ) + else: + warnings.warn( + "On-Demand features were not created because On-Demand Transformations are not supported for External Feature Groups.", + stacklevel=1, + ) + util.validate_embedding_feature_type( feature_group.embedding_index, dataframe_features ) @@ -119,7 +134,7 @@ def save( def insert( self, - feature_group, + feature_group: Union[fg.FeatureGroup, fg.ExternalFeatureGroup], feature_dataframe, overwrite, operation, @@ -132,6 +147,16 @@ def insert( feature_group.time_travel_format, features=feature_group.features, ) + + # Currently on-demand transformation functions not supported in external feature groups. + if ( + not isinstance(feature_group, fg.ExternalFeatureGroup) + and feature_group.transformation_functions + ): + feature_dataframe = engine.get_instance()._apply_transformation_function( + feature_group.transformation_functions, feature_dataframe + ) + dataframe_features = ( self._update_feature_group_schema_on_demand_transformations( feature_group=feature_group, features=dataframe_features @@ -249,6 +274,8 @@ def commit_delete(feature_group, delete_df, write_options): @staticmethod def delta_vacuum(feature_group, retention_hours): if feature_group.time_travel_format == "DELTA": + # TODO: This should change, DeltaEngine and HudiEngine always assumes spark client! + # Cannot properly manage what should happen when using python. 
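For orientation, a minimal sketch of how an engine is expected to drive the new `updatetableschema` endpoint added above: instead of writing an empty dataframe from the client, it requests a backend job and waits for it. This mirrors the Python engine change later in this diff; the blocking wait shown here is illustrative.

from hsfs.core import feature_group_api

def update_table_schema(feature_group):
    # Ask the backend to launch the table-schema-update job for this feature group
    schema_job = feature_group_api.FeatureGroupApi().update_table_schema(feature_group)
    # Block until the job terminates so subsequent reads see the new schema
    schema_job._wait_for_job(await_termination=True)
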
delta_engine_instance = delta_engine.DeltaEngine( feature_group.feature_store_id, feature_group.feature_store_name, @@ -296,10 +323,7 @@ def append_features(self, feature_group, new_features): ) # write empty dataframe to update parquet schema - if feature_group.time_travel_format == "DELTA": - engine.get_instance().add_cols_to_delta_table(feature_group, new_features) - else: - engine.get_instance().save_empty_dataframe(feature_group, new_features=new_features) + engine.get_instance().update_table_schema(feature_group) def update_description(self, feature_group, description): """Updates the description of a feature group.""" @@ -326,7 +350,7 @@ def update_deprecated(self, feature_group, deprecate): def insert_stream( self, - feature_group, + feature_group: Union[fg.FeatureGroup, fg.ExternalFeatureGroup], dataframe, query_name, output_mode, @@ -349,6 +373,12 @@ def insert_stream( feature_group=feature_group, features=dataframe_features ) ) + + if feature_group.transformation_functions: + dataframe = engine.get_instance()._apply_transformation_function( + feature_group.transformation_functions, dataframe + ) + util.validate_embedding_feature_type( feature_group.embedding_index, dataframe_features ) diff --git a/python/hsfs/core/hudi_engine.py b/python/hsfs/core/hudi_engine.py index 4492f0a19..e96b8ea56 100644 --- a/python/hsfs/core/hudi_engine.py +++ b/python/hsfs/core/hudi_engine.py @@ -234,25 +234,6 @@ def _setup_hudi_read_opts(self, hudi_fg_alias, read_options): return hudi_options - def reconcile_hudi_schema( - self, save_empty_dataframe_callback, hudi_fg_alias, read_options - ): - if sorted(self._spark_session.table(hudi_fg_alias.alias).columns) != sorted( - [feature.name for feature in hudi_fg_alias.feature_group._features] + self.HUDI_SPEC_FEATURE_NAMES - ): - full_fg = self._feature_group_api.get( - feature_store_id=hudi_fg_alias.feature_group._feature_store_id, - name=hudi_fg_alias.feature_group.name, - version=hudi_fg_alias.feature_group.version, - ) - - save_empty_dataframe_callback(full_fg) - - self.register_temporary_table( - hudi_fg_alias, - read_options, - ) - @staticmethod def _get_last_commit_metadata(spark_context, base_path): hopsfs_conf = spark_context._jvm.org.apache.hadoop.fs.FileSystem.get( diff --git a/python/hsfs/core/kafka_engine.py b/python/hsfs/core/kafka_engine.py index d21b6ec22..ee9e892be 100644 --- a/python/hsfs/core/kafka_engine.py +++ b/python/hsfs/core/kafka_engine.py @@ -141,7 +141,7 @@ def kafka_get_offsets( offsets += f",{partition_metadata.id}:{consumer.get_watermark_offsets(partition)[tuple_value]}" consumer.close() - return f" -initialCheckPointString {topic_name + offsets}" + return f"{topic_name + offsets}" return "" diff --git a/python/hsfs/core/vector_server.py b/python/hsfs/core/vector_server.py index 0e785dde5..d354a5400 100755 --- a/python/hsfs/core/vector_server.py +++ b/python/hsfs/core/vector_server.py @@ -149,6 +149,7 @@ def __init__( self._feature_to_handle_if_sql: Optional[Set[str]] = None self._valid_serving_keys: Set[str] = set() self._serving_initialized: bool = False + self.__all_features_on_demand: Optional[bool] = None def init_serving( self, @@ -415,14 +416,23 @@ def get_feature_vectors( request_parameters is None or len(request_parameters) == 0 or isinstance(request_parameters, dict) + or not entries or len(request_parameters) == len(entries) - ), "Request Parameters should be a Dictionary, None, empty or have the same length as the entries" + ), "Request Parameters should be a Dictionary, None, empty or have the same length as 
the entries if they are not None or empty." online_client_choice = self.which_client_and_ensure_initialised( force_rest_client=force_rest_client, force_sql_client=force_sql_client ) rondb_entries = [] skipped_empty_entries = [] + + if not entries: + entries = ( + [[] * len(request_parameters)] + if isinstance(request_parameters, list) + else [[]] + ) + for (idx, entry), passed, vector_features in itertools.zip_longest( enumerate(entries), passed_features, @@ -547,7 +557,11 @@ def assemble_feature_vector( # for backward compatibility, before 3.4, if result is empty, # instead of throwing error, it skips the result # Maybe we drop this behaviour for 4.0 - if len(result_dict) == 0 and not allow_missing: + if ( + len(result_dict) == 0 + and not allow_missing + and not self._all_features_on_demand + ): return None if not allow_missing and len(missing_features) > 0: @@ -1255,6 +1269,17 @@ def validate_entry( Keys relevant to vector_db are filtered out. """ + _logger.debug( + "Checking if entry is None and all features in the feature view are on-demand." + ) + if not entry: + if self._all_features_on_demand: + return {} + else: + raise exceptions.FeatureStoreException( + "The required argument `entries` is missing. If the feature view includes only on-demand features, entries may be left empty or set to None." + ) + _logger.debug("Checking keys in entry are valid serving keys.") for key in entry.keys(): if key not in self.valid_serving_keys: @@ -1323,6 +1348,15 @@ def identify_missing_features_pre_fetch( passed_feature_names = passed_feature_names.union( vector_db_features.keys() ) + if self._on_demand_feature_names and len(self._on_demand_feature_names) > 0: + # Remove on-demand features from validation check as they would be computed. + _logger.debug( + "Appending on_demand_feature_names : %s, to passed_feature_names for pre-fetch missing", + self._on_demand_feature_names, + ) + passed_feature_names = passed_feature_names.union( + self._on_demand_feature_names + ) neither_fetched_nor_passed = fetched_features.difference( passed_feature_names ) @@ -1575,3 +1609,12 @@ def transformed_feature_vector_col_name(self): ] self._transformed_feature_vector_col_name.extend(output_column_names) return self._transformed_feature_vector_col_name + + @property + def _all_features_on_demand(self) -> bool: + """True if all features in the feature view are on-demand.""" + if self.__all_features_on_demand is None: + self.__all_features_on_demand = all( + feature.on_demand_transformation_function for feature in self._features + ) + return self.__all_features_on_demand diff --git a/python/hsfs/engine/python.py b/python/hsfs/engine/python.py index b2fb1968d..eeacf8e27 100644 --- a/python/hsfs/engine/python.py +++ b/python/hsfs/engine/python.py @@ -808,15 +808,6 @@ def save_dataframe( online_write_options: Dict[str, Any], validation_id: Optional[int] = None, ) -> Optional[job.Job]: - # Currently on-demand transformation functions not supported in external feature groups. - if ( - not isinstance(feature_group, ExternalFeatureGroup) - and feature_group.transformation_functions - ): - dataframe = self._apply_transformation_function( - feature_group.transformation_functions, dataframe - ) - if ( hasattr(feature_group, "EXTERNAL_FEATURE_GROUP") and feature_group.online_enabled @@ -1212,11 +1203,11 @@ def save_stream_dataframe( "Stream ingestion is not available on Python environments, because it requires Spark as engine." 
) - def save_empty_dataframe( - self, feature_group: Union[FeatureGroup, ExternalFeatureGroup], new_features=None - ) -> None: - """Wrapper around save_dataframe in order to provide no-op.""" - pass + def update_table_schema(self, feature_group: Union[FeatureGroup, ExternalFeatureGroup]) -> None: + _job = self._feature_group_api.update_table_schema(feature_group) + _job._wait_for_job( + await_termination=True + ) def _get_app_options( self, user_write_options: Optional[Dict[str, Any]] = None @@ -1296,9 +1287,19 @@ def _apply_transformation_function( dataset.columns ) if missing_features: - raise FeatureStoreException( - f"Features {missing_features} specified in the transformation function '{hopsworks_udf.function_name}' are not present in the feature view. Please specify the feature required correctly." - ) + if ( + tf.transformation_type + == transformation_function.TransformationType.ON_DEMAND + ): + # On-demand transformation are applied using the python/spark engine during insertion, the transformation while retrieving feature vectors are performed in the vector_server. + raise FeatureStoreException( + f"The following feature(s): `{'`, '.join(missing_features)}`, specified in the on-demand transformation function '{hopsworks_udf.function_name}' are not present in the dataframe being inserted into the feature group. " + + "Please verify that the correct feature names are used in the transformation function and that these features exist in the dataframe being inserted." + ) + else: + raise FeatureStoreException( + f"The following feature(s): `{'`, '.join(missing_features)}`, specified in the model-dependent transformation function '{hopsworks_udf.function_name}' are not present in the feature view. Please verify that the correct features are specified in the transformation function." + ) if tf.hopsworks_udf.dropped_features: dropped_features.update(tf.hopsworks_udf.dropped_features) @@ -1406,7 +1407,9 @@ def _apply_pandas_udf( for feature in hopsworks_udf.transformation_features ] ) - ) + ).set_index( + dataframe.index + ) # Index is set to the input dataframe index so that pandas would merge the new columns without reordering them. else: dataframe[hopsworks_udf.output_column_names[0]] = hopsworks_udf.get_udf( online=False @@ -1417,9 +1420,11 @@ def _apply_pandas_udf( for feature in hopsworks_udf.transformation_features ] ) - ) + ).set_axis( + dataframe.index + ) # Index is set to the input dataframe index so that pandas would merge the new column without reordering it. 
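The `set_index`/`set_axis` calls above exist because the transformed columns come out of a fresh concat and carry a default RangeIndex; a self-contained toy illustration (data made up here) of why reattaching the input dataframe's index matters when assigning the result back:

import pandas as pd

df = pd.DataFrame({"feature": [10, 20, 30]}, index=[7, 3, 5])  # non-default index
computed = pd.Series([1.0, 2.0, 3.0])  # fresh result with a default RangeIndex

df["misaligned"] = computed                  # label alignment -> all NaN here
df["aligned"] = computed.set_axis(df.index)  # keeps positional order intact
print(df)
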
if hopsworks_udf.output_column_names[0] in dataframe.columns: - # Overwriting features so reordering dataframe to move overwritten column to the end of the dataframe + # Overwriting features also reordering dataframe to move overwritten column to the end of the dataframe cols = dataframe.columns.tolist() cols.append(cols.pop(cols.index(hopsworks_udf.output_column_names[0]))) dataframe = dataframe[cols] @@ -1505,12 +1510,12 @@ def _write_dataframe_kafka( topic_name=feature_group._online_topic_name, feature_store_id=feature_group.feature_store_id, offline_write_options=offline_write_options, - high=True, + high=False, ) now = datetime.now(timezone.utc) feature_group.materialization_job.run( args=feature_group.materialization_job.config.get("defaultArgs", "") - + initial_check_point, + + (f" -initialCheckPointString {initial_check_point}" if initial_check_point else ""), await_termination=offline_write_options.get("wait_for_job", False), ) offline_backfill_every_hr = offline_write_options.pop( @@ -1540,7 +1545,7 @@ def _write_dataframe_kafka( # provide the initial_check_point as it will reduce the read amplification of materialization job feature_group.materialization_job.run( args=feature_group.materialization_job.config.get("defaultArgs", "") - + initial_check_point, + + (f" -initialCheckPointString {initial_check_point}" if initial_check_point else ""), await_termination=offline_write_options.get("wait_for_job", False), ) return feature_group.materialization_job @@ -1582,9 +1587,10 @@ def _start_offline_materialization(offline_write_options: Dict[str, Any]) -> boo def _convert_feature_log_to_df(feature_log, cols) -> pd.DataFrame: if feature_log is None and cols: return pd.DataFrame(columns=cols) - if not (isinstance(feature_log, (list, pd.DataFrame, pl.DataFrame)) or ( - HAS_NUMPY and isinstance(feature_log, np.ndarray) - )): + if not ( + isinstance(feature_log, (list, pd.DataFrame, pl.DataFrame)) + or (HAS_NUMPY and isinstance(feature_log, np.ndarray)) + ): raise ValueError(f"Type '{type(feature_log)}' not accepted") if isinstance(feature_log, list) or ( HAS_NUMPY and isinstance(feature_log, np.ndarray) diff --git a/python/hsfs/engine/spark.py b/python/hsfs/engine/spark.py index 2ff6bc39d..67e15468b 100644 --- a/python/hsfs/engine/spark.py +++ b/python/hsfs/engine/spark.py @@ -35,6 +35,7 @@ import tzlocal from hopsworks_common.core.constants import HAS_NUMPY, HAS_PANDAS from hsfs.constructor import query +from hsfs.core import feature_group_api # in case importing in %%local from hsfs.core.vector_db_client import VectorDbClient @@ -197,7 +198,7 @@ def register_external_temporary_table(self, external_fg, alias): external_fg.query, external_fg.data_format, external_fg.options, - external_fg.prepare_spark_location(), + external_fg.storage_connector._get_path(external_fg.path), # cant rely on location since this method can be used before FG is saved ) else: external_dataset = external_fg.dataframe @@ -221,8 +222,8 @@ def register_hudi_temporary_table( read_options, ) - hudi_engine_instance.reconcile_hudi_schema( - self.save_empty_dataframe, hudi_fg_alias, read_options + self.reconcile_schema( + hudi_fg_alias, read_options, hudi_engine_instance ) def register_delta_temporary_table( @@ -241,6 +242,30 @@ def register_delta_temporary_table( read_options, ) + self.reconcile_schema( + delta_fg_alias, read_options, delta_engine_instance + ) + + def reconcile_schema( + self, fg_alias, read_options, engine_instance + ): + if sorted(self._spark_session.table(fg_alias.alias).columns) != sorted( + 
[feature.name for feature in fg_alias.feature_group._features] + + hudi_engine.HudiEngine.HUDI_SPEC_FEATURE_NAMES if fg_alias.feature_group.time_travel_format == "HUDI" else [] + ): + full_fg = feature_group_api.FeatureGroupApi().get( + feature_store_id=fg_alias.feature_group._feature_store_id, + name=fg_alias.feature_group.name, + version=fg_alias.feature_group.version, + ) + + self.update_table_schema(full_fg) + + engine_instance.register_temporary_table( + fg_alias, + read_options, + ) + def _return_dataframe_type(self, dataframe, dataframe_type): if dataframe_type.lower() in ["default", "spark"]: return dataframe @@ -415,14 +440,6 @@ def save_dataframe( validation_id=None, ): try: - # Currently on-demand transformation functions not supported in external feature groups. - if ( - not isinstance(feature_group, fg_mod.ExternalFeatureGroup) - and feature_group.transformation_functions - ): - dataframe = self._apply_transformation_function( - feature_group.transformation_functions, dataframe - ) if ( isinstance(feature_group, fg_mod.ExternalFeatureGroup) and feature_group.online_enabled @@ -467,17 +484,10 @@ def save_stream_dataframe( checkpoint_dir: Optional[str], write_options: Optional[Dict[str, Any]], ): - if feature_group.transformation_functions: - dataframe = self._apply_transformation_function( - feature_group.transformation_functions, dataframe - ) - write_options = kafka_engine.get_kafka_config( feature_group.feature_store_id, write_options, engine="spark" ) - serialized_df = self._online_fg_to_avro( - feature_group, self._encode_complex_features(feature_group, dataframe) - ) + serialized_df = self._serialize_to_avro(feature_group, dataframe) project_id = str(feature_group.feature_store.project_id) feature_group_id = str(feature_group._id) @@ -570,9 +580,7 @@ def _save_online_dataframe(self, feature_group, dataframe, write_options): feature_group.feature_store_id, write_options, engine="spark" ) - serialized_df = self._online_fg_to_avro( - feature_group, self._encode_complex_features(feature_group, dataframe) - ) + serialized_df = self._serialize_to_avro(feature_group, dataframe) project_id = str(feature_group.feature_store.project_id).encode("utf8") feature_group_id = str(feature_group._id).encode("utf8") @@ -592,13 +600,13 @@ def _save_online_dataframe(self, feature_group, dataframe, write_options): "topic", feature_group._online_topic_name ).save() - def _encode_complex_features( + def _serialize_to_avro( self, feature_group: Union[fg_mod.FeatureGroup, fg_mod.ExternalFeatureGroup], dataframe: Union[RDD, DataFrame], ): """Encodes all complex type features to binary using their avro type as schema.""" - return dataframe.select( + encoded_dataframe = dataframe.select( [ field["name"] if field["name"] not in feature_group.get_complex_features() @@ -609,15 +617,10 @@ def _encode_complex_features( ] ) - def _online_fg_to_avro( - self, - feature_group: Union[fg_mod.FeatureGroup, fg_mod.ExternalFeatureGroup], - dataframe: Union[DataFrame, RDD], - ): """Packs all features into named struct to be serialized to single avro/binary column. And packs primary key into arry to be serialized for partitioning. 
""" - return dataframe.select( + return encoded_dataframe.select( [ # be aware: primary_key array should always be sorted to_avro( @@ -640,6 +643,30 @@ def _online_fg_to_avro( ] ) + def _deserialize_from_avro( + self, + feature_group: Union[fg_mod.FeatureGroup, fg_mod.ExternalFeatureGroup], + dataframe: Union[RDD, DataFrame], + ): + """ + Deserializes 'value' column from binary using avro schema and unpacks it into columns. + """ + decoded_dataframe = dataframe.select( + from_avro("value", feature_group._get_encoded_avro_schema()).alias("value") + ).select(col("value.*")) + + """Decodes all complex type features from binary using their avro type as schema.""" + return decoded_dataframe.select( + [ + field["name"] + if field["name"] not in feature_group.get_complex_features() + else from_avro( + field["name"], feature_group._get_feature_avro_schema(field["name"]) + ).alias(field["name"]) + for field in json.loads(feature_group.avro_schema)["fields"] + ] + ) + def get_training_data( self, training_dataset: training_dataset.TrainingDataset, @@ -1250,8 +1277,12 @@ def setup_storage_connector(self, storage_connector, path=None): return path def _setup_s3_hadoop_conf(self, storage_connector, path): - # For legacy behaviour set the S3 values at global level - self._set_s3_hadoop_conf(storage_connector, "fs.s3a") + FS_S3_GLOBAL_CONF = "fs.s3a.global-conf" + + # The argument arrive here as strings + if storage_connector.arguments.get(FS_S3_GLOBAL_CONF, "True").lower() == "true": + # For legacy behaviour set the S3 values at global level + self._set_s3_hadoop_conf(storage_connector, "fs.s3a") # Set credentials at bucket level as well to allow users to use multiple # storage connector in the same application. @@ -1309,18 +1340,20 @@ def is_spark_dataframe(self, dataframe): return True return False - def save_empty_dataframe(self, feature_group, new_features=None): + def update_table_schema(self, feature_group): + if feature_group.time_travel_format == "DELTA": + self._add_cols_to_delta_table(feature_group) + else: + self._save_empty_dataframe(feature_group) + + def _save_empty_dataframe(self, feature_group): location = feature_group.prepare_spark_location() dataframe = self._spark_session.read.format("hudi").load(location) - if (new_features is not None): - if isinstance(new_features, list): - for new_feature in new_features: - dataframe = dataframe.withColumn(new_feature.name, lit(None).cast(new_feature.type)) - else: - dataframe = dataframe.withColumn(new_features.name, lit(None).cast(new_features.type)) - + for _feature in feature_group.features: + if _feature.name not in dataframe.columns: + dataframe = dataframe.withColumn(_feature.name, lit(None).cast(_feature.type)) self.save_dataframe( feature_group, @@ -1332,23 +1365,20 @@ def save_empty_dataframe(self, feature_group, new_features=None): {}, ) - def add_cols_to_delta_table(self, feature_group, new_features): + def _add_cols_to_delta_table(self, feature_group): location = feature_group.prepare_spark_location() dataframe = self._spark_session.read.format("delta").load(location) - if (new_features is not None): - if isinstance(new_features, list): - for new_feature in new_features: - dataframe = dataframe.withColumn(new_feature.name, lit("").cast(new_feature.type)) - else: - dataframe = dataframe.withColumn(new_features.name, lit("").cast(new_features.type)) + for _feature in feature_group.features: + if _feature.name not in dataframe.columns: + dataframe = dataframe.withColumn(_feature.name, lit(None).cast(_feature.type)) - 
dataframe.limit(0).write.format("delta").mode( - "append" - ).option("mergeSchema", "true").option( - "spark.databricks.delta.schema.autoMerge.enabled", "true" - ).save(location) + dataframe.limit(0).write.format("delta").mode("append").option( + "mergeSchema", "true" + ).option("spark.databricks.delta.schema.autoMerge.enabled", "true").save( + location + ) def _apply_transformation_function( self, @@ -1378,9 +1408,19 @@ def _apply_transformation_function( ) if missing_features: - raise FeatureStoreException( - f"Features {missing_features} specified in the transformation function '{hopsworks_udf.function_name}' are not present in the feature view. Please specify the feature required correctly." - ) + if ( + tf.transformation_type + == transformation_function.TransformationType.ON_DEMAND + ): + # On-demand transformation are applied using the python/spark engine during insertion, the transformation while retrieving feature vectors are performed in the vector_server. + raise FeatureStoreException( + f"The following feature(s): `{'`, '.join(missing_features)}`, specified in the on-demand transformation function '{hopsworks_udf.function_name}' are not present in the dataframe being inserted into the feature group. " + + "Please verify that the correct feature names are used in the transformation function and that these features exist in the dataframe being inserted." + ) + else: + raise FeatureStoreException( + f"The following feature(s): `{'`, '.join(missing_features)}`, specified in the model-dependent transformation function '{hopsworks_udf.function_name}' are not present in the feature view. Please verify that the correct features are specified in the transformation function." + ) if tf.hopsworks_udf.dropped_features: dropped_features.update(hopsworks_udf.dropped_features) diff --git a/python/hsfs/feature_group.py b/python/hsfs/feature_group.py index a3385afda..e2c42f1a3 100644 --- a/python/hsfs/feature_group.py +++ b/python/hsfs/feature_group.py @@ -2327,27 +2327,14 @@ def __init__( # for python engine we always use stream feature group if engine.get_type() == "python": self._stream = True - # for stream feature group time travel format is always HUDI - if self._stream: - expected_format = "HUDI" - if self._time_travel_format != expected_format: - warnings.warn( - ( - "The provided time travel format `{}` has been overwritten " - "because Stream enabled feature groups only support `{}`" - ).format(self._time_travel_format, expected_format), - util.FeatureGroupWarning, - stacklevel=1, - ) - self._time_travel_format = expected_format self.primary_key = primary_key self.partition_key = partition_key self._hudi_precombine_key = ( util.autofix_feature_name(hudi_precombine_key) if hudi_precombine_key is not None - and self._time_travel_format is not None - and self._time_travel_format == "HUDI" + and (self._time_travel_format is None + or self._time_travel_format == "HUDI") else None ) self.statistics_config = statistics_config @@ -3289,7 +3276,7 @@ def delta_vacuum( fg = fs.get_or_create_feature_group(...) commit_details = fg.delta_vacuum(retention_hours = 168) - + ``` # Arguments retention_hours: User provided retention period. The default retention threshold for the files is 7 days. diff --git a/python/hsfs/feature_store.py b/python/hsfs/feature_store.py index c1ef352f9..4b45c9c77 100644 --- a/python/hsfs/feature_store.py +++ b/python/hsfs/feature_store.py @@ -458,7 +458,7 @@ def sql( For spark engine: Dictionary of read options for Spark. 
For python engine: If running queries on the online feature store, users can provide an entry `{'external': True}`, - this instructs the library to use the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) to establish the connection to the online feature store. + this instructs the library to use the `host` parameter in the [`hopsworks.login()`](login.md#login) to establish the connection to the online feature store. If not set, or set to False, the online feature store storage connector is used which relies on the private ip. Defaults to `{}`. @@ -556,7 +556,7 @@ def plus_two(value): online_enabled=True, event_time='date', transformation_functions=transformation_functions, - online_config={'online_comments': ['NDB_TABLE=READ_BACKUP=1']} + online_config={'table_space': 'ts_1', 'online_comments': ['NDB_TABLE=READ_BACKUP=1']} ) ``` @@ -721,7 +721,7 @@ def get_or_create_feature_group( online_enabled=True, event_time="timestamp", transformation_functions=transformation_functions, - online_config={'online_comments': ['NDB_TABLE=READ_BACKUP=1']} + online_config={'table_space': 'ts_1', 'online_comments': ['NDB_TABLE=READ_BACKUP=1']} ) ``` @@ -1023,7 +1023,7 @@ def create_external_feature_group( primary_key=['ss_store_sk'], event_time='sale_date', online_enabled=True, - online_config={'online_comments': ['NDB_TABLE=READ_BACKUP=1']} + online_config={'table_space': 'ts_1', 'online_comments': ['NDB_TABLE=READ_BACKUP=1']} ) external_fg.save() diff --git a/python/hsfs/feature_view.py b/python/hsfs/feature_view.py index 6dbe7a585..b61b3e09a 100644 --- a/python/hsfs/feature_view.py +++ b/python/hsfs/feature_view.py @@ -337,7 +337,7 @@ def init_serving( Transformation statistics are fetched from training dataset and applied to the feature vector. external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. @@ -520,7 +520,7 @@ def get_batch_query( def get_feature_vector( self, - entry: Dict[str, Any], + entry: Optional[Dict[str, Any]] = None, passed_features: Optional[Dict[str, Any]] = None, external: Optional[bool] = None, return_type: Literal["list", "polars", "numpy", "pandas"] = "list", @@ -592,7 +592,7 @@ def get_feature_vector( providing feature values which are not available in the feature store. external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. 
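A hedged usage sketch of the relaxed `entry` argument, assuming a feature view handle `fv` whose features are all computed by on-demand transformation functions (the handle and request-parameter names are illustrative):

# Single vector: entry may now be omitted; request_parameters carries the
# inputs of the on-demand transformations instead.
vector = fv.get_feature_vector(
    request_parameters={"longitude": 12.57, "latitude": 55.68},
    return_type="list",
)

# Batch variant: one request-parameter dict per requested vector.
vectors = fv.get_feature_vectors(
    request_parameters=[
        {"longitude": 12.57, "latitude": 55.68},
        {"longitude": 2.35, "latitude": 48.86},
    ],
    return_type="list",
)
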
@@ -635,7 +635,7 @@ def get_feature_vector( def get_feature_vectors( self, - entry: List[Dict[str, Any]], + entry: Optional[List[Dict[str, Any]]] = None, passed_features: Optional[List[Dict[str, Any]]] = None, external: Optional[bool] = None, return_type: Literal["list", "polars", "numpy", "pandas"] = "list", @@ -705,7 +705,7 @@ def get_feature_vectors( providing feature values which are not available in the feature store. external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. @@ -777,7 +777,7 @@ def get_inference_helper( Set of required primary keys is [`feature_view.primary_keys`](#primary_keys) external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. @@ -835,7 +835,7 @@ def get_inference_helpers( Set of required primary keys is [`feature_view.primary_keys`](#primary_keys) external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. @@ -912,7 +912,7 @@ def find_neighbors( filter: A filter expression to restrict the search space (optional). external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. @@ -3567,7 +3567,7 @@ def transform( feature_vector: `Union[List[Any], List[List[Any]], pd.DataFrame, pl.DataFrame]`. The feature vector to be transformed. external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. 
Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. diff --git a/python/hsfs/storage_connector.py b/python/hsfs/storage_connector.py index 7ed887cd9..15ccdc8d6 100644 --- a/python/hsfs/storage_connector.py +++ b/python/hsfs/storage_connector.py @@ -369,6 +369,7 @@ def prepare_spark(self, path: Optional[str] = None) -> Optional[str]: # Arguments path: Path to prepare for reading from cloud storage. Defaults to `None`. """ + self.refetch() return engine.get_instance().setup_storage_connector(self, path) def connector_options(self) -> Dict[str, Any]: diff --git a/python/hsfs/training_dataset.py b/python/hsfs/training_dataset.py index 94688b692..7d9e89ec8 100644 --- a/python/hsfs/training_dataset.py +++ b/python/hsfs/training_dataset.py @@ -1007,7 +1007,7 @@ def init_prepared_statement( initialised for retrieving serving vectors as a batch. external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. @@ -1024,7 +1024,7 @@ def get_serving_vector( serving application. external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. @@ -1046,7 +1046,7 @@ def get_serving_vectors( serving application. external: boolean, optional. If set to True, the connection to the online feature store is established using the same host as - for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method. + for the `host` parameter in the [`hopsworks.login()`](login.md#login) method. If set to False, the online feature store storage connector is used which relies on the private IP. Defaults to True if connection to Hopsworks is established from external environment (e.g AWS Sagemaker or Google Colab), otherwise to False. diff --git a/python/hsml/core/hdfs_api.py b/python/hsml/core/hdfs_api.py new file mode 100644 index 000000000..d786bce37 --- /dev/null +++ b/python/hsml/core/hdfs_api.py @@ -0,0 +1,93 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import annotations + +import os + + +class HdfsApi: + def __init__(self): + + import fsspec.implementations.arrow as pfs + + host, port = os.environ["LIBHDFS_DEFAULT_FS"].split(":") + + self._hopsfs = pfs.HadoopFileSystem( + host=host, + port=int(port), + user=os.environ["LIBHDFS_DEFAULT_USER"], + ) + + DEFAULT_BUFFER_SIZE = 0 + + def upload( + self, + local_path: str, + upload_path: str, + overwrite: bool = False, + buffer_size: int = DEFAULT_BUFFER_SIZE, + ): + """Upload file/directory to the Hopsworks filesystem. + :param local_path: local path to file to upload + :type local_path: str + :param upload_path: path to directory where to upload the file in Hopsworks filesystem + :type upload_path: str + :param overwrite: overwrite file if exists + :type overwrite: bool + :param buffer_size: size of the temporary read and write buffer. Defaults to 0. + :type buffer_size: int + """ + # local path could be absolute or relative, + if not os.path.isabs(local_path) and os.path.exists( + os.path.join(os.getcwd(), local_path) + ): + local_path = os.path.join(os.getcwd(), local_path) + + _, file_name = os.path.split(local_path) + + destination_path = upload_path + "/" + file_name + + if self._hopsfs.exists(destination_path): + if overwrite: + self._hopsfs.rm(destination_path, recursive=True) + else: + raise Exception( + "{} already exists, set overwrite=True to overwrite it".format( + local_path + ) + ) + + self._hopsfs.upload( + lpath=local_path, + rpath=destination_path, + recursive=True, + buffer_size=buffer_size, + ) + + return upload_path + "/" + os.path.basename(local_path) + + def download(self, path, local_path, buffer_size=DEFAULT_BUFFER_SIZE): + """Download file/directory on a path in datasets. + :param path: path to download + :type path: str + :param local_path: path to download in datasets + :type local_path: str + :param buffer_size: size of the temporary read and write buffer. Defaults to 0. + :type buffer_size: int + """ + + self._hopsfs.download(path, local_path, recursive=True, buffer_size=buffer_size) diff --git a/python/hsml/core/serving_api.py b/python/hsml/core/serving_api.py index 92d947728..9a124465d 100644 --- a/python/hsml/core/serving_api.py +++ b/python/hsml/core/serving_api.py @@ -419,4 +419,7 @@ def _get_hopsworks_inference_path(self, project_id: int, deployment_instance): ] def _get_istio_inference_path(self, deployment_instance): + if deployment_instance.model_server == "VLLM": + return ["openai", "v1", "completions"] + return ["v1", "models", deployment_instance.name + ":predict"] diff --git a/python/hsml/deployment.py b/python/hsml/deployment.py index 6999acc41..9c98b4e94 100644 --- a/python/hsml/deployment.py +++ b/python/hsml/deployment.py @@ -66,7 +66,7 @@ def __init__( self._model_registry_id = None @usage.method_logger - def save(self, await_update: Optional[int] = 60): + def save(self, await_update: Optional[int] = 120): """Persist this deployment including the predictor and metadata to Model Serving. 
# Arguments @@ -78,7 +78,7 @@ def save(self, await_update: Optional[int] = 60): self._serving_engine.save(self, await_update) @usage.method_logger - def start(self, await_running: Optional[int] = 60): + def start(self, await_running: Optional[int] = 120): """Start the deployment # Arguments @@ -90,7 +90,7 @@ def start(self, await_running: Optional[int] = 60): self._serving_engine.start(self, await_status=await_running) @usage.method_logger - def stop(self, await_stopped: Optional[int] = 60): + def stop(self, await_stopped: Optional[int] = 120): """Stop the deployment # Arguments @@ -218,10 +218,14 @@ def get_model(self): ) @usage.method_logger - def download_artifact(self): - """Download the model artifact served by the deployment""" + def download_artifact_files(self, local_path=None): + """Download the artifact files served by the deployment - return self._serving_engine.download_artifact(self) + # Arguments + local_path: path where to download the artifact files in the local filesystem + """ + + return self._serving_engine.download_artifact_files(self, local_path=local_path) def get_logs(self, component="predictor", tail=10): """Prints the deployment logs of the predictor or transformer. @@ -372,9 +376,15 @@ def artifact_version(self): def artifact_version(self, artifact_version: Union[int, str]): self._predictor.artifact_version = artifact_version + @property + def artifact_files_path(self): + """Path of the artifact files deployed by the predictor.""" + return self._predictor.artifact_files_path + @property def artifact_path(self): """Path of the model artifact deployed by the predictor.""" + # TODO: deprecated return self._predictor.artifact_path @property diff --git a/python/hsml/engine/local_engine.py b/python/hsml/engine/local_engine.py index 7b669a249..d703002da 100644 --- a/python/hsml/engine/local_engine.py +++ b/python/hsml/engine/local_engine.py @@ -17,7 +17,7 @@ import os from hsml import client -from hsml.core import dataset_api, model_api +from hsml.core import dataset_api, hdfs_api, model_api class LocalEngine: @@ -25,6 +25,11 @@ def __init__(self): self._dataset_api = dataset_api.DatasetApi() self._model_api = model_api.ModelApi() + try: + self._hdfs_api = hdfs_api.HdfsApi() + except Exception: + self._hdfs_api = None + def mkdir(self, remote_path: str): remote_path = self._prepend_project_path(remote_path) self._dataset_api.mkdir(remote_path) @@ -38,26 +43,55 @@ def upload(self, local_path: str, remote_path: str, upload_configuration=None): # Initialize the upload configuration to empty dictionary if is None upload_configuration = upload_configuration if upload_configuration else {} - self._dataset_api.upload( - local_path, - remote_path, - chunk_size=upload_configuration.get( - "chunk_size", self._dataset_api.DEFAULT_UPLOAD_FLOW_CHUNK_SIZE - ), - simultaneous_uploads=upload_configuration.get( - "simultaneous_uploads", - self._dataset_api.DEFAULT_UPLOAD_SIMULTANEOUS_UPLOADS, - ), - max_chunk_retries=upload_configuration.get( - "max_chunk_retries", - self._dataset_api.DEFAULT_UPLOAD_MAX_CHUNK_RETRIES, - ), - ) - def download(self, remote_path: str, local_path: str): + if self._hdfs_api is not None: + # use the hdfs client if available + self._hdfs_api.upload( + local_path=local_path, + upload_path=remote_path, + buffer_size=upload_configuration.get( + "buffer_size", self._hdfs_api.DEFAULT_BUFFER_SIZE + ), + ) + else: + # otherwise, use the REST API + self._dataset_api.upload( + local_path, + remote_path, + chunk_size=upload_configuration.get( + "chunk_size", 
self._dataset_api.DEFAULT_UPLOAD_FLOW_CHUNK_SIZE + ), + simultaneous_uploads=upload_configuration.get( + "simultaneous_uploads", + self._dataset_api.DEFAULT_UPLOAD_SIMULTANEOUS_UPLOADS, + ), + max_chunk_retries=upload_configuration.get( + "max_chunk_retries", + self._dataset_api.DEFAULT_UPLOAD_MAX_CHUNK_RETRIES, + ), + ) + + def download(self, remote_path: str, local_path: str, download_configuration=None): local_path = self._get_abs_path(local_path) remote_path = self._prepend_project_path(remote_path) - self._dataset_api.download(remote_path, local_path) + + # Initialize the download configuration to empty dictionary if is None + download_configuration = ( + download_configuration if download_configuration else {} + ) + + if self._hdfs_api is not None: + # use the hdfs client if available + self._hdfs_api.download( + path=remote_path, + local_path=local_path, + buffer_size=download_configuration.get( + "buffer_size", self._hdfs_api.DEFAULT_BUFFER_SIZE + ), + ) + else: + # otherwise, use the REST API + self._dataset_api.download(remote_path, local_path) def copy(self, source_path, destination_path): source_path = self._prepend_project_path(source_path) diff --git a/python/hsml/engine/model_engine.py b/python/hsml/engine/model_engine.py index 29a1a0234..bb6312f66 100644 --- a/python/hsml/engine/model_engine.py +++ b/python/hsml/engine/model_engine.py @@ -81,11 +81,11 @@ def _upload_additional_resources(self, model_instance): return model_instance def _copy_or_move_hopsfs_model_item( - self, item_attr, to_model_version_path, keep_original_files + self, item_attr, to_model_files_path, keep_original_files ): """Copy or move model item from a hdfs path to the model version folder in the Models dataset. It works with files and folders.""" path = item_attr["path"] - to_hdfs_path = os.path.join(to_model_version_path, os.path.basename(path)) + to_hdfs_path = os.path.join(to_model_files_path, os.path.basename(path)) if keep_original_files: self._engine.copy(path, to_hdfs_path) else: @@ -94,7 +94,7 @@ def _copy_or_move_hopsfs_model_item( def _copy_or_move_hopsfs_model( self, from_hdfs_model_path, - to_model_version_path, + to_model_files_path, keep_original_files, update_upload_progress, ): @@ -123,7 +123,7 @@ def _copy_or_move_hopsfs_model( )["items"]: path_attr = entry["attributes"] self._copy_or_move_hopsfs_model_item( - path_attr, to_model_version_path, keep_original_files + path_attr, to_model_files_path, keep_original_files ) if path_attr.get("dir", False): n_dirs += 1 @@ -133,7 +133,7 @@ def _copy_or_move_hopsfs_model( else: # if path is a file, copy/move it self._copy_or_move_hopsfs_model_item( - model_path_attr, to_model_version_path, keep_original_files + model_path_attr, to_model_files_path, keep_original_files ) n_files += 1 update_upload_progress(n_dirs=n_dirs, n_files=n_files) @@ -157,7 +157,9 @@ def _download_model_from_hopsfs_recursive( if path_attr.get("dir", False): # otherwise, make a recursive call for the folder - if basename == "Artifacts": + if ( + basename == constants.MODEL_SERVING.ARTIFACTS_DIR_NAME + ): # TODO: Not needed anymore continue # skip Artifacts subfolder local_folder_path = os.path.join(to_local_path, basename) os.mkdir(local_folder_path) @@ -196,11 +198,11 @@ def _download_model_from_hopsfs( def _upload_local_model( self, from_local_model_path, - to_model_version_path, + to_model_files_path, update_upload_progress, upload_configuration=None, ): - """Copy or upload model files from a local path to the model version folder in the Models dataset.""" + """Copy or 
upload model files from a local path to the model files folder in the Models dataset.""" n_dirs, n_files = 0, 0 if os.path.isdir(from_local_model_path): # if path is a dir, upload files and folders iteratively @@ -211,8 +213,8 @@ def _upload_local_model( # - files is the list of file names present in the root dir # we need to replace the local path prefix with the hdfs path prefix (i.e., /srv/hops/....../root with /Projects/.../) remote_base_path = root.replace( - from_local_model_path, to_model_version_path - ) + from_local_model_path, to_model_files_path + ).replace(os.sep, "/") for d_name in dirs: self._engine.mkdir(remote_base_path + "/" + d_name) n_dirs += 1 @@ -229,7 +231,7 @@ def _upload_local_model( # if path is a file, upload file self._engine.upload( from_local_model_path, - to_model_version_path, + to_model_files_path, upload_configuration=upload_configuration, ) n_files += 1 @@ -250,14 +252,14 @@ def _save_model_from_local_or_hopsfs_mount( from_hdfs_model_path=model_path.replace( constants.MODEL_REGISTRY.HOPSFS_MOUNT_PREFIX, "" ), - to_model_version_path=model_instance.version_path, + to_model_files_path=model_instance.model_files_path, keep_original_files=keep_original_files, update_upload_progress=update_upload_progress, ) else: self._upload_local_model( from_local_model_path=model_path, - to_model_version_path=model_instance.version_path, + to_model_files_path=model_instance.model_files_path, update_upload_progress=update_upload_progress, upload_configuration=upload_configuration, ) @@ -366,6 +368,7 @@ def save( if step["id"] == 0: # Create folders self._engine.mkdir(model_instance.version_path) + self._engine.mkdir(model_instance.model_files_path) if step["id"] == 1: def update_upload_progress(n_dirs=0, n_files=0, step=step): @@ -375,7 +378,7 @@ def update_upload_progress(n_dirs=0, n_files=0, step=step): update_upload_progress(n_dirs=0, n_files=0) - # Upload Model files from local path to /Models/{model_instance._name}/{model_instance._version} + # Upload Model files from local path to /Models/{model_instance._name}/{model_instance._version}/Files # check local absolute if os.path.isabs(model_path) and os.path.exists(model_path): self._save_model_from_local_or_hopsfs_mount( @@ -402,7 +405,7 @@ def update_upload_progress(n_dirs=0, n_files=0, step=step): ): # check hdfs relative and absolute self._copy_or_move_hopsfs_model( from_hdfs_model_path=model_path, - to_model_version_path=model_instance.version_path, + to_model_files_path=model_instance.model_files_path, keep_original_files=keep_original_files, update_upload_progress=update_upload_progress, ) @@ -432,12 +435,13 @@ def update_upload_progress(n_dirs=0, n_files=0, step=step): return model_instance - def download(self, model_instance): - model_name_path = os.path.join( - tempfile.gettempdir(), str(uuid.uuid4()), model_instance._name - ) - model_version_path = model_name_path + "/" + str(model_instance._version) - os.makedirs(model_version_path) + def download(self, model_instance, local_path=None): + if local_path is None: + local_path = os.path.join( + tempfile.gettempdir(), str(uuid.uuid4()), model_instance._name + ) + local_path = local_path + "/" + str(model_instance._version) + os.makedirs(local_path, exist_ok=True) def update_download_progress(n_dirs, n_files, done=False): print( @@ -447,20 +451,20 @@ def update_download_progress(n_dirs, n_files, done=False): ) try: - from_hdfs_model_path = model_instance.version_path + from_hdfs_model_path = model_instance.model_files_path if 
from_hdfs_model_path.startswith("hdfs:/"): projects_index = from_hdfs_model_path.find("/Projects", 0) from_hdfs_model_path = from_hdfs_model_path[projects_index:] self._download_model_from_hopsfs( from_hdfs_model_path=from_hdfs_model_path, - to_local_path=model_version_path, + to_local_path=local_path, update_download_progress=update_download_progress, ) except BaseException as be: raise be - return model_version_path + return local_path def read_file(self, model_instance, resource): hdfs_resource_path = self._build_resource_path( diff --git a/python/hsml/engine/serving_engine.py b/python/hsml/engine/serving_engine.py index 1151fb79b..4bc377ae5 100644 --- a/python/hsml/engine/serving_engine.py +++ b/python/hsml/engine/serving_engine.py @@ -15,17 +15,23 @@ # import os +import tempfile import time import uuid from typing import Dict, List, Union -from hopsworks_common import util from hopsworks_common.client.exceptions import ModelServingException, RestAPIError from hopsworks_common.client.istio.utils.infer_type import InferInput -from hopsworks_common.constants import DEPLOYMENT, PREDICTOR, PREDICTOR_STATE +from hopsworks_common.constants import ( + DEPLOYMENT, + MODEL_SERVING, + PREDICTOR, + PREDICTOR_STATE, +) from hopsworks_common.constants import INFERENCE_ENDPOINTS as IE from hopsworks_common.core import dataset_api from hsml.core import serving_api +from hsml.engine import local_engine from tqdm.auto import tqdm @@ -46,6 +52,8 @@ def __init__(self): self._serving_api = serving_api.ServingApi() self._dataset_api = dataset_api.DatasetApi() + self._engine = local_engine.LocalEngine() + def _poll_deployment_status( self, deployment_instance, status: str, await_status: int, update_progress=None ): @@ -299,7 +307,64 @@ def _get_stopped_instances(self, available_instances, requested_instances): num_instances = requested_instances - available_instances return num_instances if num_instances >= 0 else 0 - def download_artifact(self, deployment_instance): + def _download_files_from_hopsfs_recursive( + self, + from_hdfs_path: str, + to_local_path: str, + update_download_progress, + n_dirs, + n_files, + ): + """Download model files from a model path in hdfs, recursively""" + + for entry in self._dataset_api.list(from_hdfs_path, sort_by="NAME:desc")[ + "items" + ]: + path_attr = entry["attributes"] + path = path_attr["path"] + basename = os.path.basename(path) + + if path_attr.get("dir", False): + # otherwise, make a recursive call for the folder + if ( + basename == MODEL_SERVING.ARTIFACTS_DIR_NAME + ): # TODO: Not needed anymore + continue # skip Artifacts subfolder + local_folder_path = os.path.join(to_local_path, basename) + os.mkdir(local_folder_path) + n_dirs, n_files = self._download_files_from_hopsfs_recursive( + from_hdfs_path=path, + to_local_path=local_folder_path, + update_download_progress=update_download_progress, + n_dirs=n_dirs, + n_files=n_files, + ) + n_dirs += 1 + update_download_progress(n_dirs=n_dirs, n_files=n_files) + else: + # if it's a file, download it + local_file_path = os.path.join(to_local_path, basename) + self._engine.download(path, local_file_path) + n_files += 1 + update_download_progress(n_dirs=n_dirs, n_files=n_files) + + return n_dirs, n_files + + def _download_files_from_hopsfs( + self, from_hdfs_path: str, to_local_path: str, update_download_progress + ): + """Download files from a model path in hdfs.""" + + n_dirs, n_files = self._download_files_from_hopsfs_recursive( + from_hdfs_path=from_hdfs_path, + to_local_path=to_local_path, + 
update_download_progress=update_download_progress, + n_dirs=0, + n_files=0, + ) + update_download_progress(n_dirs=n_dirs, n_files=n_files, done=True) + + def download_artifact_files(self, deployment_instance, local_path=None): if deployment_instance.id is None: raise ModelServingException( "Deployment is not created yet. To create the deployment use `.save()`" @@ -311,30 +376,39 @@ def download_artifact(self, deployment_instance): Download the model files by using `model.download()`" ) - from_artifact_zip_path = deployment_instance.artifact_path - to_artifacts_path = os.path.join( - os.getcwd(), - str(uuid.uuid4()), - deployment_instance.model_name, - str(deployment_instance.model_version), - "Artifacts", - ) - to_artifact_version_path = ( - to_artifacts_path + "/" + str(deployment_instance.artifact_version) - ) - to_artifact_zip_path = to_artifact_version_path + ".zip" + if local_path is None: + local_path = os.path.join( + tempfile.gettempdir(), + str(uuid.uuid4()), + deployment_instance.model_name, + str(deployment_instance.model_version), + MODEL_SERVING.ARTIFACTS_DIR_NAME, + str(deployment_instance.artifact_version), + ) + os.makedirs(local_path, exist_ok=True) - os.makedirs(to_artifacts_path) + def update_download_progress(n_dirs, n_files, done=False): + print( + "Downloading artifact files (%s dirs, %s files)... %s" + % (n_dirs, n_files, "DONE" if done else ""), + end="\r", + ) try: - self._dataset_api.download(from_artifact_zip_path, to_artifact_zip_path) - util.decompress(to_artifact_zip_path, extract_dir=to_artifacts_path) - os.remove(to_artifact_zip_path) - finally: - if os.path.exists(to_artifact_zip_path): - os.remove(to_artifact_zip_path) + from_hdfs_path = deployment_instance.artifact_files_path + if from_hdfs_path.startswith("hdfs:/"): + projects_index = from_hdfs_path.find("/Projects", 0) + from_hdfs_path = from_hdfs_path[projects_index:] + + self._download_files_from_hopsfs( + from_hdfs_path=from_hdfs_path, + to_local_path=local_path, + update_download_progress=update_download_progress, + ) + except BaseException as be: + raise be - return to_artifact_version_path + return local_path def create(self, deployment_instance): try: @@ -488,7 +562,10 @@ def predict( inputs: Union[Dict, List[Dict]], ): # validate user-provided payload - self._validate_inference_payload(deployment_instance.api_protocol, data, inputs) + if deployment_instance.model_server != "VLLM": + self._validate_inference_payload( + deployment_instance.api_protocol, data, inputs + ) # build inference payload based on API protocol payload = self._build_inference_payload( diff --git a/python/hsml/llm/__init__.py b/python/hsml/llm/__init__.py new file mode 100644 index 000000000..ff8055b9b --- /dev/null +++ b/python/hsml/llm/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/python/hsml/llm/model.py b/python/hsml/llm/model.py new file mode 100644 index 000000000..b52cf6398 --- /dev/null +++ b/python/hsml/llm/model.py @@ -0,0 +1,75 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import humps +from hsml.constants import MODEL +from hsml.model import Model + + +class Model(Model): + """Metadata object representing a LLM model in the Model Registry.""" + + def __init__( + self, + id, + name, + version=None, + created=None, + creator=None, + environment=None, + description=None, + project_name=None, + metrics=None, + program=None, + user_full_name=None, + model_schema=None, + training_dataset=None, + input_example=None, + model_registry_id=None, + tags=None, + href=None, + feature_view=None, + training_dataset_version=None, + **kwargs, + ): + super().__init__( + id, + name, + version=version, + created=created, + creator=creator, + environment=environment, + description=description, + project_name=project_name, + metrics=metrics, + program=program, + user_full_name=user_full_name, + model_schema=model_schema, + training_dataset=training_dataset, + input_example=input_example, + framework=MODEL.FRAMEWORK_LLM, + model_registry_id=model_registry_id, + feature_view=feature_view, + training_dataset_version=training_dataset_version, + ) + + def update_from_response_json(self, json_dict): + json_decamelized = humps.decamelize(json_dict) + json_decamelized.pop("framework") + if "type" in json_decamelized: # backwards compatibility + _ = json_decamelized.pop("type") + self.__init__(**json_decamelized) + return self diff --git a/python/hsml/llm/predictor.py b/python/hsml/llm/predictor.py new file mode 100644 index 000000000..814edc522 --- /dev/null +++ b/python/hsml/llm/predictor.py @@ -0,0 +1,28 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from hsml.constants import MODEL, PREDICTOR +from hsml.predictor import Predictor + + +class Predictor(Predictor): + """Configuration for a predictor running with the vLLM backend""" + + def __init__(self, **kwargs): + kwargs["model_framework"] = MODEL.FRAMEWORK_LLM + kwargs["model_server"] = PREDICTOR.MODEL_SERVER_VLLM + + super().__init__(**kwargs) diff --git a/python/hsml/llm/signature.py b/python/hsml/llm/signature.py new file mode 100644 index 000000000..05ff003eb --- /dev/null +++ b/python/hsml/llm/signature.py @@ -0,0 +1,79 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Optional, Union + +import numpy +import pandas +from hopsworks_common import usage +from hsml.llm.model import Model +from hsml.model_schema import ModelSchema + + +_mr = None + + +@usage.method_logger +def create_model( + name: str, + version: Optional[int] = None, + metrics: Optional[dict] = None, + description: Optional[str] = None, + input_example: Optional[ + Union[pandas.DataFrame, pandas.Series, numpy.ndarray, list] + ] = None, + model_schema: Optional[ModelSchema] = None, + feature_view=None, + training_dataset_version: Optional[int] = None, +): + """Create an LLM model metadata object. + + !!! note "Lazy" + This method is lazy and does not persist any metadata or uploads model artifacts in the + model registry on its own. To save the model object and the model artifacts, call the `save()` method with a + local file path to the directory containing the model artifacts. + + # Arguments + name: Name of the model to create. + version: Optionally version of the model to create, defaults to `None` and + will create the model with incremented version from the last + version in the model registry. + metrics: Optionally a dictionary with model evaluation metrics (e.g., accuracy, MAE) + description: Optionally a string describing the model, defaults to empty string + `""`. + input_example: Optionally an input example that represents a single input for the model, defaults to `None`. + model_schema: Optionally a model schema for the model inputs and/or outputs. + feature_view: Optionally a feature view object returned by querying the feature store. If the feature view is not provided, the model will not have access to provenance. + training_dataset_version: Optionally a training dataset version. If training dataset version is not provided, but the feature view is provided, the training dataset version used will be the last accessed training dataset of the feature view, within the code/notebook that reads the feature view and training dataset and then creates the model. + + # Returns + `Model`. The model metadata object. 
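A minimal usage sketch of the new `mr.llm.create_model()` entry point documented above, assuming an existing model registry handle `mr` and a local directory containing the model weights (names and metric values are illustrative only):

    mr = project.get_model_registry()      # existing Hopsworks project handle assumed
    llm_model = mr.llm.create_model(
        name="llama_summarizer",           # illustrative model name
        metrics={"rougeL": 0.41},          # optional evaluation metrics
    )
    llm_model.save("./llm_weights")        # persists the metadata and uploads the artifacts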
+ """ + model = Model( + id=None, + name=name, + version=version, + description=description, + metrics=metrics, + input_example=input_example, + model_schema=model_schema, + feature_view=feature_view, + training_dataset_version=training_dataset_version, + ) + model._shared_registry_project_name = _mr.shared_registry_project_name + model._model_registry_id = _mr.model_registry_id + + return model diff --git a/python/hsml/model.py b/python/hsml/model.py index 3e39a7b26..2c897b50d 100644 --- a/python/hsml/model.py +++ b/python/hsml/model.py @@ -17,19 +17,22 @@ import json import logging import os +import re import warnings from typing import Any, Dict, Optional, Union import humps from hopsworks_common import client, usage, util -from hopsworks_common.constants import ARTIFACT_VERSION +from hopsworks_common.constants import ARTIFACT_VERSION, MODEL_REGISTRY from hopsworks_common.constants import INFERENCE_ENDPOINTS as IE from hsml.core import explicit_provenance from hsml.engine import model_engine from hsml.inference_batcher import InferenceBatcher from hsml.inference_logger import InferenceLogger +from hsml.model_schema import ModelSchema from hsml.predictor import Predictor from hsml.resources import PredictorResources +from hsml.schema import Schema from hsml.transformer import Transformer @@ -53,7 +56,6 @@ def __init__( program=None, user_full_name=None, model_schema=None, - training_dataset=None, input_example=None, framework=None, model_registry_id=None, @@ -83,7 +85,6 @@ def __init__( self._input_example = input_example self._framework = framework self._model_schema = model_schema - self._training_dataset = training_dataset # This is needed for update_from_response_json function to not overwrite name of the shared registry this model originates from if not hasattr(self, "_shared_registry_project_name"): @@ -94,17 +95,6 @@ def __init__( self._model_engine = model_engine.ModelEngine() self._feature_view = feature_view self._training_dataset_version = training_dataset_version - if training_dataset_version is None and feature_view is not None: - if feature_view.get_last_accessed_training_dataset() is not None: - self._training_dataset_version = ( - feature_view.get_last_accessed_training_dataset() - ) - else: - warnings.warn( - "Provenance cached data - feature view provided, but training dataset version is missing", - util.ProvenanceWarning, - stacklevel=1, - ) @usage.method_logger def save( @@ -130,6 +120,39 @@ def save( # Returns `Model`: The model metadata object. 
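A hedged sketch of the save-time defaults introduced in the hunk below: when a feature view is attached and a training dataset has been read in the same session, `save()` fills in the training dataset version and infers the model schema from the training dataset features. The handles `mr` and `fv` and the names used here are assumptions for illustration:

    # `fv` is a feature view whose training data was already read in this session (assumed)
    model = mr.python.create_model(name="fraud_clf", feature_view=fv)
    model.save("./model_dir")   # training_dataset_version and model_schema are inferred here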
""" + if self._training_dataset_version is None and self._feature_view is not None: + if self._feature_view.get_last_accessed_training_dataset() is not None: + self._training_dataset_version = ( + self._feature_view.get_last_accessed_training_dataset() + ) + else: + warnings.warn( + "Provenance cached data - feature view provided, but training dataset version is missing", + util.ProvenanceWarning, + stacklevel=1, + ) + if self._model_schema is None: + if ( + self._feature_view is not None + and self._training_dataset_version is not None + ): + all_features = self._feature_view.get_training_dataset_schema( + self._training_dataset_version + ) + features, labels = [], [] + for feature in all_features: + (labels if feature.label else features).append(feature.to_dict()) + self._model_schema = ModelSchema( + input_schema=Schema(features) if features else None, + output_schema=Schema(labels) if labels else None, + ) + else: + warnings.warn( + "Model schema cannot not be inferred without both the feature view and the training dataset version.", + util.ProvenanceWarning, + stacklevel=1, + ) + return self._model_engine.save( model_instance=self, model_path=model_path, @@ -139,13 +162,15 @@ def save( ) @usage.method_logger - def download(self): + def download(self, local_path=None): """Download the model files. + # Arguments + local_path: path where to download the model files in the local filesystem # Returns `str`: Absolute path to local folder containing the model files. """ - return self._model_engine.download(model_instance=self) + return self._model_engine.download(model_instance=self, local_path=local_path) @usage.method_logger def delete(self): @@ -211,7 +236,7 @@ def deploy( """ if name is None: - name = self._name + name = self._get_default_serving_name() predictor = Predictor.for_model( self, @@ -341,6 +366,9 @@ def get_training_dataset_provenance(self): """ return self._model_engine.get_training_dataset_provenance(model_instance=self) + def _get_default_serving_name(self): + return re.sub(r"[^a-zA-Z0-9]", "", self._name) + @classmethod def from_response_json(cls, json_dict): json_decamelized = humps.decamelize(json_dict) @@ -372,7 +400,6 @@ def to_dict(self): "inputExample": self._input_example, "framework": self._framework, "metrics": self._training_metrics, - "trainingDataset": self._training_dataset, "environment": self._environment, "program": self._program, "featureView": util.feature_view_to_json(self._feature_view), @@ -507,15 +534,6 @@ def model_schema(self): def model_schema(self, model_schema): self._model_schema = model_schema - @property - def training_dataset(self): - """training_dataset of the model.""" - return self._training_dataset - - @training_dataset.setter - def training_dataset(self, training_dataset): - self._training_dataset = training_dataset - @property def project_name(self): """project_name of the model.""" @@ -544,6 +562,14 @@ def version_path(self): """path of the model including version folder. Resolves to /Projects/{project_name}/Models/{name}/{version}""" return "{}/{}".format(self.model_path, str(self.version)) + @property + def model_files_path(self): + """path of the model files including version and files folder. 
Resolves to /Projects/{project_name}/Models/{name}/{version}/Files""" + return "{}/{}".format( + self.version_path, + MODEL_REGISTRY.MODEL_FILES_DIR_NAME, + ) + @property def shared_registry_project_name(self): """shared_registry_project_name of the model.""" diff --git a/python/hsml/model_registry.py b/python/hsml/model_registry.py index cfd9136aa..9309eb7c4 100644 --- a/python/hsml/model_registry.py +++ b/python/hsml/model_registry.py @@ -19,6 +19,7 @@ import humps from hopsworks_common import usage, util from hsml.core import model_api +from hsml.llm import signature as llm_signature # noqa: F401 from hsml.python import signature as python_signature # noqa: F401 from hsml.sklearn import signature as sklearn_signature # noqa: F401 from hsml.tensorflow import signature as tensorflow_signature # noqa: F401 @@ -48,11 +49,13 @@ def __init__( self._python = python_signature self._sklearn = sklearn_signature self._torch = torch_signature + self._llm = llm_signature tensorflow_signature._mr = self python_signature._mr = self sklearn_signature._mr = self torch_signature._mr = self + llm_signature._mr = self @classmethod def from_response_json(cls, json_dict): @@ -190,6 +193,12 @@ def python(self): return python_signature + @property + def llm(self): + """Module for exporting a Large Language Model.""" + + return llm_signature + def __repr__(self): project_name = ( self._shared_registry_project_name diff --git a/python/hsml/model_serving.py b/python/hsml/model_serving.py index b58942ba7..2d24d2b20 100644 --- a/python/hsml/model_serving.py +++ b/python/hsml/model_serving.py @@ -125,7 +125,7 @@ def get_deployments(self, model: Model = None, status: str = None): `RestAPIError`: If unable to retrieve deployments from model serving. """ - model_name = model.name if model is not None else None + model_name = model._get_default_serving_name() if model is not None else None if status is not None: self._validate_deployment_status(status) @@ -207,7 +207,7 @@ def create_predictor( """ if name is None: - name = model.name + name = model._get_default_serving_name() return Predictor.for_model( model, diff --git a/python/hsml/predictor.py b/python/hsml/predictor.py index b7f02b66b..31c6aa138 100644 --- a/python/hsml/predictor.py +++ b/python/hsml/predictor.py @@ -22,6 +22,7 @@ ARTIFACT_VERSION, INFERENCE_ENDPOINTS, MODEL, + MODEL_SERVING, PREDICTOR, RESOURCES, Default, @@ -168,18 +169,22 @@ def _validate_serving_tool(cls, serving_tool): @classmethod def _validate_script_file(cls, model_framework, script_file): - if model_framework == MODEL.FRAMEWORK_PYTHON and script_file is None: + if script_file is None and ( + model_framework == MODEL.FRAMEWORK_PYTHON + or model_framework == MODEL.FRAMEWORK_LLM + ): raise ValueError( - "Predictor scripts are required in deployments for custom Python models" + "Predictor scripts are required in deployments for custom Python models and LLMs." 
) @classmethod def _infer_model_server(cls, model_framework): - return ( - PREDICTOR.MODEL_SERVER_TF_SERVING - if model_framework == MODEL.FRAMEWORK_TENSORFLOW - else PREDICTOR.MODEL_SERVER_PYTHON - ) + if model_framework == MODEL.FRAMEWORK_TENSORFLOW: + return PREDICTOR.MODEL_SERVER_TF_SERVING + elif model_framework == MODEL.FRAMEWORK_LLM: + return PREDICTOR.MODEL_SERVER_VLLM + else: + return PREDICTOR.MODEL_SERVER_PYTHON @classmethod def _get_default_serving_tool(cls): @@ -392,9 +397,19 @@ def artifact_version(self): def artifact_version(self, artifact_version: Union[int, str]): self._artifact_version = artifact_version + @property + def artifact_files_path(self): + return "{}/{}/{}/{}".format( + self._model_path, + str(self._model_version), + MODEL_SERVING.ARTIFACTS_DIR_NAME, + str(self._artifact_version), + ) + @property def artifact_path(self): """Path of the model artifact deployed by the predictor. Resolves to /Projects/{project_name}/Models/{name}/{version}/Artifacts/{artifact_version}/{name}_{version}_{artifact_version}.zip""" + # TODO: Deprecated artifact_name = "{}_{}_{}.zip".format( self._model_name, str(self._model_version), str(self._artifact_version) ) diff --git a/python/hsml/python/signature.py b/python/hsml/python/signature.py index 1bb5fa8f7..fa704aaab 100644 --- a/python/hsml/python/signature.py +++ b/python/hsml/python/signature.py @@ -56,6 +56,8 @@ def create_model( `""`. input_example: Optionally an input example that represents a single input for the model, defaults to `None`. model_schema: Optionally a model schema for the model inputs and/or outputs. + feature_view: Optionally a feature view object returned by querying the feature store. If the feature view is not provided, the model will not have access to provenance. + training_dataset_version: Optionally a training dataset version. If training dataset version is not provided, but the feature view is provided, the training dataset version used will be the last accessed training dataset of the feature view, within the code/notebook that reads the feature view and training dataset and then creates the model. # Returns `Model`. The model metadata object. diff --git a/python/hsml/sklearn/signature.py b/python/hsml/sklearn/signature.py index f8816febb..4c145a96a 100644 --- a/python/hsml/sklearn/signature.py +++ b/python/hsml/sklearn/signature.py @@ -56,6 +56,8 @@ def create_model( `""`. input_example: Optionally an input example that represents a single input for the model, defaults to `None`. model_schema: Optionally a model schema for the model inputs and/or outputs. + feature_view: Optionally a feature view object returned by querying the feature store. If the feature view is not provided, the model will not have access to provenance. + training_dataset_version: Optionally a training dataset version. If training dataset version is not provided, but the feature view is provided, the training dataset version used will be the last accessed training dataset of the feature view, within the code/notebook that reads the feature view and training dataset and then creates the model. # Returns `Model`. The model metadata object. diff --git a/python/hsml/tensorflow/signature.py b/python/hsml/tensorflow/signature.py index 1f83c5496..e24d20e65 100644 --- a/python/hsml/tensorflow/signature.py +++ b/python/hsml/tensorflow/signature.py @@ -56,6 +56,8 @@ def create_model( `""`. input_example: Optionally an input example that represents a single input for the model, defaults to `None`. 
model_schema: Optionally a model schema for the model inputs and/or outputs. + feature_view: Optionally a feature view object returned by querying the feature store. If the feature view is not provided, the model will not have access to provenance. + training_dataset_version: Optionally a training dataset version. If training dataset version is not provided, but the feature view is provided, the training dataset version used will be the last accessed training dataset of the feature view, within the code/notebook that reads the feature view and training dataset and then creates the model. # Returns `Model`. The model metadata object. diff --git a/python/hsml/torch/signature.py b/python/hsml/torch/signature.py index 5234d110a..bab488974 100644 --- a/python/hsml/torch/signature.py +++ b/python/hsml/torch/signature.py @@ -56,6 +56,8 @@ def create_model( `""`. input_example: Optionally an input example that represents a single input for the model, defaults to `None`. model_schema: Optionally a model schema for the model inputs and/or outputs. + feature_view: Optionally a feature view object returned by querying the feature store. If the feature view is not provided, the model will not have access to provenance. + training_dataset_version: Optionally a training dataset version. If training dataset version is not provided, but the feature view is provided, the training dataset version used will be the last accessed training dataset of the feature view, within the code/notebook that reads the feature view and training dataset and then creates the model. # Returns `Model`. The model metadata object. diff --git a/python/hsml/utils/schema/columnar_schema.py b/python/hsml/utils/schema/columnar_schema.py index 3aa5fde0e..a7468401f 100644 --- a/python/hsml/utils/schema/columnar_schema.py +++ b/python/hsml/utils/schema/columnar_schema.py @@ -20,11 +20,6 @@ from hsml.utils.schema.column import Column -try: - import hsfs -except ImportError: - pass - try: import pyspark except ImportError: @@ -35,6 +30,10 @@ class ColumnarSchema: """Metadata object representing a columnar schema for a model.""" def __init__(self, columnar_obj=None): + from hsfs.training_dataset import ( + TrainingDataset, # import performed here to prevent circular dependencies when importing ModelSchema + ) + if isinstance(columnar_obj, list): self.columns = self._convert_list_to_schema(columnar_obj) elif isinstance(columnar_obj, pandas.DataFrame): @@ -45,9 +44,7 @@ def __init__(self, columnar_obj=None): columnar_obj, pyspark.sql.dataframe.DataFrame ): self.columns = self._convert_spark_to_schema(columnar_obj) - elif importlib.util.find_spec("hsfs") is not None and isinstance( - columnar_obj, hsfs.training_dataset.TrainingDataset - ): + elif isinstance(columnar_obj, TrainingDataset): self.columns = self._convert_td_to_schema(columnar_obj) else: raise TypeError( diff --git a/python/pyproject.toml b/python/pyproject.toml index a66d15115..6ff1e6a0c 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -54,7 +54,7 @@ dependencies = [ "opensearch-py>=1.1.0,<=2.4.2", "tqdm", "grpcio>=1.49.1,<2.0.0", # ^1.49.1 - "protobuf>=3.19.0,<4.0.0", # ^3.19.0 + "protobuf>=4.25.4,<5.0.0", # ^4.25.4 ] [project.optional-dependencies] diff --git a/python/tests/core/test_feature_group_engine.py b/python/tests/core/test_feature_group_engine.py index 91f1086ed..e57f2c0c3 100644 --- a/python/tests/core/test_feature_group_engine.py +++ b/python/tests/core/test_feature_group_engine.py @@ -56,6 +56,49 @@ def test_save(self, mocker): # Assert assert 
mock_engine_get_instance.return_value.save_dataframe.call_count == 1 + def test_save_dataframe_transformation_functions(self, mocker): + # Arrange + feature_store_id = 99 + + mocker.patch("hsfs.engine.get_type") + mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") + mocker.patch( + "hsfs.core.feature_group_engine.FeatureGroupEngine.save_feature_group_metadata" + ) + mocker.patch("hsfs.core.great_expectation_engine.GreatExpectationEngine") + + fg_engine = feature_group_engine.FeatureGroupEngine( + feature_store_id=feature_store_id + ) + + @udf(int) + def test(feature): + return feature + 1 + + fg = feature_group.FeatureGroup( + name="test", + version=1, + featurestore_id=feature_store_id, + primary_key=[], + partition_key=[], + transformation_functions=[test], + id=10, + ) + + # Act + fg_engine.save( + feature_group=fg, + feature_dataframe=None, + write_options=None, + ) + + # Assert + assert mock_engine_get_instance.return_value.save_dataframe.call_count == 1 + assert ( + mock_engine_get_instance.return_value._apply_transformation_function.call_count + == 1 + ) + def test_save_ge_report(self, mocker): # Arrange feature_store_id = 99 @@ -143,6 +186,56 @@ def test_insert(self, mocker): assert mock_fg_api.return_value.delete_content.call_count == 0 assert mock_engine_get_instance.return_value.save_dataframe.call_count == 1 + def test_insert_transformation_functions(self, mocker): + # Arrange + feature_store_id = 99 + + mocker.patch("hsfs.engine.get_type") + mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") + mocker.patch( + "hsfs.core.feature_group_engine.FeatureGroupEngine.save_feature_group_metadata" + ) + mocker.patch( + "hsfs.core.feature_group_engine.FeatureGroupEngine._verify_schema_compatibility" + ) + mocker.patch("hsfs.core.great_expectation_engine.GreatExpectationEngine") + mock_fg_api = mocker.patch("hsfs.core.feature_group_api.FeatureGroupApi") + + fg_engine = feature_group_engine.FeatureGroupEngine( + feature_store_id=feature_store_id + ) + + @udf(int) + def test(feature): + return feature + 1 + + fg = feature_group.FeatureGroup( + name="test", + version=1, + featurestore_id=feature_store_id, + transformation_functions=[test], + primary_key=[], + partition_key=[], + ) + + # Act + fg_engine.insert( + feature_group=fg, + feature_dataframe=None, + overwrite=None, + operation=None, + storage=None, + write_options=None, + ) + + # Assert + assert mock_fg_api.return_value.delete_content.call_count == 0 + assert mock_engine_get_instance.return_value.save_dataframe.call_count == 1 + assert ( + mock_engine_get_instance.return_value._apply_transformation_function.call_count + == 1 + ) + def test_insert_id(self, mocker): # Arrange feature_store_id = 99 @@ -709,7 +802,7 @@ def test_append_features(self, mocker): # Assert assert ( - mock_engine_get_instance.return_value.save_empty_dataframe.call_count == 1 + mock_engine_get_instance.return_value.update_table_schema.call_count == 1 ) assert len(mock_fg_engine_update_features_metadata.call_args[0][1]) == 4 @@ -909,6 +1002,59 @@ def test_insert_stream_stream(self, mocker): mock_engine_get_instance.return_value.save_stream_dataframe.call_count == 1 ) + def test_insert_stream_stream_transformation_functions(self, mocker): + # Arrange + feature_store_id = 99 + + mocker.patch("hsfs.engine.get_type") + mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") + mocker.patch( + "hsfs.core.feature_group_engine.FeatureGroupEngine.save_feature_group_metadata" + ) + mocker.patch( + 
"hsfs.core.feature_group_engine.FeatureGroupEngine._verify_schema_compatibility" + ) + + @udf(int) + def test(feature): + return feature + 1 + + fg_engine = feature_group_engine.FeatureGroupEngine( + feature_store_id=feature_store_id + ) + + fg = feature_group.FeatureGroup( + name="test", + version=1, + featurestore_id=feature_store_id, + primary_key=[], + partition_key=[], + transformation_functions=[test], + stream=True, + ) + + # Act + fg_engine.insert_stream( + feature_group=fg, + dataframe=None, + query_name=None, + output_mode=None, + await_termination=None, + timeout=None, + checkpoint_dir=None, + write_options=None, + ) + + # Assert + assert mock_engine_get_instance.return_value.save_dataframe.call_count == 0 + assert ( + mock_engine_get_instance.return_value.save_stream_dataframe.call_count == 1 + ) + assert ( + mock_engine_get_instance.return_value._apply_transformation_function.call_count + == 1 + ) + def test_insert_stream_online_enabled_id(self, mocker): # Arrange feature_store_id = 99 diff --git a/python/tests/core/test_kafka_engine.py b/python/tests/core/test_kafka_engine.py index e6bb48297..88085689e 100644 --- a/python/tests/core/test_kafka_engine.py +++ b/python/tests/core/test_kafka_engine.py @@ -340,7 +340,7 @@ def test_kafka_get_offsets_high(self, mocker): ) # Assert - assert result == f" -initialCheckPointString {topic_name},0:11" + assert result == f"{topic_name},0:11" def test_kafka_get_offsets_low(self, mocker): # Arrange @@ -372,7 +372,7 @@ def test_kafka_get_offsets_low(self, mocker): ) # Assert - assert result == f" -initialCheckPointString {topic_name},0:0" + assert result == f"{topic_name},0:0" def test_kafka_get_offsets_no_topic(self, mocker): # Arrange diff --git a/python/tests/engine/test_python.py b/python/tests/engine/test_python.py index e921787be..ea83f618f 100644 --- a/python/tests/engine/test_python.py +++ b/python/tests/engine/test_python.py @@ -1450,52 +1450,6 @@ def test_save_dataframe(self, mocker): assert mock_python_engine_write_dataframe_kafka.call_count == 0 assert mock_python_engine_legacy_save_dataframe.call_count == 1 - def test_save_dataframe_transformation_functions(self, mocker): - # Arrange - mock_python_engine_write_dataframe_kafka = mocker.patch( - "hsfs.engine.python.Engine._write_dataframe_kafka" - ) - mock_python_engine_legacy_save_dataframe = mocker.patch( - "hsfs.engine.python.Engine.legacy_save_dataframe" - ) - mock_python_engine_apply_transformations = mocker.patch( - "hsfs.engine.python.Engine._apply_transformation_function" - ) - - python_engine = python.Engine() - - @udf(int) - def test(feature): - return feature + 1 - - fg = feature_group.FeatureGroup( - name="test", - version=1, - featurestore_id=99, - primary_key=[], - partition_key=[], - id=10, - stream=False, - transformation_functions=[test], - ) - - # Act - python_engine.save_dataframe( - feature_group=fg, - dataframe=None, - operation=None, - online_enabled=None, - storage=None, - offline_write_options=None, - online_write_options=None, - validation_id=None, - ) - - # Assert - assert mock_python_engine_write_dataframe_kafka.call_count == 0 - assert mock_python_engine_legacy_save_dataframe.call_count == 1 - assert mock_python_engine_apply_transformations.call_count == 1 - def test_save_dataframe_stream(self, mocker): # Arrange mock_python_engine_write_dataframe_kafka = mocker.patch( @@ -2565,15 +2519,22 @@ def test_save_stream_dataframe(self): == "Stream ingestion is not available on Python environments, because it requires Spark as engine." 
) - def test_save_empty_dataframe(self): + def test_update_table_schema(self, mocker): # Arrange + mock_fg_api = mocker.patch("hsfs.core.feature_group_api.FeatureGroupApi") + python_engine = python.Engine() + mock_fg_api.return_value.update_table_schema.return_value.job = job.Job( + 1, "test_job", None, None, None, None + ) + # Act - result = python_engine.save_empty_dataframe(feature_group=None) + result = python_engine.update_table_schema(feature_group=None) # Assert assert result is None + assert mock_fg_api.return_value.update_table_schema.call_count == 1 def test_get_app_options(self, mocker): # Arrange @@ -3456,6 +3417,88 @@ def test_get_unique_values(self): assert 2 in result assert 3 in result + def test_apply_transformation_function_missing_feature_on_demand_transformations( + self, mocker + ): + # Arrange + mocker.patch("hopsworks_common.client.get_instance") + hopsworks_common.connection._hsfs_engine_type = "python" + python_engine = python.Engine() + + @udf(int) + def add_one(col1): + return col1 + 1 + + fg = feature_group.FeatureGroup( + name="test1", + version=1, + featurestore_id=99, + primary_key=[], + partition_key=[], + features=[feature.Feature("id"), feature.Feature("tf_name")], + transformation_functions=[add_one("missing_col1")], + id=11, + stream=False, + ) + + df = pd.DataFrame(data={"tf_name": [1, 2]}) + + # Act + with pytest.raises(exceptions.FeatureStoreException) as exception: + python_engine._apply_transformation_function( + transformation_functions=fg.transformation_functions, dataset=df + ) + print(str(exception.value)) + assert ( + str(exception.value) + == "The following feature(s): `missing_col1`, specified in the on-demand transformation function 'add_one' are not present in the dataframe being inserted into the feature group. " + "Please verify that the correct feature names are used in the transformation function and that these features exist in the dataframe being inserted." + ) + + def test_apply_transformation_function_missing_feature_model_dependent_transformations( + self, mocker + ): + # Arrange + mocker.patch("hopsworks_common.client.get_instance") + hopsworks_common.connection._hsfs_engine_type = "python" + python_engine = python.Engine() + + @udf(int) + def add_one(col1): + return col1 + 1 + + fg = feature_group.FeatureGroup( + name="test1", + version=1, + featurestore_id=99, + primary_key=[], + partition_key=[], + features=[feature.Feature("id"), feature.Feature("tf_name")], + id=11, + stream=False, + ) + + fv = feature_view.FeatureView( + name="fv_name", + query=fg.select_all(), + featurestore_id=99, + transformation_functions=[add_one("missing_col1")], + ) + + df = pd.DataFrame(data={"tf_name": [1, 2]}) + + # Act + with pytest.raises(exceptions.FeatureStoreException) as exception: + python_engine._apply_transformation_function( + transformation_functions=fv.transformation_functions, dataset=df + ) + print(str(exception.value)) + assert ( + str(exception.value) + == "The following feature(s): `missing_col1`, specified in the model-dependent transformation function 'add_one' are not present in the feature view. " + "Please verify that the correct features are specified in the transformation function." 
+ ) + def test_materialization_kafka(self, mocker): # Arrange mocker.patch("hsfs.core.kafka_engine.get_kafka_config", return_value={}) @@ -3526,7 +3569,7 @@ def test_materialization_kafka_first_job_execution(self, mocker): mocker.patch("hsfs.util.get_job_url") mocker.patch( "hsfs.core.kafka_engine.kafka_get_offsets", - return_value=" tests_offsets", + return_value="tests_offsets", ) mocker.patch( "hsfs.core.job_api.JobApi.last_execution", @@ -3568,7 +3611,7 @@ def test_materialization_kafka_first_job_execution(self, mocker): # Assert assert mock_python_engine_kafka_produce.call_count == 4 job_mock.run.assert_called_once_with( - args="defaults tests_offsets", + args="defaults -initialCheckPointString tests_offsets", await_termination=False, ) @@ -3584,7 +3627,7 @@ def test_materialization_kafka_skip_offsets(self, mocker): mocker.patch("hsfs.util.get_job_url") mocker.patch( "hsfs.core.kafka_engine.kafka_get_offsets", - return_value=" tests_offsets", + return_value="tests_offsets", ) mocker.patch("hopsworks_common.client.get_instance") @@ -3625,7 +3668,7 @@ def test_materialization_kafka_skip_offsets(self, mocker): # Assert assert mock_python_engine_kafka_produce.call_count == 4 job_mock.run.assert_called_once_with( - args="defaults tests_offsets", + args="defaults -initialCheckPointString tests_offsets", await_termination=False, ) @@ -3641,7 +3684,7 @@ def test_materialization_kafka_topic_doesnt_exist(self, mocker): mocker.patch("hsfs.util.get_job_url") mocker.patch( "hsfs.core.kafka_engine.kafka_get_offsets", - side_effect=["", " tests_offsets"], + side_effect=["", "tests_offsets"], ) mocker.patch("hopsworks_common.client.get_instance") @@ -3679,7 +3722,7 @@ def test_materialization_kafka_topic_doesnt_exist(self, mocker): # Assert assert mock_python_engine_kafka_produce.call_count == 4 job_mock.run.assert_called_once_with( - args="defaults tests_offsets", + args="defaults -initialCheckPointString tests_offsets", await_termination=False, ) diff --git a/python/tests/engine/test_spark.py b/python/tests/engine/test_spark.py index fb3f6e08f..da3449270 100644 --- a/python/tests/engine/test_spark.py +++ b/python/tests/engine/test_spark.py @@ -15,6 +15,8 @@ # from __future__ import annotations +from unittest.mock import call + import hopsworks_common import numpy import pandas as pd @@ -39,6 +41,7 @@ from hsfs.training_dataset_feature import TrainingDatasetFeature from hsfs.transformation_function import TransformationType from pyspark.sql import DataFrame +from pyspark.sql.functions import lit from pyspark.sql.types import ( ArrayType, BinaryType, @@ -202,6 +205,9 @@ def test_register_hudi_temporary_table(self, mocker): # Arrange mock_hudi_engine = mocker.patch("hsfs.core.hudi_engine.HudiEngine") mocker.patch("hsfs.feature_group.FeatureGroup.from_response_json") + mock_reconcile_schema = mocker.patch( + "hsfs.engine.spark.Engine.reconcile_schema" + ) spark_engine = spark.Engine() @@ -219,6 +225,33 @@ def test_register_hudi_temporary_table(self, mocker): # Assert assert mock_hudi_engine.return_value.register_temporary_table.call_count == 1 + assert mock_reconcile_schema.call_count == 1 + + def test_register_delta_temporary_table(self, mocker): + # Arrange + mock_delta_engine = mocker.patch("hsfs.core.delta_engine.DeltaEngine") + mocker.patch("hsfs.feature_group.FeatureGroup.from_response_json") + mock_reconcile_schema = mocker.patch( + "hsfs.engine.spark.Engine.reconcile_schema" + ) + + spark_engine = spark.Engine() + + hudi_fg_alias = hudi_feature_group_alias.HudiFeatureGroupAlias( + 
feature_group=None, alias=None + ) + + # Act + spark_engine.register_delta_temporary_table( + delta_fg_alias=hudi_fg_alias, + feature_store_id=None, + feature_store_name=None, + read_options=None, + ) + + # Assert + assert mock_delta_engine.return_value.register_temporary_table.call_count == 1 + assert mock_reconcile_schema.call_count == 1 def test_return_dataframe_type_default(self, mocker): # Arrange @@ -605,51 +638,6 @@ def test_save_dataframe(self, mocker): assert mock_spark_engine_save_online_dataframe.call_count == 0 assert mock_spark_engine_save_offline_dataframe.call_count == 1 - def test_save_dataframe_transformations(self, mocker): - # Arrange - mock_spark_engine_save_online_dataframe = mocker.patch( - "hsfs.engine.spark.Engine._save_online_dataframe" - ) - mock_spark_engine_save_offline_dataframe = mocker.patch( - "hsfs.engine.spark.Engine._save_offline_dataframe" - ) - mock_spark_engine_apply_transformations = mocker.patch( - "hsfs.engine.spark.Engine._apply_transformation_function" - ) - - spark_engine = spark.Engine() - - @udf(int) - def test(feature): - return feature + 1 - - fg = feature_group.FeatureGroup( - name="test", - version=1, - featurestore_id=99, - primary_key=[], - partition_key=[], - id=10, - transformation_functions=[test], - ) - - # Act - spark_engine.save_dataframe( - feature_group=fg, - dataframe=None, - operation=None, - online_enabled=None, - storage=None, - offline_write_options=None, - online_write_options=None, - validation_id=None, - ) - - # Assert - assert mock_spark_engine_save_online_dataframe.call_count == 0 - assert mock_spark_engine_save_offline_dataframe.call_count == 1 - assert mock_spark_engine_apply_transformations.call_count == 1 - def test_save_dataframe_storage_offline(self, mocker): # Arrange mock_spark_engine_save_online_dataframe = mocker.patch( @@ -873,130 +861,8 @@ def test_save_stream_dataframe(self, mocker, backend_fixtures): "hopsworks_common.client.get_instance" ) mocker.patch("hopsworks_common.client._is_external", return_value=False) - mocker.patch("hsfs.engine.spark.Engine._encode_complex_features") - mock_spark_engine_online_fg_to_avro = mocker.patch( - "hsfs.engine.spark.Engine._online_fg_to_avro" - ) - - mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") - mock_engine_get_instance.return_value.add_file.return_value = ( - "result_from_add_file" - ) - - mock_storage_connector_api = mocker.patch( - "hsfs.core.storage_connector_api.StorageConnectorApi" - ) - json = backend_fixtures["storage_connector"]["get_kafka_external"]["response"] - sc = storage_connector.StorageConnector.from_response_json(json) - mock_storage_connector_api.return_value.get_kafka_connector.return_value = sc - - spark_engine = spark.Engine() - - fg = feature_group.FeatureGroup( - name="test", - version=1, - featurestore_id=99, - primary_key=[], - partition_key=[], - id=10, - online_topic_name="test_online_topic_name", - ) - fg.feature_store = mocker.Mock() - project_id = 1 - fg.feature_store.project_id = project_id - - mock_common_client_get_instance.return_value._project_name = "test_project_name" - - # Act - spark_engine.save_stream_dataframe( - feature_group=fg, - dataframe=None, - query_name=None, - output_mode="test_mode", - await_termination=None, - timeout=None, - checkpoint_dir=None, - write_options={"test_name": "test_value"}, - ) - - # Assert - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.call_args[0][0] - == "headers" - ) - assert ( - 
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ - 0 - ][0] - == "test_mode" - ) - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ - 0 - ][0] - == "kafka" - ) - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ - 0 - ][0] - == "checkpointLocation" - ) - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ - 0 - ][1] - == f"/Projects/test_project_name/Resources/{self._get_spark_query_name(project_id, fg)}-checkpoint" - ) - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ - 1 - ] - == { - "kafka.bootstrap.servers": "test_bootstrap_servers", - "kafka.security.protocol": "test_security_protocol", - "kafka.ssl.endpoint.identification.algorithm": "test_ssl_endpoint_identification_algorithm", - "kafka.ssl.key.password": "test_ssl_key_password", - "kafka.ssl.keystore.location": "result_from_add_file", - "kafka.ssl.keystore.password": "test_ssl_keystore_password", - "kafka.ssl.truststore.location": "result_from_add_file", - "kafka.ssl.truststore.password": "test_ssl_truststore_password", - "kafka.test_option_name": "test_option_value", - "test_name": "test_value", - } - ) - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ - 0 - ][0] - == "topic" - ) - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ - 0 - ][1] - == "test_online_topic_name" - ) - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ - 0 - ][0] - == self._get_spark_query_name(project_id, fg) - ) - assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count - == 0 - ) - - def test_save_stream_dataframe_transformations(self, mocker, backend_fixtures): - # Arrange - mock_common_client_get_instance = mocker.patch( - "hopsworks_common.client.get_instance" - ) - mocker.patch("hopsworks_common.client._is_external", return_value=False) - mocker.patch("hsfs.engine.spark.Engine._encode_complex_features") - mock_spark_engine_online_fg_to_avro = mocker.patch( - "hsfs.engine.spark.Engine._online_fg_to_avro" + mock_spark_engine_serialize_to_avro = mocker.patch( + "hsfs.engine.spark.Engine._serialize_to_avro" ) mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") @@ -1007,21 +873,12 @@ def test_save_stream_dataframe_transformations(self, mocker, backend_fixtures): mock_storage_connector_api = mocker.patch( "hsfs.core.storage_connector_api.StorageConnectorApi" ) - - mock_spark_engine_apply_transformations = mocker.patch( - "hsfs.engine.spark.Engine._apply_transformation_function" - ) - 
json = backend_fixtures["storage_connector"]["get_kafka_external"]["response"] sc = storage_connector.StorageConnector.from_response_json(json) mock_storage_connector_api.return_value.get_kafka_connector.return_value = sc spark_engine = spark.Engine() - @udf(int) - def test(feature): - return feature + 1 - fg = feature_group.FeatureGroup( name="test", version=1, @@ -1030,7 +887,6 @@ def test(feature): partition_key=[], id=10, online_topic_name="test_online_topic_name", - transformation_functions=[test], ) fg.feature_store = mocker.Mock() project_id = 1 @@ -1052,35 +908,35 @@ def test(feature): # Assert assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.call_args[0][0] + mock_spark_engine_serialize_to_avro.return_value.withColumn.call_args[0][0] == "headers" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ 0 ][0] == "test_mode" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ 0 ][0] == "kafka" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ 0 ][0] == "checkpointLocation" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ 0 ][1] == f"/Projects/test_project_name/Resources/{self._get_spark_query_name(project_id, fg)}-checkpoint" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ 1 ] == { @@ -1097,28 +953,27 @@ def test(feature): } ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ 0 ][0] == "topic" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ 0 ][1] == "test_online_topic_name" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ + 
mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ 0 ][0] == self._get_spark_query_name(project_id, fg) ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count == 0 ) - assert mock_spark_engine_apply_transformations.call_count == 1 def test_save_stream_dataframe_query_name(self, mocker, backend_fixtures): # Arrange @@ -1126,9 +981,8 @@ def test_save_stream_dataframe_query_name(self, mocker, backend_fixtures): "hopsworks_common.client.get_instance" ) mocker.patch("hopsworks_common.client._is_external", return_value=False) - mocker.patch("hsfs.engine.spark.Engine._encode_complex_features") - mock_spark_engine_online_fg_to_avro = mocker.patch( - "hsfs.engine.spark.Engine._online_fg_to_avro" + mock_spark_engine_serialize_to_avro = mocker.patch( + "hsfs.engine.spark.Engine._serialize_to_avro" ) mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") @@ -1172,35 +1026,35 @@ def test_save_stream_dataframe_query_name(self, mocker, backend_fixtures): # Assert assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.call_args[0][0] + mock_spark_engine_serialize_to_avro.return_value.withColumn.call_args[0][0] == "headers" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ 0 ][0] == "test_mode" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ 0 ][0] == "kafka" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ 0 ][0] == "checkpointLocation" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ 0 ][1] == "/Projects/test_project_name/Resources/test_query_name-checkpoint" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ 1 ] == { @@ -1217,25 +1071,25 @@ def test_save_stream_dataframe_query_name(self, mocker, backend_fixtures): } ) assert ( - 
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ 0 ][0] == "topic" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ 0 ][1] == "test_online_topic_name" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ 0 ][0] == "test_query_name" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count == 0 ) @@ -1251,9 +1105,8 @@ def test_save_stream_dataframe_checkpoint_dir(self, mocker, backend_fixtures): "hopsworks_common.client.get_instance" ) mocker.patch("hopsworks_common.client._is_external", return_value=False) - mocker.patch("hsfs.engine.spark.Engine._encode_complex_features") - mock_spark_engine_online_fg_to_avro = mocker.patch( - "hsfs.engine.spark.Engine._online_fg_to_avro" + mock_spark_engine_serialize_to_avro = mocker.patch( + "hsfs.engine.spark.Engine._serialize_to_avro" ) mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") @@ -1299,35 +1152,35 @@ def test_save_stream_dataframe_checkpoint_dir(self, mocker, backend_fixtures): # Assert assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.call_args[0][0] + mock_spark_engine_serialize_to_avro.return_value.withColumn.call_args[0][0] == "headers" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ 0 ][0] == "test_mode" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ 0 ][0] == "kafka" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ 0 ][0] == "checkpointLocation" 
) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ 0 ][1] == "test_checkpoint_dir" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ 1 ] == { @@ -1344,25 +1197,25 @@ def test_save_stream_dataframe_checkpoint_dir(self, mocker, backend_fixtures): } ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ 0 ][0] == "topic" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ 0 ][1] == "test_online_topic_name" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ 0 ][0] == self._get_spark_query_name(project_id, fg) ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count == 0 ) @@ -1372,9 +1225,8 @@ def test_save_stream_dataframe_await_termination(self, mocker, backend_fixtures) "hopsworks_common.client.get_instance" ) mocker.patch("hopsworks_common.client._is_external", return_value=False) - mocker.patch("hsfs.engine.spark.Engine._encode_complex_features") - mock_spark_engine_online_fg_to_avro = mocker.patch( - "hsfs.engine.spark.Engine._online_fg_to_avro" + mock_spark_engine_serialize_to_avro = mocker.patch( + "hsfs.engine.spark.Engine._serialize_to_avro" ) mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") @@ -1420,35 +1272,35 @@ def test_save_stream_dataframe_await_termination(self, mocker, backend_fixtures) # Assert assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.call_args[0][0] + mock_spark_engine_serialize_to_avro.return_value.withColumn.call_args[0][0] == "headers" ) assert ( - 
mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.call_args[ 0 ][0] == "test_mode" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.call_args[ 0 ][0] == "kafka" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ 0 ][0] == "checkpointLocation" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.call_args[ 0 ][1] == f"/Projects/test_project_name/Resources/{self._get_spark_query_name(project_id, fg)}-checkpoint" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.call_args[ 1 ] == { @@ -1465,29 +1317,29 @@ def test_save_stream_dataframe_await_termination(self, mocker, backend_fixtures) } ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ 0 ][0] == "topic" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.call_args[ 0 ][1] == "test_online_topic_name" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.call_args[ 0 ][0] == self._get_spark_query_name(project_id, fg) ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count + 
mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_count == 1 ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.writeStream.outputMode.return_value.format.return_value.option.return_value.options.return_value.option.return_value.queryName.return_value.start.return_value.awaitTermination.call_args[ 0 ][0] == 123 @@ -1630,9 +1482,8 @@ def test_save_online_dataframe(self, mocker, backend_fixtures): # Arrange mocker.patch("hopsworks_common.client.get_instance") mocker.patch("hopsworks_common.client._is_external", return_value=False) - mocker.patch("hsfs.engine.spark.Engine._encode_complex_features") - mock_spark_engine_online_fg_to_avro = mocker.patch( - "hsfs.engine.spark.Engine._online_fg_to_avro" + mock_spark_engine_serialize_to_avro = mocker.patch( + "hsfs.engine.spark.Engine._serialize_to_avro" ) mock_engine_get_instance = mocker.patch("hsfs.engine.get_instance") @@ -1668,19 +1519,19 @@ def test_save_online_dataframe(self, mocker, backend_fixtures): ) # Assert - assert mock_spark_engine_online_fg_to_avro.call_count == 1 + assert mock_spark_engine_serialize_to_avro.call_count == 1 assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.call_args[0][0] + mock_spark_engine_serialize_to_avro.return_value.withColumn.call_args[0][0] == "headers" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.write.format.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.write.format.call_args[ 0 ][0] == "kafka" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.write.format.return_value.options.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.write.format.return_value.options.call_args[ 1 ] == { @@ -1697,37 +1548,40 @@ def test_save_online_dataframe(self, mocker, backend_fixtures): } ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.write.format.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.write.format.return_value.options.return_value.option.call_args[ 0 ][0] == "topic" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.write.format.return_value.options.return_value.option.call_args[ + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.write.format.return_value.options.return_value.option.call_args[ 0 ][1] == "test_online_topic_name" ) assert ( - mock_spark_engine_online_fg_to_avro.return_value.withColumn.return_value.write.format.return_value.options.return_value.option.return_value.save.call_count + mock_spark_engine_serialize_to_avro.return_value.withColumn.return_value.write.format.return_value.options.return_value.option.return_value.save.call_count == 1 ) - def test_encode_complex_features(self, mocker): + def test_serialize_to_avro(self, mocker): # Arrange - mocker.patch("hopsworks_common.client.get_instance") - mocker.patch( - 
"hsfs.feature_group.FeatureGroup.get_complex_features", - return_value=["col_1"], - ) - mocker.patch("hsfs.feature_group.FeatureGroup._get_feature_avro_schema") - spark_engine = spark.Engine() - d = {"col_0": ["test_1", "test_2"], "col_1": ["test_1", "test_2"]} - df = pd.DataFrame(data=d) + mock_to_avro = mocker.patch("hsfs.engine.spark.to_avro") + mock_to_avro.return_value = lit(b"111") - spark_df = spark_engine._spark_session.createDataFrame(df) + fg_data = [] + fg_data.append(("ekarson", ["GRAVITY RUSH 2", "KING'S QUEST"])) + fg_data.append(("ratmilkdrinker", ["NBA 2K", "CALL OF DUTY"])) + pandas_df = pd.DataFrame(fg_data, columns=["account_id", "last_played_games"]) + + df = spark_engine._spark_session.createDataFrame(pandas_df) + + features = [ + feature.Feature(name="account_id", type="str"), + feature.Feature(name="last_played_games", type="array"), + ] fg = feature_group.FeatureGroup( name="test", @@ -1736,37 +1590,44 @@ def test_encode_complex_features(self, mocker): primary_key=[], partition_key=[], id=10, + features=features, ) - fg._subject = {"schema": '{"fields": [{"name": "col_0"}]}'} - - expected = pd.DataFrame(data={"col_0": ["test_1", "test_2"]}) + fg._subject = { + "id": 1025, + "subject": "fg_1", + "version": 1, + "schema": '{"type":"record","name":"fg_1","namespace":"test_featurestore.db","fields":[{"name":"account_id","type":["null","string"]},{"name":"last_played_games","type":["null",{"type":"array","items":["null","string"]}]}]}', + } # Act - result = spark_engine._encode_complex_features( + serialized_df = spark_engine._serialize_to_avro( feature_group=fg, - dataframe=spark_df, + dataframe=df, ) # Assert - result_df = result.toPandas() - assert list(result_df) == list(expected) - for column in list(result_df): - assert result_df[column].equals(expected[column]) - - def test_encode_complex_features_col_in_complex_features(self, mocker): - # Arrange - mocker.patch( - "hsfs.feature_group.FeatureGroup.get_complex_features", - return_value=["col_0"], + assert ( + serialized_df.schema.json() + == '{"fields":[{"metadata":{},"name":"key","nullable":false,"type":"binary"},{"metadata":{},"name":"value","nullable":false,"type":"binary"}],"type":"struct"}' ) - mocker.patch("hsfs.feature_group.FeatureGroup._get_feature_avro_schema") + """ Need spark to run these tests properly + def test_deserialize_from_avro(self, mocker): + # Arrange spark_engine = spark.Engine() - d = {"col_0": ["test_1", "test_2"], "col_1": ["test_1", "test_2"]} - df = pd.DataFrame(data=d) + data = [] + data.append((b"2121", b"21212121")) + data.append((b"1212", b"12121212")) + pandas_df = pd.DataFrame(data, columns =["key", "value"]) - spark_df = spark_engine._spark_session.createDataFrame(df) + df = spark_engine._spark_session.createDataFrame(pandas_df) + + features = [ + feature.Feature(name="account_id", type="str"), + feature.Feature(name="last_played_games", type="array"), + feature.Feature(name="event_time", type="timestamp"), + ] fg = feature_group.FeatureGroup( name="test", @@ -1775,29 +1636,42 @@ def test_encode_complex_features_col_in_complex_features(self, mocker): primary_key=[], partition_key=[], id=10, + features=features, ) - fg._subject = {"schema": '{"fields": [{"name": "col_0"}]}'} + fg._subject = { + 'id': 1025, + 'subject': 'fg_1', + 'version': 1, + 'schema': 
'{"type":"record","name":"fg_1","namespace":"test_featurestore.db","fields":[{"name":"account_id","type":["null","string"]},{"name":"last_played_games","type":["null",{"type":"array","items":["null","string"]}]},{"name":"event_time","type":["null",{"type":"long","logicalType":"timestamp-micros"}]}]}' + } # Act - with pytest.raises( - TypeError - ) as e_info: # todo look into this (to_avro has to be mocked) - spark_engine._encode_complex_features( - feature_group=fg, - dataframe=spark_df, - ) + deserialized_df = spark_engine._deserialize_from_avro( + feature_group=fg, + dataframe=df, + ) # Assert - assert str(e_info.value) == "'JavaPackage' object is not callable" + assert deserialized_df.schema.json() == '{"fields":[{"metadata":{},"name":"account_id","nullable":true,"type":"string"},{"metadata":{},"name":"last_played_games","nullable":true,"type":{"containsNull":true,"elementType":"string","type":"array"}},{"metadata":{},"name":"event_time","nullable":true,"type":"timestamp"}],"type":"struct"}' - def test_online_fg_to_avro(self): + def test_serialize_deserialize_avro(self, mocker): # Arrange spark_engine = spark.Engine() - d = {"col_0": ["test_1", "test_2"], "col_1": ["test_1", "test_2"]} - df = pd.DataFrame(data=d) + now = datetime.datetime.now() - spark_df = spark_engine._spark_session.createDataFrame(df) + fg_data = [] + fg_data.append(("ekarson", ["GRAVITY RUSH 2", "KING'S QUEST"], pd.Timestamp(now.timestamp()))) + fg_data.append(("ratmilkdrinker", ["NBA 2K", "CALL OF DUTY"], pd.Timestamp(now.timestamp()))) + pandas_df = pd.DataFrame(fg_data, columns =["account_id", "last_played_games", "event_time"]) + + df = spark_engine._spark_session.createDataFrame(pandas_df) + + features = [ + feature.Feature(name="account_id", type="str"), + feature.Feature(name="last_played_games", type="array"), + feature.Feature(name="event_time", type="timestamp"), + ] fg = feature_group.FeatureGroup( name="test", @@ -1806,20 +1680,31 @@ def test_online_fg_to_avro(self): primary_key=[], partition_key=[], id=10, + features=features, ) - fg._avro_schema = '{"fields": [{"name": "col_0"}]}' + fg._subject = { + 'id': 1025, + 'subject': 'fg_1', + 'version': 1, + 'schema': '{"type":"record","name":"fg_1","namespace":"test_featurestore.db","fields":[{"name":"account_id","type":["null","string"]},{"name":"last_played_games","type":["null",{"type":"array","items":["null","string"]}]},{"name":"event_time","type":["null",{"type":"long","logicalType":"timestamp-micros"}]}]}' + } # Act - with pytest.raises( - TypeError - ) as e_info: # todo look into this (to_avro has to be mocked) - spark_engine._online_fg_to_avro( - feature_group=fg, - dataframe=spark_df, - ) + serialized_df = spark_engine._serialize_to_avro( + feature_group=fg, + dataframe=df, + ) + + deserialized_df = spark_engine._deserialize_from_avro( + feature_group=fg, + dataframe=serialized_df, + ) # Assert - assert str(e_info.value) == "'JavaPackage' object is not callable" + assert serialized_df.schema.json() == '{"fields":[{"metadata":{},"name":"key","nullable":false,"type":"binary"},{"metadata":{},"name":"value","nullable":false,"type":"binary"}],"type":"struct"}' + assert df.schema == deserialized_df.schema + assert df.collect() == deserialized_df.collect() + """ def test_get_training_data(self, mocker): # Arrange @@ -4389,6 +4274,109 @@ def test_setup_s3_hadoop_conf_legacy(self, mocker): "fs.s3a.endpoint", s3_connector.arguments.get("fs.s3a.endpoint") ) + def test_setup_s3_hadoop_conf_disable_legacy(self, mocker): + # Arrange + mock_pyspark_getOrCreate = 
mocker.patch( + "pyspark.sql.session.SparkSession.builder.getOrCreate" + ) + + spark_engine = spark.Engine() + + s3_connector = storage_connector.S3Connector( + id=1, + name="test_connector", + featurestore_id=99, + bucket="bucket-name", + access_key="1", + secret_key="2", + server_encryption_algorithm="3", + server_encryption_key="4", + session_token="5", + arguments=[ + {"name": "fs.s3a.endpoint", "value": "testEndpoint"}, + {"name": "fs.s3a.global-conf", "value": "False"}, + ], + ) + + # Act + result = spark_engine._setup_s3_hadoop_conf( + storage_connector=s3_connector, + path="s3://_test_path", + ) + + # Assert + assert result == "s3a://_test_path" + assert ( + mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.call_count + == 7 # Options should only be set at bucket level + ) + assert ( + call("fs.s3a.access.key", s3_connector.access_key) + not in mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.mock_calls + ) + assert ( + call("fs.s3a.secret.key", s3_connector.secret_key) + not in mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.mock_calls + ) + assert ( + call( + "fs.s3a.server-side-encryption-algorithm", + s3_connector.server_encryption_algorithm, + ) + not in mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.mock_calls + ) + + assert ( + call( + "fs.s3a.server-side-encryption-key", s3_connector.server_encryption_key + ) + not in mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.mock_calls + ) + + assert ( + call( + "fs.s3a.aws.credentials.provider", + "org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider", + ) + not in mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.mock_calls + ) + + assert ( + call("fs.s3a.session.token", s3_connector.session_token) + not in mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.mock_calls + ) + + assert ( + call("fs.s3a.endpoint", s3_connector.arguments.get("fs.s3a.endpoint")) + not in mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.mock_calls + ) + + mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call( + "fs.s3a.bucket.bucket-name.access.key", s3_connector.access_key + ) + mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call( + "fs.s3a.bucket.bucket-name.secret.key", s3_connector.secret_key + ) + mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call( + "fs.s3a.bucket.bucket-name.server-side-encryption-algorithm", + s3_connector.server_encryption_algorithm, + ) + mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call( + "fs.s3a.bucket.bucket-name.server-side-encryption-key", + s3_connector.server_encryption_key, + ) + mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call( + "fs.s3a.bucket.bucket-name.aws.credentials.provider", + "org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider", + ) + mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call( + "fs.s3a.bucket.bucket-name.session.token", s3_connector.session_token + ) + 
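        # Editor's sketch, not part of this change set: the per-bucket assertions in this
        # test assume that when the connector argument "fs.s3a.global-conf" is "False",
        # _setup_s3_hadoop_conf writes every S3A property under the Hadoop per-bucket
        # namespace instead of the global one, roughly:
        #
        #     scoped = "fs.s3a." if global_conf else f"fs.s3a.bucket.{s3_connector.bucket}."
        #     hadoop_conf.set(scoped + "access.key", s3_connector.access_key)
        #     hadoop_conf.set(scoped + "secret.key", s3_connector.secret_key)
        #
        # The variable names above are illustrative only; what the assertions actually check
        # is the resulting property keys ("fs.s3a.bucket.<bucket>.<option>", Hadoop's
        # standard per-bucket S3A configuration convention) and that no global keys are set.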
mock_pyspark_getOrCreate.return_value.sparkContext._jsc.hadoopConfiguration.return_value.set.assert_any_call( + "fs.s3a.bucket.bucket-name.endpoint", + s3_connector.arguments.get("fs.s3a.endpoint"), + ) + def test_setup_s3_hadoop_conf_bucket_scope(self, mocker): # Arrange mock_pyspark_getOrCreate = mocker.patch( @@ -4514,7 +4502,7 @@ def test_is_spark_dataframe_spark_dataframe(self): # Assert assert result is True - def test_save_empty_dataframe(self, mocker): + def test_update_table_schema_hudi(self, mocker): # Arrange mock_spark_engine_save_dataframe = mocker.patch( "hsfs.engine.spark.Engine.save_dataframe" @@ -4534,15 +4522,42 @@ def test_save_empty_dataframe(self, mocker): partition_key=[], id=10, featurestore_name="test_featurestore", + time_travel_format="HUDI", ) # Act - spark_engine.save_empty_dataframe(feature_group=fg) + spark_engine.update_table_schema(feature_group=fg) # Assert assert mock_spark_engine_save_dataframe.call_count == 1 assert mock_spark_read.format.call_count == 1 + def test_update_table_schema_delta(self, mocker): + # Arrange + mock_spark_read = mocker.patch("pyspark.sql.SparkSession.read") + mock_format = mocker.Mock() + mock_spark_read.format.return_value = mock_format + + # Arrange + spark_engine = spark.Engine() + + fg = feature_group.FeatureGroup( + name="test", + version=1, + featurestore_id=99, + primary_key=[], + partition_key=[], + id=10, + featurestore_name="test_featurestore", + time_travel_format="DELTA", + ) + + # Act + spark_engine.update_table_schema(feature_group=fg) + + # Assert + assert mock_spark_read.format.call_count == 1 + def test_apply_transformation_function_single_output_udf_default_mode(self, mocker): # Arrange mocker.patch("hopsworks_common.client.get_instance") diff --git a/python/tests/fixtures/model_fixtures.json b/python/tests/fixtures/model_fixtures.json index 40c0b8002..a937eab40 100644 --- a/python/tests/fixtures/model_fixtures.json +++ b/python/tests/fixtures/model_fixtures.json @@ -16,7 +16,6 @@ "program": "program", "user_full_name": "Full Name", "model_schema": "model_schema.json", - "training_dataset": "training_dataset", "input_example": "input_example.json", "model_registry_id": 1, "tags": [], @@ -42,7 +41,6 @@ "program": "program", "user_full_name": "Full Name", "model_schema": "model_schema.json", - "training_dataset": "training_dataset", "input_example": "input_example.json", "model_registry_id": 1, "tags": [], @@ -69,7 +67,6 @@ "program": "program", "user_full_name": "Full Name", "model_schema": "model_schema.json", - "training_dataset": "training_dataset", "input_example": "input_example.json", "model_registry_id": 1, "tags": [], @@ -96,7 +93,6 @@ "program": "program", "user_full_name": "Full Name", "model_schema": "model_schema.json", - "training_dataset": "training_dataset", "input_example": "input_example.json", "model_registry_id": 1, "tags": [], @@ -123,7 +119,6 @@ "program": "program", "user_full_name": "Full Name", "model_schema": "model_schema.json", - "training_dataset": "training_dataset", "input_example": "input_example.json", "model_registry_id": 1, "tags": [], @@ -133,6 +128,32 @@ ] } }, + "get_llm": { + "response": { + "count": 1, + "items": [ + { + "id": "5", + "name": "llmmodel", + "version": 0, + "created": "created", + "creator": "creator", + "environment": "environment.yml", + "description": "description", + "project_name": "myproject", + "metrics": { "acc": 0.7 }, + "program": "program", + "user_full_name": "Full Name", + "model_schema": "model_schema.json", + "input_example": 
"input_example.json", + "model_registry_id": 1, + "tags": [], + "framework": "LLM", + "href": "test_href" + } + ] + } + }, "get_list": { "response": { "count": 2, @@ -150,7 +171,6 @@ "program": "program", "user_full_name": "Full Name", "model_schema": "model_schema.json", - "training_dataset": "training_dataset", "input_example": "input_example.json", "model_registry_id": 1, "tags": [], @@ -170,7 +190,6 @@ "program": "program", "user_full_name": "Full Name", "model_schema": "model_schema.json", - "training_dataset": "training_dataset", "input_example": "input_example.json", "model_registry_id": 1, "tags": [], diff --git a/python/tests/fixtures/model_fixtures.py b/python/tests/fixtures/model_fixtures.py index 32fe396de..9b3796d05 100644 --- a/python/tests/fixtures/model_fixtures.py +++ b/python/tests/fixtures/model_fixtures.py @@ -17,6 +17,7 @@ import numpy as np import pandas as pd import pytest +from hsml.llm.model import Model as LLMModel from hsml.model import Model as BaseModel from hsml.python.model import Model as PythonModel from hsml.sklearn.model import Model as SklearnModel @@ -29,12 +30,14 @@ MODEL_SKLEARN_ID = 2 MODEL_TENSORFLOW_ID = 3 MODEL_TORCH_ID = 4 +MODEL_LLM_ID = 5 MODEL_BASE_NAME = "basemodel" MODEL_PYTHON_NAME = "pythonmodel" MODEL_SKLEARN_NAME = "sklearnmodel" MODEL_TENSORFLOW_NAME = "tensorflowmodel" MODEL_TORCH_NAME = "torchmodel" +MODEL_LLM_NAME = "llmmodel" # models @@ -63,6 +66,10 @@ def model_tensorflow(): def model_torch(): return TorchModel(MODEL_TORCH_ID, MODEL_TORCH_NAME) +@pytest.fixture +def model_llm(): + return LLMModel(MODEL_LLM_ID, MODEL_LLM_NAME) + # input example diff --git a/python/tests/test_constants.py b/python/tests/test_constants.py index 7a923d8d8..3c03263bf 100644 --- a/python/tests/test_constants.py +++ b/python/tests/test_constants.py @@ -38,6 +38,7 @@ def test_model_framework_constants(self): "FRAMEWORK_TORCH": "TORCH", "FRAMEWORK_PYTHON": "PYTHON", "FRAMEWORK_SKLEARN": "SKLEARN", + "FRAMEWORK_LLM": "LLM", } # Assert @@ -52,26 +53,29 @@ def test_model_framework_constants(self): def test_model_registry_constants(self): # Arrange - hopsfs_mount_prefix = {"HOPSFS_MOUNT_PREFIX": "/hopsfs/"} + model_registry = { + "HOPSFS_MOUNT_PREFIX": "/hopsfs/", + "MODEL_FILES_DIR_NAME": "Files", + } # Assert self._check_added_modified_or_removed_values( constants.MODEL_REGISTRY, - num_values=len(hopsfs_mount_prefix), - expected_constants=hopsfs_mount_prefix, + num_values=len(model_registry), + expected_constants=model_registry, ) # MODEL_SERVING def test_model_serving_constants(self): # Arrange - models_dataset = {"MODELS_DATASET": "Models"} + model_serving = {"MODELS_DATASET": "Models", "ARTIFACTS_DIR_NAME": "Artifacts"} # Assert self._check_added_modified_or_removed_values( constants.MODEL_SERVING, - num_values=len(models_dataset), - expected_constants=models_dataset, + num_values=len(model_serving), + expected_constants=model_serving, ) # ARTIFACT_VERSION @@ -193,6 +197,7 @@ def test_predictor_model_server_constants(self): model_servers = { "MODEL_SERVER_PYTHON": "PYTHON", "MODEL_SERVER_TF_SERVING": "TENSORFLOW_SERVING", + "MODEL_SERVER_VLLM": "VLLM", } # Assert diff --git a/python/tests/test_deployment.py b/python/tests/test_deployment.py index 4ff91eadd..d9494fe62 100644 --- a/python/tests/test_deployment.py +++ b/python/tests/test_deployment.py @@ -145,7 +145,7 @@ def test_save_default(self, mocker, backend_fixtures): d.save() # Assert - mock_serving_engine_save.assert_called_once_with(d, 60) + mock_serving_engine_save.assert_called_once_with(d, 120) 
def test_save(self, mocker, backend_fixtures): # Arrange @@ -176,7 +176,7 @@ def test_start_default(self, mocker, backend_fixtures): d.start() # Assert - mock_serving_engine_start.assert_called_once_with(d, await_status=60) + mock_serving_engine_start.assert_called_once_with(d, await_status=120) def test_start(self, mocker, backend_fixtures): # Arrange @@ -207,7 +207,7 @@ def test_stop_default(self, mocker, backend_fixtures): d.stop() # Assert - mock_serving_engine_stop.assert_called_once_with(d, await_status=60) + mock_serving_engine_stop.assert_called_once_with(d, await_status=120) def test_stop(self, mocker, backend_fixtures): # Arrange @@ -617,15 +617,17 @@ def test_download_artifact(self, mocker, backend_fixtures): # Arrange p = self._get_dummy_predictor(mocker, backend_fixtures) d = deployment.Deployment(predictor=p) - mock_serving_engine_download_artifact = mocker.patch( - "hsml.engine.serving_engine.ServingEngine.download_artifact" + mock_serving_engine_download_artifact_files = mocker.patch( + "hsml.engine.serving_engine.ServingEngine.download_artifact_files" ) # Act - d.download_artifact() + d.download_artifact_files() # Assert - mock_serving_engine_download_artifact.assert_called_once_with(d) + mock_serving_engine_download_artifact_files.assert_called_once_with( + d, local_path=None + ) # get logs diff --git a/python/tests/test_feature_group.py b/python/tests/test_feature_group.py index 5e01b5a10..ea25bbff3 100644 --- a/python/tests/test_feature_group.py +++ b/python/tests/test_feature_group.py @@ -928,6 +928,7 @@ def test_prepare_spark_location_with_s3_connector(self, mocker, backend_fixtures # Arrange engine = spark.Engine() engine_instance = mocker.patch("hsfs.engine.get_instance", return_value=engine) + refetch_api = mocker.patch("hsfs.storage_connector.S3Connector.refetch") json = backend_fixtures["feature_group"]["get_basic_info"]["response"] fg = feature_group.FeatureGroup.from_response_json(json) fg._location = f"{fg.name}_{fg.version}" @@ -939,11 +940,13 @@ def test_prepare_spark_location_with_s3_connector(self, mocker, backend_fixtures # Assert assert fg.location == path engine_instance.assert_called_once() + refetch_api.assert_called_once() def test_prepare_spark_location_with_s3_connector_python(self, mocker, backend_fixtures): # Arrange engine = python.Engine() engine_instance = mocker.patch("hsfs.engine.get_instance", return_value=engine) + mocker.patch("hsfs.storage_connector.S3Connector.refetch") json = backend_fixtures["feature_group"]["get_basic_info"]["response"] fg = feature_group.FeatureGroup.from_response_json(json) fg._location = f"{fg.name}_{fg.version}" diff --git a/python/tests/test_model.py b/python/tests/test_model.py index b430afd53..44ec19b5b 100644 --- a/python/tests/test_model.py +++ b/python/tests/test_model.py @@ -138,6 +138,19 @@ def test_constructor_torch(self, mocker, backend_fixtures): # Assert self.assert_model(mocker, m, json, MODEL.FRAMEWORK_TORCH) + def test_constructor_llm(self, mocker, backend_fixtures): + # Arrange + json = backend_fixtures["model"]["get_llm"]["response"]["items"][0] + m_json = copy.deepcopy(json) + id = m_json.pop("id") + name = m_json.pop("name") + + # Act + m = model.Model(id=id, name=name, **m_json) + + # Assert + self.assert_model(mocker, m, json, MODEL.FRAMEWORK_LLM) + # save def test_save(self, mocker, backend_fixtures): @@ -253,7 +266,9 @@ def test_download(self, mocker, backend_fixtures): m.download() # Assert - mock_model_engine_download.assert_called_once_with(model_instance=m) + 
mock_model_engine_download.assert_called_once_with( + model_instance=m, local_path=None + ) # tags @@ -357,7 +372,6 @@ def assert_model(self, mocker, m, m_json, model_framework): assert m.project_name == m_json["project_name"] assert m.training_metrics == m_json["metrics"] assert m._user_full_name == m_json["user_full_name"] - assert m.training_dataset == m_json["training_dataset"] assert m.model_registry_id == m_json["model_registry_id"] if model_framework is None: diff --git a/python/tests/test_predictor.py b/python/tests/test_predictor.py index 658e9d8fc..a48c3d877 100644 --- a/python/tests/test_predictor.py +++ b/python/tests/test_predictor.py @@ -344,6 +344,14 @@ def test_validate_script_file_py_none(self): # Assert assert "Predictor scripts are required" in str(e_info.value) + def test_validate_script_file_llm_none(self): + # Act + with pytest.raises(ValueError) as e_info: + _ = predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_LLM, None) + + # Assert + assert "Predictor scripts are required" in str(e_info.value) + def test_validate_script_file_tf_script_file(self): # Act predictor.Predictor._validate_script_file( @@ -364,6 +372,10 @@ def test_validate_script_file_py_script_file(self): # Act predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_PYTHON, "script_file") + def test_validate_script_file_llm_script_file(self): + # Act + predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_LLM, "script_file") + # infer model server def test_infer_model_server_tf(self): @@ -394,6 +406,13 @@ def test_infer_model_server_py(self): # Assert assert ms == PREDICTOR.MODEL_SERVER_PYTHON + def test_infer_model_server_llm(self): + # Act + ms = predictor.Predictor._infer_model_server(MODEL.FRAMEWORK_LLM) + + # Assert + assert ms == PREDICTOR.MODEL_SERVER_VLLM + # default serving tool def test_get_default_serving_tool_kserve_installed(self, mocker): diff --git a/python/tests/test_util.py b/python/tests/test_util.py index ce29ec34d..f92358755 100644 --- a/python/tests/test_util.py +++ b/python/tests/test_util.py @@ -28,6 +28,8 @@ from hopsworks_common.core.constants import HAS_AIOMYSQL, HAS_SQLALCHEMY from hsfs.embedding import EmbeddingFeature, EmbeddingIndex from hsfs.feature import Feature +from hsml.llm.model import Model as LLMModel +from hsml.llm.predictor import Predictor as LLMPredictor from hsml.model import Model as BaseModel from hsml.predictor import Predictor as BasePredictor from hsml.python.model import Model as PythonModel @@ -105,6 +107,17 @@ def test_set_model_class_torch(self, backend_fixtures): assert isinstance(model, TorchModel) assert model.framework == MODEL.FRAMEWORK_TORCH + def test_set_model_class_llm(self, backend_fixtures): + # Arrange + json = backend_fixtures["model"]["get_llm"]["response"]["items"][0] + + # Act + model = util.set_model_class(json) + + # Assert + assert isinstance(model, LLMModel) + assert model.framework == MODEL.FRAMEWORK_LLM + def test_set_model_class_unsupported(self, backend_fixtures): # Arrange json = backend_fixtures["model"]["get_base"]["response"]["items"][0] @@ -385,6 +398,7 @@ def pred_base_spec(model_framework, model_server): pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") + pred_llm = mocker.patch("hsml.llm.predictor.Predictor.__init__") # Act predictor = util.get_predictor_for_model(model_base) @@ -398,6 +412,7 @@ def pred_base_spec(model_framework, 
model_server): pred_sklearn.assert_not_called() pred_tensorflow.assert_not_called() pred_torch.assert_not_called() + pred_llm.assert_not_called() def test_get_predictor_for_model_python(self, mocker, model_python): # Arrange @@ -408,6 +423,7 @@ def test_get_predictor_for_model_python(self, mocker, model_python): pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") + pred_llm = mocker.patch("hsml.llm.predictor.Predictor.__init__") # Act predictor = util.get_predictor_for_model(model_python) @@ -419,6 +435,7 @@ def test_get_predictor_for_model_python(self, mocker, model_python): pred_sklearn.assert_not_called() pred_tensorflow.assert_not_called() pred_torch.assert_not_called() + pred_llm.assert_not_called() def test_get_predictor_for_model_sklearn(self, mocker, model_sklearn): # Arrange @@ -429,6 +446,7 @@ def test_get_predictor_for_model_sklearn(self, mocker, model_sklearn): ) pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") + pred_llm = mocker.patch("hsml.llm.predictor.Predictor.__init__") # Act predictor = util.get_predictor_for_model(model_sklearn) @@ -440,6 +458,7 @@ def test_get_predictor_for_model_sklearn(self, mocker, model_sklearn): pred_sklearn.assert_called_once() pred_tensorflow.assert_not_called() pred_torch.assert_not_called() + pred_llm.assert_not_called() def test_get_predictor_for_model_tensorflow(self, mocker, model_tensorflow): # Arrange @@ -450,6 +469,7 @@ def test_get_predictor_for_model_tensorflow(self, mocker, model_tensorflow): "hsml.tensorflow.predictor.Predictor.__init__", return_value=None ) pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") + pred_llm = mocker.patch("hsml.llm.predictor.Predictor.__init__") # Act predictor = util.get_predictor_for_model(model_tensorflow) @@ -461,6 +481,7 @@ def test_get_predictor_for_model_tensorflow(self, mocker, model_tensorflow): pred_sklearn.assert_not_called() pred_tensorflow.assert_called_once() pred_torch.assert_not_called() + pred_llm.assert_not_called() def test_get_predictor_for_model_torch(self, mocker, model_torch): # Arrange @@ -471,6 +492,7 @@ def test_get_predictor_for_model_torch(self, mocker, model_torch): pred_torch = mocker.patch( "hsml.torch.predictor.Predictor.__init__", return_value=None ) + pred_llm = mocker.patch("hsml.llm.predictor.Predictor.__init__") # Act predictor = util.get_predictor_for_model(model_torch) @@ -482,6 +504,30 @@ def test_get_predictor_for_model_torch(self, mocker, model_torch): pred_sklearn.assert_not_called() pred_tensorflow.assert_not_called() pred_torch.assert_called_once() + pred_llm.assert_not_called() + + def test_get_predictor_for_model_llm(self, mocker, model_llm): + # Arrange + pred_base = mocker.patch("hsml.predictor.Predictor.__init__") + pred_python = mocker.patch("hsml.python.predictor.Predictor.__init__") + pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") + pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") + pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") + pred_llm = mocker.patch( + "hsml.llm.predictor.Predictor.__init__", return_value=None + ) + + # Act + predictor = util.get_predictor_for_model(model_llm) + + # Assert + assert isinstance(predictor, LLMPredictor) + pred_base.assert_not_called() + 
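        # Editor's note (hedged): this group of tests assumes util.get_predictor_for_model
        # selects exactly one predictor class from the model's framework, with the new LLM
        # framework mapping to hsml.llm.predictor.Predictor (whose inferred model server,
        # per the predictor tests in this diff, is VLLM). A conceptual sketch of that
        # dispatch; the mapping name and shape are assumptions, not the verified code:
        #
        #     _PREDICTOR_BY_FRAMEWORK = {
        #         MODEL.FRAMEWORK_PYTHON: python_predictor.Predictor,
        #         MODEL.FRAMEWORK_SKLEARN: sklearn_predictor.Predictor,
        #         MODEL.FRAMEWORK_TENSORFLOW: tensorflow_predictor.Predictor,
        #         MODEL.FRAMEWORK_TORCH: torch_predictor.Predictor,
        #         MODEL.FRAMEWORK_LLM: llm_predictor.Predictor,
        #     }
        #
        # The tests only require that the matching predictor's __init__ is called once and
        # that every other predictor's __init__ is never called.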
pred_python.assert_not_called() + pred_sklearn.assert_not_called() + pred_tensorflow.assert_not_called() + pred_torch.assert_not_called() + pred_llm.assert_called_once() def test_get_predictor_for_model_non_base(self, mocker): # Arrange @@ -490,6 +536,7 @@ def test_get_predictor_for_model_non_base(self, mocker): pred_sklearn = mocker.patch("hsml.sklearn.predictor.Predictor.__init__") pred_tensorflow = mocker.patch("hsml.tensorflow.predictor.Predictor.__init__") pred_torch = mocker.patch("hsml.torch.predictor.Predictor.__init__") + pred_llm = mocker.patch("hsml.llm.predictor.Predictor.__init__") class NonBaseModel: pass @@ -506,6 +553,7 @@ class NonBaseModel: pred_sklearn.assert_not_called() pred_tensorflow.assert_not_called() pred_torch.assert_not_called() + pred_llm.assert_not_called() def test_get_hostname_replaced_url(self, mocker): # Arrange diff --git a/python/tests/utils/schema/test_columnar_schema.py b/python/tests/utils/schema/test_columnar_schema.py index c01c3c33d..6ddffea5d 100644 --- a/python/tests/utils/schema/test_columnar_schema.py +++ b/python/tests/utils/schema/test_columnar_schema.py @@ -57,7 +57,7 @@ def test_constructor_default(self, mocker): mock_convert_pandas_series_to_schema.assert_not_called() mock_convert_spark_to_schema.assert_not_called() mock_convert_td_to_schema.assert_not_called() - assert mock_find_spec.call_count == 2 + assert mock_find_spec.call_count == 1 def test_constructor_list(self, mocker): # Arrange @@ -257,7 +257,7 @@ def test_constructor_hsfs_td(self, mocker): mock_convert_pandas_series_to_schema.assert_not_called() mock_convert_spark_to_schema.assert_not_called() mock_convert_td_to_schema.assert_called_once_with(columnar_obj) - assert mock_find_spec.call_count == 2 + assert mock_find_spec.call_count == 1 # convert list to schema diff --git a/utils/java/pom.xml b/utils/java/pom.xml index 196978d6c..a3a3026b4 100644 --- a/utils/java/pom.xml +++ b/utils/java/pom.xml @@ -5,7 +5,7 @@ com.logicalclocks hsfs-utils - 4.1.0-SNAPSHOT + 4.2.0-SNAPSHOT 3.2.0.0-SNAPSHOT diff --git a/utils/python/hsfs_utils.py b/utils/python/hsfs_utils.py index 6b8c49311..3cc1eb615 100644 --- a/utils/python/hsfs_utils.py +++ b/utils/python/hsfs_utils.py @@ -13,12 +13,14 @@ hopsfs = pfs.HadoopFileSystem("default", user=os.environ["HADOOP_USER_NAME"]) from pyspark.sql import SparkSession from pyspark.sql.types import StructField, StructType, _parse_datatype_string +from pyspark.sql.functions import max, expr import hopsworks +from hsfs import engine from hsfs.constructor import query from hsfs.statistics_config import StatisticsConfig -from hsfs.core import feature_monitoring_config_engine, feature_view_engine +from hsfs.core import feature_monitoring_config_engine, feature_view_engine, kafka_engine def read_job_conf(path: str) -> Dict[Any, Any]: @@ -258,6 +260,96 @@ def delta_vacuum_fg(spark: SparkSession, job_conf: Dict[Any, Any]) -> None: entity.delta_vacuum() +def offline_fg_materialization(spark: SparkSession, job_conf: Dict[Any, Any], initial_check_point_string: str) -> None: + """ + Run materialization job on a feature group. 
+ """ + feature_store = job_conf.pop("feature_store") + fs = get_feature_store_handle(feature_store) + + entity = fs.get_feature_group(name=job_conf["name"], version=job_conf["version"]) + + read_options = kafka_engine.get_kafka_config( + entity.feature_store_id, {}, engine="spark" + ) + + # get offsets + offset_location = entity.prepare_spark_location() + "/kafka_offsets" + try: + if initial_check_point_string: + offset_string = json.dumps(_build_starting_offsets(initial_check_point_string)) + else: + offset_string = spark.read.json(offset_location).toJSON().first() + except Exception as e: + print(f"An unexpected error occurred: {e}") + # if all else fails read from the beginning + initial_check_point_string = kafka_engine.kafka_get_offsets( + topic_name=entity._online_topic_name, + feature_store_id=entity.feature_store_id, + offline_write_options={}, + high=False, + ) + offset_string = json.dumps(_build_starting_offsets(initial_check_point_string)) + print(f"startingOffsets: {offset_string}") + + # read kafka topic + df = ( + spark.read.format("kafka") + .options(**read_options) + .option("subscribe", entity._online_topic_name) + .option("startingOffsets", offset_string) + .option("includeHeaders", "true") + .load() + .limit(5000000) + ) + + # filter only the necessary entries + df = df.filter(expr("CAST(filter(headers, header -> header.key = 'featureGroupId')[0].value AS STRING)") == str(entity._id)) + df = df.filter(expr("CAST(filter(headers, header -> header.key = 'subjectId')[0].value AS STRING)") == str(entity.subject["id"])) + + # deserialize dataframe so that it can be properly saved + deserialized_df = engine.get_instance()._deserialize_from_avro(entity, df) + + # insert data + entity.stream = False # to make sure we don't write to kafka + entity.insert(deserialized_df, storage="offline") + + # update offsets + df_offsets = df.groupBy('partition').agg(max('offset').alias('offset')).collect() + offset_dict = json.loads(offset_string) + for offset_row in df_offsets: + offset_dict[f"{entity._online_topic_name}"][f"{offset_row.partition}"] = offset_row.offset + 1 + + # save offsets + offset_df = spark.createDataFrame([offset_dict]) + offset_df.coalesce(1).write.mode("overwrite").json(offset_location) + +def update_table_schema_fg(spark: SparkSession, job_conf: Dict[Any, Any]) -> None: + """ + Run table schema update job on a feature group. 
+ """ + feature_store = job_conf.pop("feature_store") + fs = get_feature_store_handle(feature_store) + + entity = fs.get_feature_group(name=job_conf["name"], version=job_conf["version"]) + + entity.stream = False + engine.get_instance().update_table_schema(entity) + +def _build_starting_offsets(initial_check_point_string: str): + if not initial_check_point_string: + return "" + + # Split the input string into the topic and partition-offset pairs + topic, offsets = initial_check_point_string.split(',', 1) + + # Split the offsets and build a dictionary from them + offsets_dict = {partition: int(offset) for partition, offset in (pair.split(':') for pair in offsets.split(','))} + + # Create the final dictionary structure + result = {topic: offsets_dict} + + return result if __name__ == "__main__": # Setup spark first so it fails faster in case of args errors @@ -278,6 +370,8 @@ def delta_vacuum_fg(spark: SparkSession, job_conf: Dict[Any, Any]) -> None: "import_fg", "run_feature_monitoring", "delta_vacuum_fg", + "offline_fg_materialization", + "update_table_schema_fg", ], help="Operation type", ) @@ -297,6 +391,12 @@ def parse_isoformat_date(da: str) -> datetime: help="Job start time", ) + parser.add_argument( + "-initialCheckPointString", + type=str, + help="Kafka offset to start consuming from", + ) + args = parser.parse_args() job_conf = read_job_conf(args.path) @@ -318,6 +418,10 @@ def parse_isoformat_date(da: str) -> datetime: run_feature_monitoring(job_conf) elif args.op == "delta_vacuum_fg": delta_vacuum_fg(spark, job_conf) + elif args.op == "offline_fg_materialization": + offline_fg_materialization(spark, job_conf, args.initialCheckPointString) + elif args.op == "update_table_schema_fg": + update_table_schema_fg(spark, job_conf) success = True except Exception:
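# Editor's worked example (annotation, not part of the diff): given the parsing logic in
# _build_starting_offsets above, an -initialCheckPointString such as
#
#     "test_topic,0:42,1:17"
#
# is split into the topic name and its partition:offset pairs and becomes
#
#     {"test_topic": {"0": 42, "1": 17}}
#
# which json.dumps turns into the startingOffsets JSON expected by the Spark Kafka source.
# offline_fg_materialization then advances each partition to max(offset) + 1 and persists
# the result under <prepare_spark_location()>/kafka_offsets for the next run to resume from.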