Skip to content

Commit

Permalink
Merge remote-tracking branch 'main/main' into dedup-hsml
Browse files Browse the repository at this point in the history
  • Loading branch information
aversey committed Nov 28, 2024
2 parents d4a0dde + baf922f commit 2beac35
Show file tree
Hide file tree
Showing 83 changed files with 2,017 additions and 844 deletions.
19 changes: 8 additions & 11 deletions docs/js/inject-api-links.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
window.addEventListener("DOMContentLoaded", function () {
var windowPathNameSplits = window.location.pathname.split("/");
var majorVersionRegex = new RegExp("(\\d+[.]\\d+)")
var majorVersionRegex = new RegExp("(\\d+[.]\\d+)");
var latestRegex = new RegExp("latest");
if (majorVersionRegex.test(windowPathNameSplits[1])) { // On landing page docs.hopsworks.api/3.0 - URL contains major version
if (majorVersionRegex.test(windowPathNameSplits[1])) { // On landing page docs.hopsworks.api/4.0 - URL contains major version
// Version API dropdown
document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + windowPathNameSplits[1] + "/generated/api/login/";
document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + windowPathNameSplits[1] + "/generated/api/connection_api/";
document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + windowPathNameSplits[1] + "/generated/connection_api/";
} else { // on docs.hopsworks.api/feature-store-api/3.0 / docs.hopsworks.api/hopsworks-api/3.0 / docs.hopsworks.api/machine-learning-api/3.0
document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + windowPathNameSplits[1] + "/javadoc";
} else { // on / docs.hopsworks.api/hopsworks-api/4.0
if (latestRegex.test(windowPathNameSplits[2]) || latestRegex.test(windowPathNameSplits[1])) {
var majorVersion = "latest";
var majorVersion = "latest";
} else {
var apiVersion = windowPathNameSplits[2];
var majorVersion = apiVersion.match(majorVersionRegex)[0];
var apiVersion = windowPathNameSplits[2];
var majorVersion = apiVersion.match(majorVersionRegex)[0];
}
// Version main navigation
document.getElementsByClassName("md-tabs__link")[0].href = "https://docs.hopsworks.ai/" + majorVersion;
Expand All @@ -24,8 +23,6 @@ window.addEventListener("DOMContentLoaded", function () {
document.getElementsByClassName("md-tabs__link")[6].href = "https://docs.hopsworks.ai/" + majorVersion + "/admin/";
// Version API dropdown
document.getElementById("hopsworks_api_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + majorVersion + "/generated/api/login/";
document.getElementById("hsfs_api_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/generated/api/connection_api/";
document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/feature-store-api/" + majorVersion + "/javadoc";
document.getElementById("hsml_api_link").href = "https://docs.hopsworks.ai/machine-learning-api/" + majorVersion + "/generated/connection_api/";
document.getElementById("hsfs_javadoc_link").href = "https://docs.hopsworks.ai/hopsworks-api/" + majorVersion + "/javadoc";
}
});
2 changes: 1 addition & 1 deletion java/beam/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>hsfs-parent</artifactId>
<groupId>com.logicalclocks</groupId>
<version>4.1.0-SNAPSHOT</version>
<version>4.2.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver
@Override
public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer version, String description,
List<String> primaryKeys, List<String> partitionKeys, String hudiPrecombineKey, boolean onlineEnabled,
StatisticsConfig statisticsConfig, String eventTime, OnlineConfig onlineConfig)
TimeTravelFormat timeTravelFormat, StatisticsConfig statisticsConfig, String eventTime, OnlineConfig onlineConfig)
throws IOException, FeatureStoreException {
throw new UnsupportedOperationException("Not supported for Beam");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@

package com.logicalclocks.hsfs.beam;

import java.io.IOException;
import java.text.ParseException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.beam.sdk.values.PCollection;

import com.logicalclocks.hsfs.Feature;
import com.logicalclocks.hsfs.FeatureGroupBase;
import com.logicalclocks.hsfs.FeatureStoreException;
Expand All @@ -26,19 +34,14 @@
import com.logicalclocks.hsfs.StatisticsConfig;
import com.logicalclocks.hsfs.Storage;
import com.logicalclocks.hsfs.StorageConnector;
import com.logicalclocks.hsfs.beam.engine.FeatureGroupEngine;
import com.logicalclocks.hsfs.TimeTravelFormat;
import com.logicalclocks.hsfs.beam.engine.BeamProducer;
import com.logicalclocks.hsfs.beam.engine.FeatureGroupEngine;
import com.logicalclocks.hsfs.constructor.QueryBase;
import com.logicalclocks.hsfs.metadata.Statistics;

import lombok.Builder;
import lombok.NonNull;
import org.apache.beam.sdk.values.PCollection;

import java.io.IOException;
import java.text.ParseException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class StreamFeatureGroup extends FeatureGroupBase<PCollection<Object>> {

Expand All @@ -48,8 +51,9 @@ public class StreamFeatureGroup extends FeatureGroupBase<PCollection<Object>> {
@Builder
public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String description,
List<String> primaryKeys, List<String> partitionKeys, String hudiPrecombineKey,
boolean onlineEnabled, List<Feature> features, StatisticsConfig statisticsConfig, String onlineTopicName,
String eventTime, OnlineConfig onlineConfig, StorageConnector storageConnector, String path) {
boolean onlineEnabled, TimeTravelFormat timeTravelFormat, List<Feature> features,
StatisticsConfig statisticsConfig, String onlineTopicName, String eventTime,
OnlineConfig onlineConfig, StorageConnector storageConnector, String path) {
this();
this.featureStore = featureStore;
this.name = name;
Expand All @@ -61,6 +65,7 @@ public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integ
? partitionKeys.stream().map(String::toLowerCase).collect(Collectors.toList()) : null;
this.hudiPrecombineKey = hudiPrecombineKey != null ? hudiPrecombineKey.toLowerCase() : null;
this.onlineEnabled = onlineEnabled;
this.timeTravelFormat = timeTravelFormat != null ? timeTravelFormat : TimeTravelFormat.HUDI;
this.features = features;
this.statisticsConfig = statisticsConfig != null ? statisticsConfig : new StatisticsConfig();
this.onlineTopicName = onlineTopicName;
Expand Down
2 changes: 1 addition & 1 deletion java/flink/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>hsfs-parent</artifactId>
<groupId>com.logicalclocks</groupId>
<version>4.1.0-SNAPSHOT</version>
<version>4.2.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,9 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver
public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer version, String description,
List<String> primaryKeys, List<String> partitionKeys,
String hudiPrecombineKey, boolean onlineEnabled,
StatisticsConfig statisticsConfig, String eventTime,
OnlineConfig onlineConfig)
TimeTravelFormat timeTravelFormat,
StatisticsConfig statisticsConfig,
String eventTime, OnlineConfig onlineConfig)
throws IOException, FeatureStoreException {
throw new UnsupportedOperationException("Not supported for Flink");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@

package com.logicalclocks.hsfs.flink;

import java.io.IOException;
import java.text.ParseException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.logicalclocks.hsfs.Feature;
import com.logicalclocks.hsfs.FeatureGroupBase;
Expand All @@ -27,22 +36,14 @@
import com.logicalclocks.hsfs.StatisticsConfig;
import com.logicalclocks.hsfs.Storage;
import com.logicalclocks.hsfs.StorageConnector;
import com.logicalclocks.hsfs.TimeTravelFormat;
import com.logicalclocks.hsfs.constructor.QueryBase;

import com.logicalclocks.hsfs.flink.engine.FeatureGroupEngine;
import com.logicalclocks.hsfs.metadata.Statistics;

import com.logicalclocks.hsfs.flink.engine.FeatureGroupEngine;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.NonNull;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;

import java.io.IOException;
import java.text.ParseException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

@AllArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
Expand All @@ -53,9 +54,9 @@ public class StreamFeatureGroup extends FeatureGroupBase<DataStream<?>> {
@Builder
public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String description,
List<String> primaryKeys, List<String> partitionKeys, String hudiPrecombineKey,
boolean onlineEnabled, List<Feature> features, StatisticsConfig statisticsConfig,
String onlineTopicName, String topicName, String notificationTopicName, String eventTime,
OnlineConfig onlineConfig, StorageConnector storageConnector, String path) {
boolean onlineEnabled, TimeTravelFormat timeTravelFormat, List<Feature> features,
StatisticsConfig statisticsConfig, String onlineTopicName, String topicName, String notificationTopicName,
String eventTime, OnlineConfig onlineConfig, StorageConnector storageConnector, String path) {
this();
this.featureStore = featureStore;
this.name = name;
Expand All @@ -67,6 +68,7 @@ public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integ
? partitionKeys.stream().map(String::toLowerCase).collect(Collectors.toList()) : null;
this.hudiPrecombineKey = hudiPrecombineKey != null ? hudiPrecombineKey.toLowerCase() : null;
this.onlineEnabled = onlineEnabled;
this.timeTravelFormat = timeTravelFormat != null ? timeTravelFormat : TimeTravelFormat.HUDI;
this.features = features;
this.statisticsConfig = statisticsConfig != null ? statisticsConfig : new StatisticsConfig();
this.onlineTopicName = onlineTopicName;
Expand Down
2 changes: 1 addition & 1 deletion java/hsfs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>hsfs-parent</artifactId>
<groupId>com.logicalclocks</groupId>
<version>4.1.0-SNAPSHOT</version>
<version>4.2.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ public abstract Object getOrCreateStreamFeatureGroup(String name, Integer versio
public abstract Object getOrCreateStreamFeatureGroup(String name, Integer version, String description,
List<String> primaryKeys, List<String> partitionKeys,
String hudiPrecombineKey, boolean onlineEnabled,
StatisticsConfig statisticsConfig, String eventTime,
OnlineConfig onlineConfig)
TimeTravelFormat timeTravelFormat, StatisticsConfig statisticsConfig,
String eventTime, OnlineConfig onlineConfig)
throws IOException, FeatureStoreException;

public abstract Object createExternalFeatureGroup();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@

public enum TimeTravelFormat {
NONE,
HUDI
HUDI,
DELTA
}
2 changes: 1 addition & 1 deletion java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<groupId>com.logicalclocks</groupId>
<artifactId>hsfs-parent</artifactId>
<packaging>pom</packaging>
<version>4.1.0-SNAPSHOT</version>
<version>4.2.0-SNAPSHOT</version>
<modules>
<module>hsfs</module>
<module>spark</module>
Expand Down
2 changes: 1 addition & 1 deletion java/spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<parent>
<artifactId>hsfs-parent</artifactId>
<groupId>com.logicalclocks</groupId>
<version>4.1.0-SNAPSHOT</version>
<version>4.2.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ public StreamFeatureGroup.StreamFeatureGroupBuilder createStreamFeatureGroup() {
public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer version)
throws IOException, FeatureStoreException {
return featureGroupEngine.getOrCreateStreamFeatureGroup(this, name, version, null,
null, null, null, false, null, null, null);
null, null, null, false, TimeTravelFormat.HUDI, null, null, null);
}

/**
Expand Down Expand Up @@ -438,7 +438,7 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver
boolean onlineEnabled, String eventTime)
throws IOException, FeatureStoreException {
return featureGroupEngine.getOrCreateStreamFeatureGroup(this, name, version, null,
primaryKeys, null, null, onlineEnabled, null, eventTime, null);
primaryKeys, null, null, onlineEnabled, TimeTravelFormat.HUDI, null, eventTime, null);
}

/**
Expand Down Expand Up @@ -477,7 +477,7 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver


return featureGroupEngine.getOrCreateStreamFeatureGroup(this, name, version, null,
primaryKeys, partitionKeys, null, onlineEnabled, null, eventTime, null);
primaryKeys, partitionKeys, null, onlineEnabled, TimeTravelFormat.HUDI, null, eventTime, null);
}

/**
Expand Down Expand Up @@ -506,6 +506,7 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver
* the first primary key of the feature group will be used as hudi precombine key.
* @param onlineEnabled Define whether the feature group should be made available also in the online feature store
* for low latency access.
* @param timeTravelFormat Format used for time travel, defaults to `"HUDI"`.
* @param statisticsConfig A configuration object, to generally enable descriptive statistics computation for
* this feature group, `"correlations`" to turn on feature correlation computation,
* `"histograms"` to compute feature value frequencies and `"exact_uniqueness"` to compute
Expand All @@ -523,13 +524,14 @@ public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer ver
public StreamFeatureGroup getOrCreateStreamFeatureGroup(String name, Integer version, String description,
List<String> primaryKeys, List<String> partitionKeys,
String hudiPrecombineKey, boolean onlineEnabled,
StatisticsConfig statisticsConfig, String eventTime,
OnlineConfig onlineConfig)
TimeTravelFormat timeTravelFormat,
StatisticsConfig statisticsConfig,
String eventTime, OnlineConfig onlineConfig)
throws IOException, FeatureStoreException {

return featureGroupEngine.getOrCreateStreamFeatureGroup(this, name, version, description,
primaryKeys, partitionKeys, hudiPrecombineKey, onlineEnabled, statisticsConfig, eventTime,
onlineConfig);
primaryKeys, partitionKeys, hudiPrecombineKey, onlineEnabled, timeTravelFormat,
statisticsConfig, eventTime, onlineConfig);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,40 +17,40 @@

package com.logicalclocks.hsfs.spark;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import com.logicalclocks.hsfs.spark.constructor.Query;
import com.logicalclocks.hsfs.spark.engine.FeatureGroupEngine;
import com.logicalclocks.hsfs.spark.engine.StatisticsEngine;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.streaming.StreamingQuery;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.logicalclocks.hsfs.EntityEndpointType;
import com.logicalclocks.hsfs.Feature;
import com.logicalclocks.hsfs.FeatureGroupBase;
import com.logicalclocks.hsfs.FeatureStoreException;
import com.logicalclocks.hsfs.HudiOperationType;
import com.logicalclocks.hsfs.JobConfiguration;
import com.logicalclocks.hsfs.OnlineConfig;
import com.logicalclocks.hsfs.StatisticsConfig;
import com.logicalclocks.hsfs.Storage;
import com.logicalclocks.hsfs.StorageConnector;
import com.logicalclocks.hsfs.FeatureGroupBase;
import com.logicalclocks.hsfs.TimeTravelFormat;
import com.logicalclocks.hsfs.metadata.Statistics;
import com.logicalclocks.hsfs.spark.constructor.Query;
import com.logicalclocks.hsfs.spark.engine.FeatureGroupEngine;
import com.logicalclocks.hsfs.spark.engine.StatisticsEngine;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.NonNull;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.streaming.StreamingQuery;

import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

@AllArgsConstructor
@JsonIgnoreProperties(ignoreUnknown = true)
public class StreamFeatureGroup extends FeatureGroupBase<Dataset<Row>> {
Expand All @@ -61,9 +61,10 @@ public class StreamFeatureGroup extends FeatureGroupBase<Dataset<Row>> {
@Builder
public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String description,
List<String> primaryKeys, List<String> partitionKeys, String hudiPrecombineKey,
boolean onlineEnabled, List<Feature> features, StatisticsConfig statisticsConfig,
String onlineTopicName, String topicName, String notificationTopicName, String eventTime,
OnlineConfig onlineConfig, StorageConnector storageConnector, String path) {
boolean onlineEnabled, TimeTravelFormat timeTravelFormat, List<Feature> features,
StatisticsConfig statisticsConfig, String onlineTopicName, String topicName,
String notificationTopicName, String eventTime, OnlineConfig onlineConfig,
StorageConnector storageConnector, String path) {
this();
this.featureStore = featureStore;
this.name = name;
Expand All @@ -75,6 +76,7 @@ public StreamFeatureGroup(FeatureStore featureStore, @NonNull String name, Integ
? partitionKeys.stream().map(String::toLowerCase).collect(Collectors.toList()) : null;
this.hudiPrecombineKey = hudiPrecombineKey != null ? hudiPrecombineKey.toLowerCase() : null;
this.onlineEnabled = onlineEnabled;
this.timeTravelFormat = timeTravelFormat != null ? timeTravelFormat : TimeTravelFormat.HUDI;
this.features = features;
this.statisticsConfig = statisticsConfig != null ? statisticsConfig : new StatisticsConfig();
this.onlineTopicName = onlineTopicName;
Expand Down
Loading

0 comments on commit 2beac35

Please sign in to comment.