From df73862ad458f119b1b7f722761b9bb8c65dd61e Mon Sep 17 00:00:00 2001 From: davitbzh <44586065+davitbzh@users.noreply.github.com> Date: Wed, 22 Nov 2023 00:29:04 +0100 Subject: [PATCH] [FSTORE-980] Helper, primary key and event time columns with feature view (#1540) --- .../test/ruby/spec/featureview_query_spec.rb | 206 ++++++++++++++++++ .../api/featurestore/FsQueryBuilder.java | 2 +- .../featureview/FeatureViewBuilder.java | 4 +- .../PreparedStatementResource.java | 9 +- .../api/featurestore/query/QueryResource.java | 21 +- .../PreparedStatementBuilder.java | 30 ++- .../TrainingDatasetDTOBuilder.java | 2 +- .../app/FsJobManagerController.java | 2 +- .../feature/TrainingDatasetFeatureDTO.java | 28 ++- .../featureview/FeatureViewController.java | 2 +- .../featurestore/query/QueryController.java | 39 +++- .../TrainingDatasetController.java | 112 +++++++++- .../TrainingDatasetInputValidation.java | 22 +- .../query/TestConstructorController.java | 2 +- .../TrainingDatasetControllerTest.java | 13 +- .../TrainingDatasetFeature.java | 33 ++- .../hops/hopsworks/restutils/RESTCodes.java | 4 +- 17 files changed, 471 insertions(+), 60 deletions(-) diff --git a/hopsworks-IT/src/test/ruby/spec/featureview_query_spec.rb b/hopsworks-IT/src/test/ruby/spec/featureview_query_spec.rb index 5b4e98eabe..c8bc5e83e1 100644 --- a/hopsworks-IT/src/test/ruby/spec/featureview_query_spec.rb +++ b/hopsworks-IT/src/test/ruby/spec/featureview_query_spec.rb @@ -373,6 +373,212 @@ "WHERE `fg1`.`a_testfeature1` > #{query[:filter][:leftFilter][:value]} AND `fg1`.`ts` > TIMESTAMP '#{query[:filter][:rightFilter][:value]}.000'" ) end + + + + it "should be able to create sql string from query with or without helper columns" do + project_name = @project.projectname.downcase + featurestore_id = get_featurestore_id(@project.id) + featurestore_name = get_featurestore_name(@project.id) + featuregroup_suffix = short_random_id + + features_1 = [ + {"name": "ts", "type": "TIMESTAMP"}, + {"name": "pk", "type": "INT", "primary": true}, + {"name": "a", "type": "INT"}, + {"name": "b", "type": "INT"}, + {"name": "c", "type": "INT"} + ] + + features_2 = [ + {"name": "ts", "type": "TIMESTAMP"}, + {"name": "pk", "type": "INT", "primary": true}, + {"name": "d", "type": "INT"}, + {"name": "e", "type": "INT"}, + {"name": "f", "type": "INT"} + ] + + features_3 = [ + {"name": "ts", "type": "TIMESTAMP"}, + {"name": "pk", "type": "INT", "primary": true}, + {"name": "g", "type": "INT"}, + {"name": "h", "type": "INT"}, + {"name": "i", "type": "INT"} + ] + + fg1 = create_cached_featuregroup_checked_return_fg(@project.id, featurestore_id, + "test_fg_1#{featuregroup_suffix}", + features: features_1, + event_time: "ts") + + fg2 = create_cached_featuregroup_checked_return_fg(@project.id, featurestore_id, + "test_fg_2#{featuregroup_suffix}", + features: features_2, + event_time: "ts") + fg3 = create_cached_featuregroup_checked_return_fg(@project.id, featurestore_id, + "test_fg_3#{featuregroup_suffix}", + features: features_3, + event_time: "ts") + + json_data = { + name: "feature_view_#{random_id}", + version: 1, + description: "testfeatureviewdescription", + query: { + leftFeatureGroup: {id: fg1[:id], type: fg1[:type]}, + leftFeatures: [{name: "a"}, {name: "b"}, {name: "c"}], + joins: [ + { + query: { + leftFeatureGroup: {id: fg2[:id], type: fg2[:type]}, + leftFeatures: [{name: "d"}, {name: "e"}, {name: "f"}], + joins: [], + filter: nil, + }, + on: [], + leftOn: [{name: "pk"}], + rightOn: [{name: "pk"}], + type: "INNER", + prefix: nil + }, + { + query: { + 
leftFeatureGroup: {id: fg3[:id], type: fg3[:type]}, + leftFeatures: [{name: "g"}, {name: "h"}, {name: "i"}], + joins: [], + filter: nil, + }, + on: [], + leftOn: [{name: "pk"}], + rightOn: [{name: "pk"}], + type: "INNER", + prefix: nil + } + ], + filter: nil, + }, + features: [ + { + name: "a", + type: "INT", + index: 0, + label: true, + inferenceHelperColumn: false, + trainingHelperColumn: false, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg1 + }, + { + name: "b", + type: "INT", + index: 1, + label: false, + inferenceHelperColumn: false, + trainingHelperColumn: false, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg1 + }, + { + name: "c", + index: 2, + label: false, + inferenceHelperColumn: false, + trainingHelperColumn: false, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg1 + }, + { + name: "d", + index: 3, + label: false, + inferenceHelperColumn: true, + trainingHelperColumn: false, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg2 + }, + { + name: "e", + index: 4, + label: false, + inferenceHelperColumn: false, + trainingHelperColumn: false, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg2 + + }, + { + name: "f", + index: 5, + label: false, + inferenceHelperColumn: false, + trainingHelperColumn: false, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg2 + + }, + { + name: "g", + index: 6, + label: false, + inferenceHelperColumn: false, + trainingHelperColumn: true, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg3 + }, + { + name: "h", + index: 7, + label: false, + inferenceHelperColumn: false, + trainingHelperColumn: false, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg3 + }, + { + name: "i", + index: 8, + label: false, + inferenceHelperColumn: false, + trainingHelperColumn: false, + transformationFunction: nil, + featureGroupFeatureName: nil, + featuregroup: fg3 + } + ], + type: "featureViewDTO" + } + + json_result = create_feature_view_with_json(@project.id, featurestore_id, json_data) + parsed_json = JSON.parse(json_result) + expect_status_details(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + + # without helper columns + fs_query = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/query/batch?start_time=1234&end_time=4321" + fs_query_result = put "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/query", JSON.parse(fs_query) + parsed_query_result = JSON.parse(fs_query_result) + expect(parsed_query_result['query']).to eql("SELECT `fg2`.`b` `b`, `fg2`.`c` `c`, `fg0`.`e` `e`, `fg0`.`f` `f`, `fg1`.`h` `h`, `fg1`.`i` `i`\nFROM `#{featurestore_name}`.`test_fg_1#{featuregroup_suffix}_1` `fg2`\nINNER JOIN `#{featurestore_name}`.`test_fg_2#{featuregroup_suffix}_1` `fg0` ON `fg2`.`pk` = `fg0`.`pk`\nINNER JOIN `#{featurestore_name}`.`test_fg_3#{featuregroup_suffix}_1` `fg1` ON `fg2`.`pk` = `fg1`.`pk`\nWHERE `fg2`.`ts` >= TIMESTAMP '1970-01-01 12:00:01.000' AND `fg2`.`ts` < TIMESTAMP '1970-01-01 12:00:04.000'") + expect(parsed_query_result['queryOnline']).to eql("SELECT `fg2`.`b` `b`, `fg2`.`c` `c`, `fg0`.`e` `e`, `fg0`.`f` `f`, `fg1`.`h` `h`, `fg1`.`i` `i`\nFROM `#{project_name.downcase}`.`test_fg_1#{featuregroup_suffix}_1` `fg2`\nINNER JOIN 
`#{project_name.downcase}`.`test_fg_2#{featuregroup_suffix}_1` `fg0` ON `fg2`.`pk` = `fg0`.`pk`\nINNER JOIN `#{project_name.downcase}`.`test_fg_3#{featuregroup_suffix}_1` `fg1` ON `fg2`.`pk` = `fg1`.`pk`\nWHERE `fg2`.`ts` >= TIMESTAMP '1970-01-01 12:00:01.000' AND `fg2`.`ts` < TIMESTAMP '1970-01-01 12:00:04.000'") + expect_status_details(200) + + # with helper columns + fs_query = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/query/batch?start_time=1234&end_time=4321&with_primary_keys=true&with_event_time=true&inference_helper_columns&inference_helper_columns=true&training_helper_columns=true" + fs_query_result = put "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/query", JSON.parse(fs_query) + parsed_query_result = JSON.parse(fs_query_result) + expect(parsed_query_result['query']).to eql("SELECT `fg2`.`b` `b`, `fg2`.`c` `c`, `fg2`.`ts` `ts`, `fg2`.`pk` `pk`, `fg0`.`e` `e`, `fg0`.`f` `f`, `fg1`.`g` `g`, `fg1`.`h` `h`, `fg1`.`i` `i`\nFROM `#{featurestore_name}`.`test_fg_1#{featuregroup_suffix}_1` `fg2`\nINNER JOIN `#{featurestore_name}`.`test_fg_2#{featuregroup_suffix}_1` `fg0` ON `fg2`.`pk` = `fg0`.`pk`\nINNER JOIN `#{featurestore_name}`.`test_fg_3#{featuregroup_suffix}_1` `fg1` ON `fg2`.`pk` = `fg1`.`pk`\nWHERE `fg2`.`ts` >= TIMESTAMP '1970-01-01 12:00:01.000' AND `fg2`.`ts` < TIMESTAMP '1970-01-01 12:00:04.000'") + expect(parsed_query_result['queryOnline']).to eql("SELECT `fg2`.`b` `b`, `fg2`.`c` `c`, `fg2`.`ts` `ts`, `fg2`.`pk` `pk`, `fg0`.`e` `e`, `fg0`.`f` `f`, `fg1`.`g` `g`, `fg1`.`h` `h`, `fg1`.`i` `i`\nFROM `#{project_name.downcase}`.`test_fg_1#{featuregroup_suffix}_1` `fg2`\nINNER JOIN `#{project_name.downcase}`.`test_fg_2#{featuregroup_suffix}_1` `fg0` ON `fg2`.`pk` = `fg0`.`pk`\nINNER JOIN `#{project_name.downcase}`.`test_fg_3#{featuregroup_suffix}_1` `fg1` ON `fg2`.`pk` = `fg1`.`pk`\nWHERE `fg2`.`ts` >= TIMESTAMP '1970-01-01 12:00:01.000' AND `fg2`.`ts` < TIMESTAMP '1970-01-01 12:00:04.000'") + expect_status_details(200) + end end end end diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FsQueryBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FsQueryBuilder.java index c98f8bfbde..ef511a015f 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FsQueryBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FsQueryBuilder.java @@ -84,7 +84,7 @@ public FsQueryDTO build(UriInfo uriInfo, Project project, Users user, Featuresto public FsQueryDTO build(UriInfo uriInfo, Project project, Users user, FeatureView featureView) throws FeaturestoreException, ServiceException { - Query query = queryController.makeQuery(featureView, project, user, true, false); + Query query = queryController.makeQuery(featureView, project, user, true, false, false, true, true, false); FsQueryDTO dto = constructorController.construct(query, pitJoinController.isPitEnabled(query), true, project, user); dto.setHref(uri(uriInfo, project)); diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewBuilder.java index 0dc1d32b7d..cc9c5256aa 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewBuilder.java @@ 
-155,7 +155,7 @@ public FeatureViewDTO build(FeatureView featureView, ResourceRequest resourceReq base.setQueryString(fsQueryBuilder.build(uriInfo, project, user, featureView)); } if (resourceRequest.contains(ResourceRequest.Name.QUERY)) { - Query query = queryController.makeQuery(featureView, project, user, true, false); + Query query = queryController.makeQuery(featureView, project, user, true, false, false, true, true, false); base.setQuery(queryBuilder.build(query, featureView.getFeaturestore(), project, user)); } if (resourceRequest.contains(ResourceRequest.Name.FEATURES)) { @@ -204,7 +204,7 @@ private List makeFeatures(FeatureView featureView) { f.getFeatureGroup().getVersion(), f.getFeatureGroup().isDeprecated()) : null, - f.getIndex(), f.isLabel())) + f.getIndex(), f.isLabel(), f.isInferenceHelperColumn(), f.isTrainingHelperColumn())) .collect(Collectors.toList()); } } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java index 3fc4e0ce0b..55fc746ef5 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java @@ -95,11 +95,16 @@ public Response getPreparedStatements( @ApiParam(value = "get batch serving vectors", example = "false") @QueryParam("batch") @DefaultValue("false") - boolean batch) + boolean batch, + @ApiParam(value = "get inference helper columns", example = "false") + @QueryParam("inference_helper_columns") + @DefaultValue("false") + boolean inference_helper_columns) throws FeaturestoreException { Users user = jWTHelper.getUserPrincipal(sc); ServingPreparedStatementDTO servingPreparedStatementDTO = preparedStatementBuilder.build(uriInfo, - new ResourceRequest(ResourceRequest.Name.PREPAREDSTATEMENTS), project, user, featurestore, featureView, batch); + new ResourceRequest(ResourceRequest.Name.PREPAREDSTATEMENTS), project, user, featurestore, featureView, batch, + inference_helper_columns); return Response.ok().entity(servingPreparedStatementDTO).build(); } } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java index 8b7126bf80..f285f46a7c 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java @@ -100,6 +100,22 @@ public Response constructBatchQuery( @QueryParam("with_label") @DefaultValue("false") Boolean withLabel, + @ApiParam(value = "Get query with primary key features") + @QueryParam("with_primary_keys") + @DefaultValue("false") + Boolean withPrimaryKeys, + @ApiParam(value = "Get query with primary event time feature") + @QueryParam("with_event_time") + @DefaultValue("false") + Boolean withEventTime, + @ApiParam(value = "Get query with inference helper columns") + @QueryParam("inference_helper_columns") + @DefaultValue("false") + Boolean inferenceHelperColumns, + @ApiParam(value = "Get query with training helper columns") + @QueryParam("training_helper_columns") + @DefaultValue("false") + Boolean trainingHelperColumns, @ApiParam(value = "Get query in hive format") @QueryParam("is_hive_engine") @DefaultValue("false") @@ -110,7 +126,8 @@ 
public Response constructBatchQuery( ) throws FeaturestoreException, ServiceException { Users user = jWTHelper.getUserPrincipal(sc); Query query = queryController.constructBatchQuery( - featureView, project, user, startTime, endTime, withLabel, isHiveEngine, trainingDataVersion); + featureView, project, user, startTime, endTime, withLabel, withPrimaryKeys, withEventTime, + inferenceHelperColumns, trainingHelperColumns, isHiveEngine, trainingDataVersion); return Response.ok().entity(queryBuilder.build(query, featurestore, project, user)).build(); } @@ -130,7 +147,7 @@ public Response getQuery( HttpServletRequest req ) throws FeaturestoreException, ServiceException { Users user = jWTHelper.getUserPrincipal(sc); - Query query = queryController.makeQuery(featureView, project, user, true, false); + Query query = queryController.makeQuery(featureView, project, user, true, false, false, true, true, false); QueryDTO queryDTO = queryBuilder.build(query, featurestore, project, user); return Response.ok().entity(queryDTO).build(); } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/PreparedStatementBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/PreparedStatementBuilder.java index 47fc5a501f..2417bf38d7 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/PreparedStatementBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/PreparedStatementBuilder.java @@ -142,16 +142,16 @@ private List getServingStatements(TrainingDataset t } List servingPreparedStatementDTOS = - createServingPreparedStatementDTOS(joins, project, user, batch); + createServingPreparedStatementDTOS(joins, project, user, batch, false); return servingPreparedStatementDTOS; } public ServingPreparedStatementDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, Users user, Featurestore featurestore, FeatureView featureView, - boolean batch) throws FeaturestoreException { + boolean batch, boolean inferenceHelperColumns) throws FeaturestoreException { List servingPreparedStatementDTOs = - getServingStatements(featureView, project, user, batch); + getServingStatements(featureView, project, user, batch, inferenceHelperColumns); ServingPreparedStatementDTO servingPreparedStatementDTO = new ServingPreparedStatementDTO(); servingPreparedStatementDTO.setHref(uri(uriInfo, project, featurestore, featureView)); @@ -163,9 +163,8 @@ public ServingPreparedStatementDTO build(UriInfo uriInfo, ResourceRequest resour return servingPreparedStatementDTO; } - private List getServingStatements(FeatureView featureView, Project project, - Users user, boolean batch) - throws FeaturestoreException { + private List getServingStatements(FeatureView featureView, + Project project, Users user, boolean batch, boolean inferenceHelperColumns) throws FeaturestoreException { List joins = trainingDatasetController.getJoinsSorted(featureView.getJoins()); // Check that all the feature groups still exists, if not throw a reasonable error if (featureView.getFeatures().stream().anyMatch(j -> j.getFeatureGroup() == null)) { @@ -173,13 +172,13 @@ private List getServingStatements(FeatureView featu } List servingPreparedStatementDTOS = - createServingPreparedStatementDTOS(joins, project, user, batch); + createServingPreparedStatementDTOS(joins, project, user, batch, inferenceHelperColumns); return servingPreparedStatementDTOS; } private List createServingPreparedStatementDTOS( - Collection 
joins, Project project, Users user, boolean batch) + Collection joins, Project project, Users user, boolean batch, boolean inferenceHelperColumns) throws FeaturestoreException { List servingPreparedStatementDTOS = new ArrayList<>(); @@ -210,13 +209,21 @@ private List createServingPreparedStatementDTOS( Level.FINE, "Inference vector is only available for training datasets generated by feature groups with " + "at least 1 primary key"); } - + // create td features - List selectFeatures = join.getFeatures().stream() + List selectFeatures; + if (inferenceHelperColumns) { + selectFeatures = join.getFeatures().stream().filter(TrainingDatasetFeature::isInferenceHelperColumn).map(tdf -> + featureGroupFeatures.get(tdf.getName())).collect(Collectors.toList()); + } else { + selectFeatures = join.getFeatures().stream() .filter(tdf -> !tdf.isLabel()) + .filter(tdf -> !tdf.isInferenceHelperColumn()) + .filter(tdf -> !tdf.isTrainingHelperColumn()) .sorted(Comparator.comparing(TrainingDatasetFeature::getIndex)) .map(tdf -> featureGroupFeatures.get(tdf.getName())) .collect(Collectors.toList()); + } if (batch) { // to be able to sort the batch correctly and align feature vectors from different feature groups @@ -227,7 +234,8 @@ private List createServingPreparedStatementDTOS( .collect(Collectors.toList())); } - // In some cases only label(s) are used from a feature group. In this case they will not be + // In some cases only label(s) or inference helper columns are used from a feature group. Or user may select only + // inference helper columns and some feature groups doesn't contain them. In this case they will not be // part of the prepared statement thus don't add to this query. if (selectFeatures.size() > 0){ // construct query for this feature group diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetDTOBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetDTOBuilder.java index ec70218f3d..e315b0b663 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetDTOBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetDTOBuilder.java @@ -87,7 +87,7 @@ public TrainingDatasetDTO build(Users user, Project project, TrainingDataset tra if (resourceRequest.contains(ResourceRequest.Name.EXTRAFILTER)) { FeatureView featureView = trainingDataset.getFeatureView(); featureView.setFilters(trainingDataset.getFilters()); - Query query = queryController.makeQuery(featureView, project, user, true, true); + Query query = queryController.makeQuery(featureView, project, user, true, false, false, false, false, true); trainingDatasetDTO.setExtraFilter( queryBuilder.build(query, trainingDataset.getFeaturestore(), project, user).getFilter() ); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/app/FsJobManagerController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/app/FsJobManagerController.java index 461926e7cc..0c472202b6 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/app/FsJobManagerController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/app/FsJobManagerController.java @@ -332,7 +332,7 @@ public Jobs setupTrainingDatasetJob(Project project, Users user, FeatureView fea throws FeaturestoreException, JobException, GenericException, ProjectException, ServiceException { TrainingDataset 
trainingDataset = trainingDatasetController.getTrainingDatasetByFeatureViewAndVersion( featureView, trainingDatasetVersion); - Query query = queryController.makeQuery(featureView, project, user, true, false); + Query query = queryController.makeQuery(featureView, project, user, true, false, false, false, false, false); QueryDTO queryDTO = queryBuilder.build(query, featureView.getFeaturestore(), project, user); return setupTrainingDatasetJob(project, user, trainingDataset, queryDTO, overwrite, writeOptions, sparkJobConfiguration, FEATURE_VIEW_TRAINING_DATASET_OP); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/feature/TrainingDatasetFeatureDTO.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/feature/TrainingDatasetFeatureDTO.java index 550e057d27..2eb542664a 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/feature/TrainingDatasetFeatureDTO.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/feature/TrainingDatasetFeatureDTO.java @@ -30,29 +30,35 @@ public class TrainingDatasetFeatureDTO { private String featureGroupFeatureName; private Integer index; private Boolean label = false; + private Boolean inferenceHelperColumn = false; + private Boolean trainingHelperColumn = false; private TransformationFunctionDTO transformationFunction; public TrainingDatasetFeatureDTO() { } public TrainingDatasetFeatureDTO(String name, String type, FeaturegroupDTO featuregroupDTO, Integer index, - Boolean label) { + Boolean label, Boolean inferenceHelperColumn , Boolean trainingHelperColumn) { this.name = name; this.type = type; this.featuregroup = featuregroupDTO; this.index = index; this.label = label; + this.inferenceHelperColumn = inferenceHelperColumn; + this.trainingHelperColumn = trainingHelperColumn; } public TrainingDatasetFeatureDTO(String name, String type, FeaturegroupDTO featuregroup, String featureGroupFeatureName, Integer index, Boolean label, - TransformationFunctionDTO transformationFunction) { + Boolean inferenceHelperColumn , Boolean trainingHelperColumn, TransformationFunctionDTO transformationFunction) { this.name = name; this.type = type; this.featuregroup = featuregroup; this.featureGroupFeatureName = featureGroupFeatureName; this.index = index; this.label = label; + this.inferenceHelperColumn = inferenceHelperColumn; + this.trainingHelperColumn = trainingHelperColumn; this.transformationFunction = transformationFunction; } @@ -103,7 +109,23 @@ public Boolean getLabel() { public void setLabel(Boolean label) { this.label = label; } - + + public Boolean getInferenceHelperColumn() { + return inferenceHelperColumn; + } + + public void setInferenceHelperColumn(Boolean inferenceHelperColumn) { + this.inferenceHelperColumn = inferenceHelperColumn; + } + + public Boolean getTrainingHelperColumn() { + return trainingHelperColumn; + } + + public void setTrainingHelperColumn(Boolean trainingHelperColumn) { + this.trainingHelperColumn = trainingHelperColumn; + } + public TransformationFunctionDTO getTransformationFunction() { return transformationFunction; } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewController.java index 2924855db5..3d598ecf6e 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewController.java +++ 
b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewController.java @@ -126,7 +126,7 @@ public FeatureView createFeatureView(Project project, Users user, FeatureView fe throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_VIEW_ALREADY_EXISTS, Level.FINE, "Feature view: " + featureView.getName() + ", version: " + featureView.getVersion()); } - + // Since training dataset created by feature view shares the same name, need to make sure name of feature view // do not collide with existing training dataset created without feature view. List trainingDatasets = trainingDatasetFacade diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryController.java index ddd30a3717..efbcf6d519 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryController.java @@ -512,12 +512,18 @@ void removeDuplicateColumns(Query query, boolean pitEnabled) { } } - public Query makeQuery(FeatureView featureView, Project project, Users user, boolean withLabel, Boolean isHiveEngine) + public Query makeQuery(FeatureView featureView, Project project, Users user, boolean withLabel, + boolean withPrimaryKeys, boolean withEventTime, boolean inferenceHelperColumns, + boolean trainingHelperColumns, Boolean isHiveEngine) throws FeaturestoreException { - return makeQuery(featureView, project, user, withLabel, isHiveEngine, Lists.newArrayList()); + return makeQuery(featureView, project, user, withLabel, withPrimaryKeys, withEventTime, inferenceHelperColumns, + trainingHelperColumns, isHiveEngine, + Lists.newArrayList()); } - public Query makeQuery(FeatureView featureView, Project project, Users user, boolean withLabel, Boolean isHiveEngine, + public Query makeQuery(FeatureView featureView, Project project, Users user, boolean withLabel, + boolean withPrimaryKeys, boolean withEventTime, boolean inferenceHelperColumns, + boolean trainingHelperColumns, Boolean isHiveEngine, Collection extraFilters) throws FeaturestoreException { List joins = featureView.getJoins().stream() @@ -536,31 +542,40 @@ public Query makeQuery(FeatureView featureView, Project project, Users user, boo }) // drop label features if desired .filter(f -> !f.isLabel() || withLabel) + // drop extra features if desired + .filter(f -> !f.isInferenceHelperColumn() || inferenceHelperColumns) + .filter(f -> !f.isTrainingHelperColumn() || trainingHelperColumns) .collect(Collectors.toList()); return trainingDatasetController.getQuery( - joins, tdFeatures, featureView.getFilters(), project, user, isHiveEngine, extraFilters); + joins, tdFeatures, featureView.getFilters(), project, user, isHiveEngine, extraFilters, withPrimaryKeys, + withEventTime); } public Query constructBatchQuery(FeatureView featureView, Project project, Users user, Long startTimestamp, - Long endTimestamp, Boolean withLabel, Boolean isHiveEngine, Integer trainingDataVersion) - throws FeaturestoreException { + Long endTimestamp, Boolean withLabel, Boolean withPrimaryKeys, Boolean withEventTime, + Boolean inferenceHelperColumns, Boolean trainingHelperColumns, Boolean isHiveEngine, + Integer trainingDataVersion) throws FeaturestoreException { Date startTime = startTimestamp == null ? null : new Date(startTimestamp); Date endTime = endTimestamp == null ? 
null : new Date(endTimestamp); - return constructBatchQuery(featureView, project, user, startTime, endTime, withLabel, isHiveEngine, - trainingDataVersion); + return constructBatchQuery(featureView, project, user, startTime, endTime, withLabel, withPrimaryKeys, + withEventTime, inferenceHelperColumns, trainingHelperColumns, isHiveEngine, trainingDataVersion); } public Query constructBatchQuery(FeatureView featureView, Project project, Users user, Date startTime, - Date endTime, Boolean withLabel, Boolean isHiveEngine, Integer trainingDataVersion) - throws FeaturestoreException { + Date endTime, Boolean withLabel, Boolean withPrimaryKeys, Boolean withEventTime, + Boolean inferenceHelperColumns, Boolean trainingHelperColumns, Boolean isHiveEngine, + Integer trainingDataVersion) throws FeaturestoreException { Query baseQuery; if (trainingDataVersion != null) { - baseQuery = makeQuery(featureView, project, user, withLabel, isHiveEngine, + baseQuery = makeQuery(featureView, project, user, withLabel, withPrimaryKeys, withEventTime, + inferenceHelperColumns, + trainingHelperColumns, isHiveEngine, trainingDatasetController.getTrainingDatasetByFeatureViewAndVersion(featureView, trainingDataVersion) .getFilters()); } else { - baseQuery = makeQuery(featureView, project, user, withLabel, isHiveEngine); + baseQuery = makeQuery(featureView, project, user, withLabel, withPrimaryKeys, withEventTime, + inferenceHelperColumns, trainingHelperColumns, isHiveEngine); } return appendEventTimeFilter(baseQuery, startTime, endTime); } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetController.java index fc2feaa6ce..1ffcb0c948 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetController.java @@ -62,7 +62,9 @@ import io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.FeaturegroupType; +import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat; +import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeature; import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn; import io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig; @@ -235,7 +237,7 @@ public TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project projec f.getFeatureGroup().getVersion(), f.getFeatureGroup().isDeprecated()) : null, - f.getIndex(), f.isLabel())) + f.getIndex(), f.isLabel(), f.isInferenceHelperColumn() , f.isTrainingHelperColumn())) .collect(Collectors.toList())); } @@ -260,7 +262,7 @@ public TrainingDatasetDTO createTrainingDataset(Users user, Project project, Fea // Name of Training data = _, version is needed // because there can be multiple training dataset of same name from different version of feature view trainingDatasetDTO.setName(featureView.getName() + "_" + 
featureView.getVersion()); - Query query = queryController.makeQuery(featureView, project, user, true, false); + Query query = queryController.makeQuery(featureView, project, user, true, false, false, false, false, false); if (query.getDeletedFeatureGroups() != null && !query.getDeletedFeatureGroups().isEmpty()) { throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATUREGROUP_NOT_FOUND, Level.SEVERE, String.format("Cannot create the training dataset because " + @@ -634,6 +636,8 @@ public List collectFeatures(Query query, List features = new ArrayList<>(); boolean isLabel = false; + boolean isInferenceHelper = false; + boolean isTrainingHelper = false; TransformationFunction transformationFunction = null; for (Feature f : query.getFeatures()) { if (featureDTOs != null && !featureDTOs.isEmpty()) { @@ -643,14 +647,30 @@ public List collectFeatures(Query query, List + f.getName().equals(dto.getName()) && dto.getInferenceHelperColumn() + // If feature group is null, it assumes matching name of the extra feature only. + && (dto.getFeaturegroup() == null || f.getFeatureGroup().getId().equals(dto.getFeaturegroup().getId())) + ); + + // identify if it is inference training feature + isTrainingHelper = featureDTOs.stream().anyMatch(dto -> + f.getName().equals(dto.getName()) && dto.getTrainingHelperColumn() + // If feature group is null, it assumes matching name of the extra feature only. + && (dto.getFeaturegroup() == null || f.getFeatureGroup().getId().equals(dto.getFeaturegroup().getId())) + ); + // get transformation function for this feature transformationFunction = getTransformationFunction(f, featureDTOs, tdJoins.get(joinIndex).getPrefix()); } features.add(trainingDataset != null ? new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(), - f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction) : + f.getName(), f.getType(), featureIndex++, isLabel, isInferenceHelper, isTrainingHelper, + transformationFunction): new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(), - f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction)); + f.getName(), f.getType(), featureIndex++, isLabel, isInferenceHelper, isTrainingHelper, + transformationFunction)); } if (query.getJoins() != null) { @@ -1076,18 +1096,20 @@ public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Projec // to respect the ordering, all selected features are added to the left most Query instead of splitting them // over the querys for their respective origin feature group List tdFeatures = getFeaturesSorted(trainingDataset, withLabel); - return getQuery(joins, tdFeatures, trainingDataset.getFilters(), project, user, isHiveEngine); + return getQuery(joins, tdFeatures, trainingDataset.getFilters(), project, user, isHiveEngine, false, false); } public Query getQuery(List joins, List tdFeatures, Collection trainingDatasetFilters, Project project, - Users user, Boolean isHiveEngine) throws FeaturestoreException { - return getQuery(joins, tdFeatures, trainingDatasetFilters, project, user, isHiveEngine, Lists.newArrayList()); + Users user, Boolean isHiveEngine, boolean withPrimaryKeys, boolean withEventTime) throws FeaturestoreException { + return getQuery(joins, tdFeatures, trainingDatasetFilters, project, user, isHiveEngine, Lists.newArrayList(), + withPrimaryKeys, withEventTime); } - + public Query getQuery(List joins, List tdFeatures, Collection trainingDatasetFilters, Project project, - Users user, 
Boolean isHiveEngine, Collection extraFilters) throws FeaturestoreException { + Users user, Boolean isHiveEngine, Collection extraFilters, boolean withPrimaryKeys, + boolean withEventTime) throws FeaturestoreException { // Convert all the TrainingDatasetFeatures to QueryFeatures Map fgAliasLookup = getAliasLookupTable(joins); @@ -1116,11 +1138,12 @@ public Query getQuery(List joins, List featureLookup = availableFeaturesLookup.values().stream().flatMap(List::stream) .collect(Collectors.toMap( f -> makeFeatureLookupKey(f.getFeatureGroup().getId(), f.getName()), f -> f, (f1, f2) -> f1)); - + List features = new ArrayList<>(); for (TrainingDatasetFeature requestedFeature : tdFeatures) { Feature tdFeature = featureLookup.get(makeFeatureLookupKey(requestedFeature.getFeatureGroup().getId(), requestedFeature.getName())); + if (tdFeature == null) { throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_DOES_NOT_EXIST, Level.FINE, "Feature: " + requestedFeature.getName() + " not found in feature group: " + @@ -1133,7 +1156,11 @@ public Query getQuery(List joins, List feature store name Map fsLookup = getFsLookupTableJoins(joins); @@ -1385,4 +1412,67 @@ public String checkPrefix(TrainingDatasetFeature feature) { return feature.getName(); } } + + private void addSelectedFeature(List featureNames, Featuregroup featureGroup, + Map featureLookup, List features, TrainingDatasetJoin join, + Map fgAliasLookup) { + + for (String featureName: featureNames) { + Feature feature = featureLookup.get(makeFeatureLookupKey(featureGroup.getId(), featureName)); + Feature newFeature = new Feature(feature.getName(), + fgAliasLookup.get(join.getId()), + feature.getType(), feature.getDefaultValue(), join.getPrefix(), + join.getFeatureGroup(), + null); + + // check if user already selected feature in the query + if (features.stream().noneMatch(f -> f.getName().equals(featureName) & + (f.getFeatureGroup().getId().equals(f.getFeatureGroup().getId())))) { + features.add( + newFeature + ); + } + } + } + + private List addPrimaryKeyEventTimeFeature(List features, Map featureLookup, + List joins, Map fgAliasLookup, + boolean withEventTime, boolean withPrimaryKeys) { + + if (withPrimaryKeys || withEventTime) { + for (TrainingDatasetJoin join: joins) { + List featureNames = new ArrayList<>(); + // 1st collect primary key and event time feature names in the join + if (withEventTime && join.getFeatureGroup().getEventTime() != null) { + featureNames.add(join.getFeatureGroup().getEventTime()); + } + + if (withPrimaryKeys) { + if (join.getFeatureGroup().getStreamFeatureGroup() != null) { + featureNames.addAll(join.getFeatureGroup().getStreamFeatureGroup().getFeaturesExtraConstraints().stream() + .filter(CachedFeatureExtraConstraints::getPrimary) + .map(CachedFeatureExtraConstraints::getName) + .collect(Collectors.toList()) + ); + } + if (join.getFeatureGroup().getCachedFeaturegroup() != null) { + featureNames.addAll(join.getFeatureGroup().getCachedFeaturegroup().getFeaturesExtraConstraints().stream() + .filter(CachedFeatureExtraConstraints::getPrimary) + .map(CachedFeatureExtraConstraints::getName) + .collect(Collectors.toList()) + ); + } + if (join.getFeatureGroup().getOnDemandFeaturegroup() != null) + featureNames.addAll(join.getFeatureGroup().getOnDemandFeaturegroup().getFeatures().stream() + .filter(OnDemandFeature::getPrimary).map(OnDemandFeature::getName) + .collect(Collectors.toList()) + ); + } + // now add these features to the list + addSelectedFeature(featureNames, join.getFeatureGroup(), featureLookup, + 
features, join, fgAliasLookup); + } + } + return features; + } } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetInputValidation.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetInputValidation.java index 0ffe79464f..9d2401a83c 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetInputValidation.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetInputValidation.java @@ -320,6 +320,12 @@ public void validateFeatures(Query query, List featur List featuresWithTransformation = featuresDTOs.stream() .filter(f -> f.getTransformationFunction() != null) .collect(Collectors.toList()); + List inferenceHelperColumns = featuresDTOs.stream() + .filter(TrainingDatasetFeatureDTO::getInferenceHelperColumn) + .collect(Collectors.toList()); + List trainingHelperColumns = featuresDTOs.stream() + .filter(TrainingDatasetFeatureDTO::getTrainingHelperColumn) + .collect(Collectors.toList()); List features = collectFeatures(query); for (TrainingDatasetFeatureDTO label : labels) { @@ -328,7 +334,21 @@ public void validateFeatures(Query query, List featur "Label: " + label.getName() + " is missing"); } } - + + for (TrainingDatasetFeatureDTO inferenceHelperColumn : inferenceHelperColumns) { + if (features.stream().noneMatch(f -> f.getName().equals(inferenceHelperColumn.getName()))) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.LABEL_NOT_FOUND, Level.FINE, + "Inference helper column: " + inferenceHelperColumn.getName() + " is missing"); + } + } + + for (TrainingDatasetFeatureDTO trainingHelperColumn : trainingHelperColumns) { + if (features.stream().noneMatch(f -> f.getName().equals(trainingHelperColumn.getName()))) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.HELPER_COL_NOT_FOUND, Level.FINE, + "Training helper column: " + trainingHelperColumn.getName() + " is missing"); + } + } + for (TrainingDatasetFeatureDTO featureWithTransformation : featuresWithTransformation) { if (features.stream().noneMatch(f -> f.getName().equals(featureWithTransformation.getFeatureGroupFeatureName()))) { diff --git a/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/query/TestConstructorController.java b/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/query/TestConstructorController.java index 9594d1284e..d4442eee04 100644 --- a/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/query/TestConstructorController.java +++ b/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/query/TestConstructorController.java @@ -1069,7 +1069,7 @@ public void testConstruct_deletedFeatureGroup() throws Exception { tdFeatures.add(new TrainingDatasetFeature("feature_existing", new Featuregroup())); Query query = trainingDatasetController.getQuery(new ArrayList<>(), tdFeatures, Collections.emptyList(), project, - user, false); + user, false, false, false); FsQueryDTO result = target.construct(query, false, false, project, user); Assert.assertEquals("Parent feature groups of the following features are not available anymore: feature_missing", result.getQuery()); diff --git a/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetControllerTest.java b/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetControllerTest.java index 
e696133ee9..a7a2828cd8 100644 --- a/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetControllerTest.java +++ b/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetControllerTest.java @@ -449,25 +449,24 @@ public void testGetQuery_deletedFeatureGroup() throws Exception { tdFeatures.add(new TrainingDatasetFeature("feature_existing", new Featuregroup())); Query result = target.getQuery(new ArrayList<>(), tdFeatures, Collections.emptyList(), Mockito.mock(Project.class), - Mockito.mock(Users.class), false); + Mockito.mock(Users.class), false, false, false); Assert.assertFalse(result.getDeletedFeatureGroups().isEmpty()); Assert.assertEquals(1, result.getDeletedFeatureGroups().size()); Assert.assertEquals("feature_missing", result.getDeletedFeatureGroups().get(0)); } - + @Test public void testGetQuery_noFeature() { try { target.getQuery(new ArrayList<>(), Collections.emptyList(), Collections.emptyList(), - Mockito.mock(Project.class), Mockito.mock(Users.class), false); - + Mockito.mock(Project.class), Mockito.mock(Users.class), false, false, false); + Assert.fail("Expected FeaturestoreException, but no exception was thrown."); } catch (FeaturestoreException e) { Assert.assertEquals(FEATURE_NOT_FOUND, e.getErrorCode()); } } - @Test public void testCollectFeatures() throws Exception { // prepare TransformationFunctionDTO @@ -485,8 +484,8 @@ public void testCollectFeatures() throws Exception { FeaturegroupDTO fgDto = new FeaturegroupDTO(); fgDto.setId(1); List tdFeatureDtos = new ArrayList<>(); - tdFeatureDtos.add(new TrainingDatasetFeatureDTO("f1", "double", fgDto, "f1", 0, false, tfDto1)); - tdFeatureDtos.add(new TrainingDatasetFeatureDTO("fg1_f1", "double", fgDto, "f1", 1, false, tfDto2)); + tdFeatureDtos.add(new TrainingDatasetFeatureDTO("f1", "double", fgDto, "f1", 0, false, false, false, tfDto1)); + tdFeatureDtos.add(new TrainingDatasetFeatureDTO("fg1_f1", "double", fgDto, "f1", 1, false, false, false, tfDto2)); // prepare Query Query query = new Query(); Feature feature1 = new Feature("f1", false); diff --git a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetFeature.java b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetFeature.java index 65a074d161..f2e18c4aa3 100644 --- a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetFeature.java +++ b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetFeature.java @@ -62,6 +62,12 @@ public class TrainingDatasetFeature implements Serializable { @Basic(optional = false) @Column(name = "label") private boolean label; + @Basic(optional = false) + @Column(name = "inference_helper_column") + private boolean inferenceHelperColumn; + @Basic(optional = false) + @Column(name = "training_helper_column") + private boolean trainingHelperColumn; @JoinColumn(name = "transformation_function", referencedColumnName = "id") private TransformationFunction transformationFunction; @@ -75,8 +81,8 @@ public TrainingDatasetFeature(String name, Featuregroup featureGroup) { } public TrainingDatasetFeature(FeatureView featureView, TrainingDatasetJoin trainingDatasetJoin, - Featuregroup featureGroup, String name, String type, Integer index, boolean label, - TransformationFunction transformationFunction) { + Featuregroup 
featureGroup, String name, String type, Integer index, boolean label, boolean inferenceHelperColumn, + boolean trainingHelperColumn, TransformationFunction transformationFunction) { this.featureView = featureView; this.trainingDatasetJoin = trainingDatasetJoin; this.featureGroup = featureGroup; @@ -84,11 +90,14 @@ public TrainingDatasetFeature(FeatureView featureView, TrainingDatasetJoin train this.type = type; this.index = index; this.label = label; + this.inferenceHelperColumn = inferenceHelperColumn; + this.trainingHelperColumn = trainingHelperColumn; this.transformationFunction = transformationFunction; } public TrainingDatasetFeature(TrainingDataset trainingDataset, TrainingDatasetJoin trainingDatasetJoin, Featuregroup featureGroup, String name, String type, Integer index, boolean label, + boolean inferenceHelperColumn, boolean trainingHelperColumn, TransformationFunction transformationFunction) { this.trainingDataset = trainingDataset; this.trainingDatasetJoin = trainingDatasetJoin; @@ -97,6 +106,8 @@ public TrainingDatasetFeature(TrainingDataset trainingDataset, TrainingDatasetJo this.type = type; this.index = index; this.label = label; + this.inferenceHelperColumn = inferenceHelperColumn; + this.trainingHelperColumn = trainingHelperColumn; this.transformationFunction = transformationFunction; } @@ -177,7 +188,23 @@ public boolean isLabel() { public void setLabel(boolean label) { this.label = label; } - + + public boolean isInferenceHelperColumn() { + return inferenceHelperColumn; + } + + public void setInferenceHelperColumn(boolean inferenceHelperColumn) { + this.inferenceHelperColumn = inferenceHelperColumn; + } + + public boolean isTrainingHelperColumn() { + return trainingHelperColumn; + } + + public void setTrainingHelperColumn(boolean trainingHelperColumn) { + this.trainingHelperColumn = trainingHelperColumn; + } + public TransformationFunction getTransformationFunction() { return transformationFunction; } diff --git a/hopsworks-rest-utils/src/main/java/io/hops/hopsworks/restutils/RESTCodes.java b/hopsworks-rest-utils/src/main/java/io/hops/hopsworks/restutils/RESTCodes.java index 16f62f42eb..8de93ffeea 100644 --- a/hopsworks-rest-utils/src/main/java/io/hops/hopsworks/restutils/RESTCodes.java +++ b/hopsworks-rest-utils/src/main/java/io/hops/hopsworks/restutils/RESTCodes.java @@ -1677,7 +1677,9 @@ public enum FeaturestoreErrorCode implements RESTErrorCode { Response.Status.INTERNAL_SERVER_ERROR), SPINE_GROUP_ON_RIGHT_SIDE_OF_JOIN_NOT_ALLOWED(223, "Spine groups cannot be used on the right side" + "of a feature view join.", Response.Status.BAD_REQUEST), - FEATURE_GROUP_DUPLICATE_FEATURE(224, "Feature list contains duplicate", Response.Status.BAD_REQUEST); + FEATURE_GROUP_DUPLICATE_FEATURE(224, "Feature list contains duplicate", Response.Status.BAD_REQUEST), + HELPER_COL_NOT_FOUND(225, "Could not find helper column in feature view schema", + Response.Status.NOT_FOUND); private int code; private String message;
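
Editor's note (illustrative, not part of the patch): the new QueryController.makeQuery signature introduced above takes six positional booleans, which makes call sites such as makeQuery(featureView, project, user, true, false, false, true, true, false) hard to read at a glance. The minimal Java sketch below simply names those flags, using the same values as the FsQueryBuilder call site in this patch. The class name MakeQueryFlagsExample is invented for illustration, and the import paths for Project, Users and FeaturestoreException are assumed from the surrounding Hopsworks codebase rather than taken from this diff.

package io.hops.hopsworks.common.featurestore.query;

// Shown in this patch (TrainingDatasetController import block):
import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView;
// Assumed package paths (not shown in this patch):
import io.hops.hopsworks.exceptions.FeaturestoreException;
import io.hops.hopsworks.persistence.entity.project.Project;
import io.hops.hopsworks.persistence.entity.user.Users;

public class MakeQueryFlagsExample {

  /**
   * Equivalent to the FsQueryBuilder call in this patch:
   * queryController.makeQuery(featureView, project, user, true, false, false, true, true, false)
   * but with the positional flags named for readability.
   */
  public Query buildQueryWithHelperColumns(QueryController queryController, FeatureView featureView,
                                           Project project, Users user) throws FeaturestoreException {
    boolean withLabel = true;               // keep label features in the query
    boolean withPrimaryKeys = false;        // do not append primary key columns
    boolean withEventTime = false;          // do not append the event time column
    boolean inferenceHelperColumns = true;  // keep inference helper columns
    boolean trainingHelperColumns = true;   // keep training helper columns
    Boolean isHiveEngine = false;           // generate Spark (non-Hive) SQL

    return queryController.makeQuery(featureView, project, user,
        withLabel, withPrimaryKeys, withEventTime,
        inferenceHelperColumns, trainingHelperColumns, isHiveEngine);
  }
}

The same flags are exposed as REST query parameters on the batch query endpoint added by this patch (with_primary_keys, with_event_time, inference_helper_columns, training_helper_columns), which is what the Ruby spec above exercises with and without helper columns.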