-
Notifications
You must be signed in to change notification settings - Fork 978
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ignore features in FeatureRow if it's not requested in import spec (#101)
* Ignore features in FeatureRow if it's not requested in import spec
* Revert "Ignore features in FeatureRow if it's not requested in import spec" This reverts commit 98d11cf.
* Filter unknown features in PubSub and Kafka source
* Add license
* Set default to false
- Loading branch information
1 parent
a4fcbb5
commit 1691a12
Showing
4 changed files
with
174 additions
and
26 deletions.
There are no files selected for viewing
47 changes: 47 additions & 0 deletions
47
ingestion/src/main/java/feast/ingestion/transform/fn/FilterFeatureRowDoFn.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* Copyright 2018 The Feast Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
package feast.ingestion.transform.fn; | ||
|
||
import feast.types.FeatureProto.Feature; | ||
import feast.types.FeatureRowProto.FeatureRow; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Set; | ||
import org.apache.beam.sdk.transforms.DoFn; | ||
|
||
/** | ||
* Filter FeatureRow to only contain feature with given IDs | ||
*/ | ||
public class FilterFeatureRowDoFn extends DoFn<FeatureRow, FeatureRow> { | ||
private final Set<String> featureIds; | ||
|
||
public FilterFeatureRowDoFn(List<String> featureIds) { | ||
this.featureIds = new HashSet<>(featureIds); | ||
} | ||
|
||
@ProcessElement | ||
public void processElement(ProcessContext context) { | ||
FeatureRow input = context.element(); | ||
FeatureRow.Builder output = FeatureRow.newBuilder(input).clearFeatures(); | ||
for (Feature feature : input.getFeaturesList()) { | ||
if (featureIds.contains(feature.getId())) { | ||
output.addFeatures(feature); | ||
} | ||
} | ||
context.output(output.build()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
ingestion/src/test/java/feast/ingestion/transform/fn/FilterFeatureRowDoFnTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
* Copyright 2018 The Feast Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
package feast.ingestion.transform.fn; | ||
|
||
import feast.types.FeatureProto.Feature; | ||
import feast.types.FeatureRowProto.FeatureRow; | ||
import feast.types.ValueProto.Value; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import org.apache.beam.sdk.testing.PAssert; | ||
import org.apache.beam.sdk.testing.TestPipeline; | ||
import org.apache.beam.sdk.transforms.Create; | ||
import org.apache.beam.sdk.transforms.ParDo; | ||
import org.apache.beam.sdk.values.PCollection; | ||
import org.junit.Rule; | ||
import org.junit.Test; | ||
|
||
public class FilterFeatureRowDoFnTest { | ||
@Rule public TestPipeline testPipeline = TestPipeline.create(); | ||
|
||
@Test | ||
public void shouldIgnoreUnspecifiedFeatureID() { | ||
String featureId1 = "testentity.none.feature1"; | ||
String featureId2 = "testentity.hour.feature2"; | ||
String featureId3 = "testentity.day.feature3"; | ||
|
||
List<String> specifiedFeatureIds = Arrays.asList(featureId1, featureId2, featureId3); | ||
FilterFeatureRowDoFn doFn = new FilterFeatureRowDoFn(specifiedFeatureIds); | ||
|
||
FeatureRow row = | ||
FeatureRow.newBuilder() | ||
.setEntityKey("1234") | ||
.setEntityName("testentity") | ||
.addFeatures( | ||
Feature.newBuilder().setId(featureId1).setValue(Value.newBuilder().setInt64Val(10))) | ||
.addFeatures( | ||
Feature.newBuilder().setId(featureId2).setValue(Value.newBuilder().setInt64Val(11))) | ||
.addFeatures( | ||
Feature.newBuilder().setId(featureId3).setValue(Value.newBuilder().setInt64Val(12))) | ||
// this feature should be ignored | ||
.addFeatures(Feature.newBuilder().setId("testEntity.none.unknown_feature")) | ||
.build(); | ||
|
||
PCollection<FeatureRow> output = testPipeline.apply(Create.of(row)) | ||
.apply(ParDo.of(doFn)); | ||
|
||
FeatureRow expRow = | ||
FeatureRow.newBuilder() | ||
.setEntityKey("1234") | ||
.setEntityName("testentity") | ||
.addFeatures( | ||
Feature.newBuilder().setId(featureId1).setValue(Value.newBuilder().setInt64Val(10))) | ||
.addFeatures( | ||
Feature.newBuilder().setId(featureId2).setValue(Value.newBuilder().setInt64Val(11))) | ||
.addFeatures( | ||
Feature.newBuilder().setId(featureId3).setValue(Value.newBuilder().setInt64Val(12))) | ||
.build(); | ||
PAssert.that(output).containsInAnyOrder(expRow); | ||
|
||
testPipeline.run(); | ||
} | ||
} |