-
Notifications
You must be signed in to change notification settings - Fork 229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Misc issue fixes #581
Misc issue fixes #581
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
package com.linkedin.feathr.offline.util | ||
|
||
case class FeaturizedDatasetMetadata(meta: Map[String, String] = Map()) { | ||
import com.linkedin.feathr.common.Header | ||
/** | ||
* Metadata attached to a FeaturizedDataset. | ||
* @param meta extra metadata as string key/value pairs; defaults to an empty map | ||
* @param header optional feature type header info; defaults to None | ||
*/ | ||
case class FeaturizedDatasetMetadata(meta: Map[String, String] = Map(), header: Option[Header] = None ) { | ||
Comment on lines
+4
to
+9
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There should be more docs about what this means: what is a featurized dataset? (Do we have docs about it in this public version of Feathr? Should we just describe it as Feathr's output dataset that includes info about the tensor types of joined features?) Also, what does "extra metadata" mean: what kinds of things is the map allowed/expected to contain, what are the keys, values, etc.? |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
package com.linkedin.feathr.offline.anchored.keyExtractor | ||
|
||
/**
 * Sample [[AlienSourceKeyExtractor]] implementation whose key is always the single string "1".
 */
class AlienSampleKeyExtractor extends AlienSourceKeyExtractor {

  /** @return a one-element sequence containing the string "1" */
  override def getKey(): Seq[String] = {
    Seq("1")
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
package com.linkedin.feathr.offline.anchored.keyExtractor | ||
|
||
/**
 * Key-extractor contract; implementations are also `Serializable`.
 */
trait AlienSourceKeyExtractor extends Serializable {

  /** @return the key as a sequence of strings */
  def getKey(): Seq[String]
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package com.linkedin.feathr.offline.anchored.keyExtractor | ||
|
||
import com.linkedin.feathr.offline.client.plugins.SourceKeyExtractorAdaptor | ||
import com.linkedin.feathr.sparkcommon.SourceKeyExtractor | ||
import org.apache.spark.sql.DataFrame | ||
|
||
/**
 * Adaptor that exposes an [[AlienSourceKeyExtractor]] through the Feathr `SourceKeyExtractor` interface.
 */
class AlienSourceKeyExtractorAdaptor extends SourceKeyExtractorAdaptor {

  /**
   * Reports whether this adaptor can handle objects of the given class.
   *
   * @param clazz class of the candidate external UDF
   * @return true when `clazz` is [[AlienSourceKeyExtractor]] or one of its subtypes; false otherwise
   */
  override def canAdapt(clazz: Class[_]): Boolean = {
    val supportedType = classOf[AlienSourceKeyExtractor]
    supportedType.isAssignableFrom(clazz)
  }

  /**
   * Wraps an external key extractor in a Feathr `SourceKeyExtractor`.
   *
   * The argument is cast to [[AlienSourceKeyExtractor]] without a prior type check.
   *
   * @param externalUdf instance of the external key extractor
   * @return a Feathr `SourceKeyExtractor` backed by `externalUdf`
   */
  override def adaptUdf(externalUdf: AnyRef): SourceKeyExtractor = {
    val alienExtractor = externalUdf.asInstanceOf[AlienSourceKeyExtractor]
    new AlienSourceKeyExtractorWrapper(alienExtractor)
  }

  /**
   * Feathr-facing wrapper around an [[AlienSourceKeyExtractor]].
   */
  private class AlienSourceKeyExtractorWrapper(keyExtractor: AlienSourceKeyExtractor) extends SourceKeyExtractor {

    /** Always reports the single key column name "mId"; `datum` is not consulted. */
    override def getKeyColumnNames(datum: Option[Any]): Seq[String] = {
      Seq("mId")
    }

    /** Invokes the wrapped extractor and returns the input DataFrame unchanged. */
    override def appendKeyColumns(dataFrame: DataFrame): DataFrame = {
      // The wrapped extractor is called, but its result is discarded.
      keyExtractor.getKey()
      dataFrame
    }
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package com.linkedin.feathr.offline.derived | ||
|
||
import com.linkedin.feathr.common.{FeatureDerivationFunction, FeatureValue} | ||
import com.linkedin.feathr.offline.client.plugins.FeatureDerivationFunctionAdaptor | ||
import com.linkedin.feathr.offline.plugins.AlienFeatureValue | ||
|
||
/**
 * Adaptor that exposes an [[AlienFeatureDerivationFunction]] through the Feathr
 * `FeatureDerivationFunction` interface.
 */
class AlienDerivationFunctionAdaptor extends FeatureDerivationFunctionAdaptor {

  /**
   * Reports whether this adaptor can handle objects of the given class.
   *
   * @param clazz class of the candidate external UDF
   * @return true when `clazz` is [[AlienFeatureDerivationFunction]] or one of its subtypes; false otherwise
   */
  override def canAdapt(clazz: Class[_]): Boolean = {
    val supportedType = classOf[AlienFeatureDerivationFunction]
    supportedType.isAssignableFrom(clazz)
  }

  /**
   * Wraps an external derivation function in a Feathr `FeatureDerivationFunction`.
   *
   * The argument is cast to [[AlienFeatureDerivationFunction]] without a prior type check.
   *
   * @param externalUdf instance of the external derivation function
   * @return a Feathr `FeatureDerivationFunction` backed by `externalUdf`
   */
  override def adaptUdf(externalUdf: AnyRef): FeatureDerivationFunction = {
    val alienFunction = externalUdf.asInstanceOf[AlienFeatureDerivationFunction]
    new AlienFeatureDerivationFunctionWrapper(alienFunction)
  }

  /**
   * Feathr-facing wrapper around an [[AlienFeatureDerivationFunction]].
   */
  private[derived] class AlienFeatureDerivationFunctionWrapper(derived: AlienFeatureDerivationFunction)
    extends FeatureDerivationFunction {

    /**
     * Calls the wrapped function with a fixed alien value and returns a fixed numeric Feathr value.
     *
     * @param inputs ignored by this wrapper
     * @return a one-element sequence holding the numeric feature value 1.0f
     */
    override def getFeatures(inputs: Seq[Option[FeatureValue]]): Seq[Option[FeatureValue]] = {
      val alienInputs: Seq[Option[AlienFeatureValue]] = Seq(Some(AlienFeatureValue.fromFloat(1.0f)))
      // The wrapped function is called, but its result is discarded.
      derived.getFeatures(alienInputs)
      val numericOne: Option[FeatureValue] = Some(FeatureValue.createNumeric(1.0f))
      Seq(numericOne)
    }
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package com.linkedin.feathr.offline.derived | ||
|
||
import com.linkedin.feathr.offline.plugins.AlienFeatureValue | ||
/**
 * Sample Alien FeatureDerivationFunction interface that can be adapted into a Feathr
 * FeatureDerivationFunction.
 */
abstract class AlienFeatureDerivationFunction extends Serializable {

  /**
   * @param inputs sequence of optional alien feature values
   * @return sequence of optional alien feature values
   */
  def getFeatures(inputs: Seq[Option[AlienFeatureValue]]): Seq[Option[AlienFeatureValue]]
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package com.linkedin.feathr.offline.derived | ||
import com.linkedin.feathr.offline.plugins.AlienFeatureValue | ||
|
||
/**
 * Sample [[AlienFeatureDerivationFunction]] that always produces one constant alien feature value.
 */
class SampleAlienFeatureDerivationFunction extends AlienFeatureDerivationFunction {

  /**
   * @param inputs ignored by this implementation
   * @return a one-element sequence holding the alien value built from 1.0f
   */
  override def getFeatures(inputs: Seq[Option[AlienFeatureValue]]): Seq[Option[AlienFeatureValue]] = {
    val constantValue = AlienFeatureValue.fromFloat(1.0f)
    Seq(Some(constantValue))
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the meaning of the `Seq()` case here? Can you explain it in a quick comment in the code?