Skip to content

Commit

Permalink
add doc and header
Browse files Browse the repository at this point in the history
  • Loading branch information
mengxr committed Oct 14, 2014
1 parent 60614c7 commit 60cc131
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 63 deletions.
Original file line number Diff line number Diff line change
@@ -1,65 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.util

import scala.collection.mutable

import org.json4s._
import org.json4s.jackson.JsonMethods._

import scala.reflect.ClassTag

sealed class Metadata private[util] (val map: Map[String, Any]) extends Serializable {

def getInt(key: String): Int = get(key)

/**
* Metadata is a wrapper over Map[String, Any] that limits the value type to simple ones: Boolean,
* Long, Double, String, Metadata, Array[Boolean], Array[Long], Array[Double], Array[String], and
* Array[Metadata]. JSON is used for serialization.
*
* The default constructor is private. Users should use either [[MetadataBuilder]] or
* [[Metadata$#fromJson]] to create Metadata instances.
*
* @param map an immutable map that stores the data
*/
sealed class Metadata private[util] (private[util] val map: Map[String, Any]) extends Serializable {

/**
 * Gets a Long.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as a Long
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getLong(key: String): Long = get[Long](key)

/**
 * Gets a Double.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as a Double
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getDouble(key: String): Double = get[Double](key)

/**
 * Gets a Boolean.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as a Boolean
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getBoolean(key: String): Boolean = get[Boolean](key)

/**
 * Gets a String.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as a String
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getString(key: String): String = get[String](key)

/**
 * Gets a nested Metadata.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as a Metadata
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getMetadata(key: String): Metadata = get[Metadata](key)

def getIntArray(key: String): Array[Int] = getArray(key)
/**
 * Gets a Long array.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as an Array[Long]
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getLongArray(key: String): Array[Long] = get[Array[Long]](key)

def getDoubleArray(key: String): Array[Double] = getArray(key)
/**
 * Gets a Double array.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as an Array[Double]
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getDoubleArray(key: String): Array[Double] = get[Array[Double]](key)

def getBooleanArray(key: String): Array[Boolean] = getArray(key)
/**
 * Gets a Boolean array.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as an Array[Boolean]
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getBooleanArray(key: String): Array[Boolean] = get[Array[Boolean]](key)

def getStringArray(key: String): Array[String] = getArray(key)
/**
 * Gets a String array.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as an Array[String]
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getStringArray(key: String): Array[String] = get[Array[String]](key)

def getMetadataArray(key: String): Array[Metadata] = getArray(key)
/**
 * Gets a Metadata array.
 *
 * @param key name of the entry to read
 * @return the value stored under `key`, read as an Array[Metadata]
 * @throws NoSuchElementException if the underlying map has no entry for `key`
 */
def getMetadataArray(key: String): Array[Metadata] = get[Array[Metadata]](key)

/**
 * Converts this metadata to its JSON representation.
 *
 * The instance is first turned into a JSON AST by `Metadata.toJValue`, which is then rendered
 * with json4s' `compact` renderer.
 *
 * @return the JSON string for this instance
 */
def toJson: String = {
  val ast = Metadata.toJValue(this)
  compact(render(ast))
}

private def get[T](key: String): T = {
map(key).asInstanceOf[T]
}
override def toString: String = toJson

private def getArray[T: ClassTag](key: String): Array[T] = {
map(key).asInstanceOf[Seq[T]].toArray
/**
 * Structural equality: two Metadata instances are equal when they have the same key set and
 * every key maps to equal values.
 *
 * Arrays are compared element by element rather than by reference. The comparison goes through
 * `toSeq` (not `view`) because sequence equality is structural in every Scala version, whereas
 * views compare by reference in newer collection libraries. This also mirrors how
 * `Metadata.hash` treats arrays.
 *
 * @param obj the object to compare with
 * @return true if `obj` is a Metadata holding the same keys and equal values
 */
override def equals(obj: Any): Boolean = {
  obj match {
    case that: Metadata if map.keySet == that.map.keySet =>
      map.forall { case (key, ours) =>
        (ours, that.map(key)) match {
          case (a0: Array[_], a1: Array[_]) =>
            // Arrays only have reference equality; compare their contents as sequences.
            a0.toSeq == a1.toSeq
          case (v0, v1) =>
            v0 == v1
        }
      }
    case _ =>
      false
  }
}

/** Returns the same JSON string as `toJson`. */
override def toString: String = toJson
/** Returns the hash code computed by `Metadata.hash` for this instance. */
override def hashCode: Int = Metadata.hash(this)

/**
 * Looks up `key` in the backing map and returns the stored value as type `T`.
 *
 * The cast is unchecked here because `T` is erased; a value of the wrong type only surfaces as a
 * ClassCastException once the caller uses the result as `T`.
 *
 * @param key name of the entry to read
 * @tparam T the type the caller expects the value to have
 * @return the value stored under `key`
 */
private def get[T](key: String): T = {
  val stored = map(key)
  stored.asInstanceOf[T]
}
}

object Metadata {

/** Returns a new Metadata instance backed by an empty map. */
def empty: Metadata = new Metadata(Map.empty[String, Any])

/**
 * Creates a Metadata instance from JSON.
 *
 * The parsed top-level value is cast to a `Map[String, Any]` and handed to `fromMap`, so the
 * input is expected to be a JSON object; any other top-level value fails the cast.
 *
 * @param json the JSON string to parse
 * @return the Metadata built from the parsed key/value pairs
 */
def fromJson(json: String): Metadata = {
  val parsed = parse(json).values
  fromMap(parsed.asInstanceOf[Map[String, Any]].toMap)
}

/** Creates a Metadata instance from Map[String, Any]. */
private def fromMap(map: Map[String, Any]): Metadata = {
val builder = new MetadataBuilder
map.foreach {
case (key, value: Int) =>
builder.putInt(key, value)
case (key, value: BigInt) =>
builder.putInt(key, value.toInt)
builder.putLong(key, value.toLong)
case (key, value: Double) =>
builder.putDouble(key, value)
case (key, value: Boolean) =>
Expand All @@ -70,22 +125,21 @@ object Metadata {
builder.putMetadata(key, fromMap(value.asInstanceOf[Map[String, Any]]))
case (key, value: Seq[_]) =>
if (value.isEmpty) {
builder.putIntArray(key, Seq.empty)
// If it is an empty array, we cannot infer its element type. We put an empty Array[Long].
builder.putLongArray(key, Array.empty)
} else {
value.head match {
case _: Int =>
builder.putIntArray(key, value.asInstanceOf[Seq[Int]].toSeq)
case _: BigInt =>
builder.putIntArray(key, value.asInstanceOf[Seq[BigInt]].map(_.toInt).toSeq)
builder.putLongArray(key, value.asInstanceOf[Seq[BigInt]].map(_.toLong).toArray)
case _: Double =>
builder.putDoubleArray(key, value.asInstanceOf[Seq[Double]].toSeq)
builder.putDoubleArray(key, value.asInstanceOf[Seq[Double]].toArray)
case _: Boolean =>
builder.putBooleanArray(key, value.asInstanceOf[Seq[Boolean]].toSeq)
builder.putBooleanArray(key, value.asInstanceOf[Seq[Boolean]].toArray)
case _: String =>
builder.putStringArray(key, value.asInstanceOf[Seq[String]].toSeq)
case _: Map[String, Any] =>
builder.putStringArray(key, value.asInstanceOf[Seq[String]].toSeq.toArray)
case _: Map[_, _] =>
builder.putMetadataArray(
key, value.asInstanceOf[Seq[Map[String, Any]]].map(fromMap).toSeq)
key, value.asInstanceOf[Seq[Map[String, Any]]].map(fromMap).toArray)
case other =>
throw new RuntimeException(s"Do not support array of type ${other.getClass}.")
}
Expand All @@ -96,15 +150,16 @@ object Metadata {
builder.build()
}

/** Converts to JSON AST. */
private def toJValue(obj: Any): JValue = {
obj match {
case map: Map[_, _] =>
val fields = map.toList.map { case (k: String, v) => (k, toJValue(v)) }
val fields = map.toList.map { case (k: String, v) => (k, toJValue(v))}
JObject(fields)
case arr: Seq[_] =>
case arr: Array[_] =>
val values = arr.toList.map(toJValue)
JArray(values)
case x: Int =>
case x: Long =>
JInt(x)
case x: Double =>
JDouble(x)
Expand All @@ -118,37 +173,75 @@ object Metadata {
throw new RuntimeException(s"Do not support type ${other.getClass}.")
}
}

/**
 * Computes the hash code for the types we support.
 *
 * Maps are hashed from their keys together with the recursively hashed values, arrays from the
 * sequence of their recursively hashed elements, and a [[Metadata]] from its underlying map.
 * Long, Double, Boolean and String values use their own `##`.
 *
 * @param obj the value to hash
 * @return the hash code of `obj`
 * @throws RuntimeException if `obj`, or a value nested inside it, has an unsupported type
 */
private def hash(obj: Any): Int = {
  obj match {
    case map: Map[_, _] =>
      // Build a strict map of hashed values before hashing it. `mapValues` is lazy and, in newer
      // Scala versions, yields a view whose hash code is not derived from its entries, which
      // would give equal Metadata instances different hash codes.
      map.map { case (k, v) => (k: Any, hash(v)) }.##
    case arr: Array[_] =>
      // Seq.empty[T] has the same hashCode regardless of T.
      arr.toSeq.map(hash).##
    case x: Long =>
      x.##
    case x: Double =>
      x.##
    case x: Boolean =>
      x.##
    case x: String =>
      x.##
    case x: Metadata =>
      hash(x.map)
    case other =>
      throw new RuntimeException(s"Do not support type ${other.getClass}.")
  }
}
}

/**
* Builder for [[Metadata]]. If there is a key collision, the latter will overwrite the former.
*/
class MetadataBuilder {

private val map: mutable.Map[String, Any] = mutable.Map.empty

/**
 * Includes the content of an existing [[Metadata]] instance.
 *
 * Every entry of `metadata` is copied into this builder; an entry whose key is already present
 * replaces the existing value.
 *
 * @param metadata the instance whose entries are copied
 * @return this builder, so calls can be chained
 */
def withMetadata(metadata: Metadata): this.type = {
  metadata.map.foreach { case (k, v) => map(k) = v }
  this
}

def putInt(key: String, value: Int): this.type = put(key, value)
/**
 * Puts a Long.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the Long to store
 * @return this builder, so calls can be chained
 */
def putLong(key: String, value: Long): this.type = put(key, value)

/**
 * Puts a Double.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the Double to store
 * @return this builder, so calls can be chained
 */
def putDouble(key: String, value: Double): this.type = put(key, value)

/**
 * Puts a Boolean.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the Boolean to store
 * @return this builder, so calls can be chained
 */
def putBoolean(key: String, value: Boolean): this.type = put(key, value)

/**
 * Puts a String.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the String to store
 * @return this builder, so calls can be chained
 */
def putString(key: String, value: String): this.type = put(key, value)

/**
 * Puts a [[Metadata]].
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the nested Metadata to store
 * @return this builder, so calls can be chained
 */
def putMetadata(key: String, value: Metadata): this.type = put(key, value)

def putIntArray(key: String, value: Seq[Int]): this.type = put(key, value)
/**
 * Puts a Long array.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the Array[Long] to store
 * @return this builder, so calls can be chained
 */
def putLongArray(key: String, value: Array[Long]): this.type = put(key, value)

def putDoubleArray(key: String, value: Seq[Double]): this.type = put(key, value)
/**
 * Puts a Double array.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the Array[Double] to store
 * @return this builder, so calls can be chained
 */
def putDoubleArray(key: String, value: Array[Double]): this.type = put(key, value)

def putBooleanArray(key: String, value: Seq[Boolean]): this.type = put(key, value)
/**
 * Puts a Boolean array.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the Array[Boolean] to store
 * @return this builder, so calls can be chained
 */
def putBooleanArray(key: String, value: Array[Boolean]): this.type = put(key, value)

def putStringArray(key: String, value: Seq[String]): this.type = put(key, value)
/**
 * Puts a String array.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the Array[String] to store
 * @return this builder, so calls can be chained
 */
def putStringArray(key: String, value: Array[String]): this.type = put(key, value)

def putMetadataArray(key: String, value: Seq[Metadata]): this.type = put(key, value)
/**
 * Puts a [[Metadata]] array.
 *
 * @param key name of the entry; per the builder's documented contract, a later put with the
 *            same key overwrites the earlier one
 * @param value the Array[Metadata] to store
 * @return this builder, so calls can be chained
 */
def putMetadataArray(key: String, value: Array[Metadata]): this.type = put(key, value)

/**
 * Builds the [[Metadata]] instance.
 *
 * The entries collected so far are copied into an immutable map, so later puts on this builder
 * do not change the returned instance.
 *
 * @return a Metadata holding the builder's current entries
 */
def build(): Metadata = {
  val snapshot: Map[String, Any] = map.toMap
  new Metadata(snapshot)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,49 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.util

import org.json4s.jackson.JsonMethods._
import org.json4s.jackson.JsonMethods.parse
import org.scalatest.FunSuite

class MetadataSuite extends FunSuite {

val baseMetadata = new MetadataBuilder()
.putString("purpose", "ml")
.build()
.putString("purpose", "ml")
.putBoolean("isBase", true)
.build()

val summary = new MetadataBuilder()
.putInt("numFeatures", 10)
.build()
.putLong("numFeatures", 10L)
.build()

val age = new MetadataBuilder()
.putString("name", "age")
.putInt("index", 1)
.putBoolean("categorical", false)
.putDouble("average", 45.0)
.build()
.putString("name", "age")
.putLong("index", 1L)
.putBoolean("categorical", false)
.putDouble("average", 45.0)
.build()

val gender = new MetadataBuilder()
.putString("name", "gender")
.putInt("index", 5)
.putBoolean("categorical", true)
.putStringArray("categories", Seq("male", "female"))
.build()
.putString("name", "gender")
.putLong("index", 5)
.putBoolean("categorical", true)
.putStringArray("categories", Array("male", "female"))
.build()

val metadata = new MetadataBuilder()
.withMetadata(baseMetadata)
.putMetadata("summary", summary)
.putIntArray("int[]", Seq(0, 1))
.putDoubleArray("double[]", Seq(3.0, 4.0))
.putBooleanArray("boolean[]", Seq(true, false))
.putMetadataArray("features", Seq(age, gender))
.build()
.withMetadata(baseMetadata)
.putBoolean("isBase", false) // overwrite an existing key
.putMetadata("summary", summary)
.putLongArray("long[]", Array(0L, 1L))
.putDoubleArray("double[]", Array(3.0, 4.0))
.putBooleanArray("boolean[]", Array(true, false))
.putMetadataArray("features", Array(age, gender))
.build()

test("metadata builder and getters") {
assert(age.getInt("index") === 1)
assert(age.getLong("index") === 1L)
assert(age.getDouble("average") === 45.0)
assert(age.getBoolean("categorical") === false)
assert(age.getString("name") === "age")
assert(metadata.getString("purpose") === "ml")
assert(metadata.getBoolean("isBase") === false)
assert(metadata.getMetadata("summary") === summary)
assert(metadata.getIntArray("int[]").toSeq === Seq(0, 1))
assert(metadata.getLongArray("long[]").toSeq === Seq(0L, 1L))
assert(metadata.getDoubleArray("double[]").toSeq === Seq(3.0, 4.0))
assert(metadata.getBooleanArray("boolean[]").toSeq === Seq(true, false))
assert(gender.getStringArray("categories").toSeq === Seq("male", "female"))
Expand All @@ -55,6 +75,8 @@ class MetadataSuite extends FunSuite {
withClue("toJson must produce a valid JSON string") {
parse(json)
}
assert(Metadata.fromJson(json) === metadata)
val parsed = Metadata.fromJson(json)
assert(parsed === metadata)
assert(parsed.## === metadata.##)
}
}

0 comments on commit 60cc131

Please sign in to comment.