Add identifier tags in comments to work around scaladocs bug
The bug is that annotations and comments cannot both be displayed on the
summary page. The workaround is to add a special pattern to each comment,
which we grep for when post-processing the DOM tree in JavaScript.

Example: a @DeveloperAPI-annotated class's comment must begin with
":: DeveloperAPI ::"
andrewor14 committed Apr 8, 2014
1 parent 99192ef commit f3954e0
Showing 38 changed files with 109 additions and 25 deletions.
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/Aggregator.scala
@@ -21,6 +21,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.util.collection.{AppendOnlyMap, ExternalAppendOnlyMap}

/**
* :: DeveloperAPI ::
* A set of functions used to aggregate data.
*
* @param createCombiner function to create the initial value of the aggregation.
5 changes: 5 additions & 0 deletions core/src/main/scala/org/apache/spark/Dependency.scala
@@ -22,13 +22,15 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.Serializer

/**
* :: DeveloperAPI ::
* Base class for dependencies.
*/
@DeveloperAPI
abstract class Dependency[T](val rdd: RDD[T]) extends Serializable


/**
* :: DeveloperAPI ::
* Base class for dependencies where each partition of the parent RDD is used by at most one
* partition of the child RDD. Narrow dependencies allow for pipelined execution.
*/
@@ -44,6 +46,7 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) {


/**
* :: DeveloperAPI ::
* Represents a dependency on the output of a shuffle stage.
* @param rdd the parent RDD
* @param partitioner partitioner used to partition the shuffle output
@@ -63,6 +66,7 @@ class ShuffleDependency[K, V](


/**
* :: DeveloperAPI ::
* Represents a one-to-one dependency between partitions of the parent and child RDDs.
*/
@DeveloperAPI
@@ -72,6 +76,7 @@ class OneToOneDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) {


/**
* :: DeveloperAPI ::
* Represents a one-to-one dependency between ranges of partitions in the parent and child RDDs.
* @param rdd the parent RDD
* @param inStart the start of the range in the parent RDD
3 changes: 3 additions & 0 deletions core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.scheduler.{JobFailed, JobSucceeded, JobWaiter}

/**
* :: Experimental ::
* A future for the result of an action to support cancellation. This is an extension of the
* Scala Future interface to support cancellation.
*/
@@ -86,6 +87,7 @@ trait FutureAction[T] extends Future[T] {


/**
* :: Experimental ::
* A [[FutureAction]] holding the result of an action that triggers a single job. Examples include
* count, collect, reduce.
*/
@@ -151,6 +153,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc:


/**
* :: Experimental ::
* A [[FutureAction]] for actions that could trigger multiple Spark jobs. Examples include take,
* takeSample. Cancellation works by setting the cancelled flag to true and interrupting the
* action thread if it is being blocked by a job.
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/Logging.scala
@@ -24,6 +24,7 @@ import org.slf4j.impl.StaticLoggerBinder
import org.apache.spark.annotations.DeveloperAPI

/**
* :: DeveloperAPI ::
* Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows
* logging messages at different levels using methods that only evaluate parameters lazily if the
* log level is enabled.
5 changes: 5 additions & 0 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -49,6 +49,7 @@ import org.apache.spark.ui.SparkUI
import org.apache.spark.util.{ClosureCleaner, MetadataCleaner, MetadataCleanerType, TimeStampedHashMap, Utils}

/**
* :: DeveloperAPI ::
* Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
* cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster.
*
@@ -65,6 +66,7 @@ class SparkContext(config: SparkConf) extends Logging {
private[spark] var preferredNodeLocationData: Map[String, Set[SplitInfo]] = Map()

/**
* :: DeveloperAPI ::
* Alternative constructor for setting preferred locations where Spark will create executors.
*
* @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on. Ca
@@ -716,6 +718,7 @@ class SparkContext(config: SparkConf) extends Logging {
}

/**
* :: DeveloperAPI ::
* Register a listener to receive up-calls from events that happen during execution.
*/
@DeveloperAPI
@@ -1028,6 +1031,7 @@ class SparkContext(config: SparkConf) extends Logging {
}

/**
* :: DeveloperAPI ::
* Run a job that can return approximate results.
*/
@DeveloperAPI
@@ -1046,6 +1050,7 @@ class SparkContext(config: SparkConf) extends Logging {
}

/**
* :: Experimental ::
* Submit a job for execution and return a FutureJob holding the result.
*/
@Experimental
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -36,6 +36,7 @@ import org.apache.spark.storage._
import org.apache.spark.util.{AkkaUtils, Utils}

/**
* :: DeveloperAPI ::
* Holds all the runtime environment objects for a running Spark instance (either master or worker),
* including the serializer, Akka actor system, block manager, map output tracker, etc. Currently
* Spark code finds the SparkEnv through a thread-local variable, so each thread that accesses these
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/TaskContext.scala
@@ -23,6 +23,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.executor.TaskMetrics

/**
* :: DeveloperAPI ::
* Contextual information about a task which can be read or mutated during execution.
*/
@DeveloperAPI
4 changes: 4 additions & 0 deletions core/src/main/scala/org/apache/spark/TaskEndReason.scala
@@ -22,6 +22,7 @@ import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.BlockManagerId

/**
* :: DeveloperAPI ::
* Various possible reasons why a task ended. The low-level TaskScheduler is supposed to retry
* tasks several times for "ephemeral" failures, and only report back failures that require some
* old stages to be resubmitted, such as shuffle map fetch failures.
@@ -52,6 +53,7 @@ case class ExceptionFailure(
extends TaskEndReason

/**
* :: DeveloperAPI ::
* The task finished successfully, but the result was lost from the executor's block manager before
* it was fetched.
*/
@@ -62,13 +64,15 @@ case object TaskResultLost extends TaskEndReason
case object TaskKilled extends TaskEndReason

/**
* :: DeveloperAPI ::
* The task failed because the executor that it was running on was lost. This may happen because
* the task crashed the JVM.
*/
@DeveloperAPI
case object ExecutorLostFailure extends TaskEndReason

/**
* :: DeveloperAPI ::
* We don't know why the task ended -- for example, because of a ClassNotFound exception when
* deserializing the task result.
*/
@@ -22,7 +22,8 @@ import org.apache.spark.SparkConf
import org.apache.spark.annotations.DeveloperAPI

/**
* An interface for all the broadcast implementations in Spark (to allow
* :: DeveloperAPI ::
* An interface for all the broadcast implementations in Spark (to allow
* multiple broadcast implementations). SparkContext uses a user-specified
* BroadcastFactory implementation to instantiate a particular broadcast for the
* entire Spark job.
@@ -21,6 +21,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.storage.{BlockId, BlockStatus}

/**
* :: DeveloperAPI ::
* Metrics tracked during the execution of a task.
*/
@DeveloperAPI
@@ -88,6 +89,7 @@ private[spark] object TaskMetrics {


/**
* :: DeveloperAPI ::
* Metrics pertaining to shuffle data read in a given task.
*/
@DeveloperAPI
@@ -126,6 +128,7 @@ class ShuffleReadMetrics extends Serializable {
}

/**
* :: DeveloperAPI ::
* Metrics pertaining to shuffle data written in a given task.
*/
@DeveloperAPI
@@ -26,6 +26,7 @@ import org.apache.spark.SparkConf
import org.apache.spark.annotations.DeveloperAPI

/**
* :: DeveloperAPI ::
* CompressionCodec allows the customization of choosing different compression implementations
* to be used in block storage.
*
@@ -58,6 +59,7 @@ private[spark] object CompressionCodec {


/**
* :: DeveloperAPI ::
* LZF implementation of [[org.apache.spark.io.CompressionCodec]].
*
* Note: The wire protocol for this codec is not guaranteed to be compatible across versions
@@ -76,6 +78,7 @@ class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec {


/**
* :: DeveloperAPI ::
* Snappy implementation of [[org.apache.spark.io.CompressionCodec]].
* Block size can be configured by spark.io.compression.snappy.block.size.
*
@@ -20,6 +20,7 @@ package org.apache.spark.partial
import org.apache.spark.annotations.Experimental

/**
* :: Experimental ::
* A Double value with error bars and associated confidence.
*/
@Experimental
@@ -27,6 +27,7 @@ import org.apache.spark.{ComplexFutureAction, FutureAction, Logging}
import org.apache.spark.annotations.Experimental

/**
* :: Experimental ::
* A set of asynchronous RDD actions available through an implicit conversion.
* Import `org.apache.spark.SparkContext._` at the top of your program to use these functions.
*/
@@ -52,6 +52,7 @@ private[spark] class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]
}

/**
* :: DeveloperAPI ::
* A RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a
* tuple with the list of values for that key.
*
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -71,6 +71,7 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp
}

/**
* :: DeveloperAPI ::
* An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS,
* sources in HBase, or S3), using the older MapReduce API (`org.apache.hadoop.mapred`).
*
@@ -37,6 +37,7 @@ class NewHadoopPartition(rddId: Int, val index: Int, @transient rawSplit: InputS
}

/**
* :: DeveloperAPI ::
* An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS,
* sources in HBase, or S3), using the new MapReduce API (`org.apache.hadoop.mapreduce`).
*
@@ -47,6 +47,7 @@ private[spark] class PruneDependency[T](rdd: RDD[T], @transient partitionFilterF


/**
* :: DeveloperAPI ::
* A RDD used to prune RDD partitions/partitions so we can avoid launching tasks on
* all partitions. An example use case: If we know the RDD is partitioned by range,
* and the execution DAG has a filter on the key, we can avoid launching tasks
8 changes: 8 additions & 0 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -88,26 +88,30 @@ abstract class RDD[T: ClassTag](
// =======================================================================

/**
* :: DeveloperAPI ::
* Implemented by subclasses to compute a given partition.
*/
@DeveloperAPI
def compute(split: Partition, context: TaskContext): Iterator[T]

/**
* :: DeveloperAPI ::
* Implemented by subclasses to return the set of partitions in this RDD. This method will only
* be called once, so it is safe to implement a time-consuming computation in it.
*/
@DeveloperAPI
protected def getPartitions: Array[Partition]

/**
* :: DeveloperAPI ::
* Implemented by subclasses to return how this RDD depends on parent RDDs. This method will only
* be called once, so it is safe to implement a time-consuming computation in it.
*/
@DeveloperAPI
protected def getDependencies: Seq[Dependency[_]] = deps

/**
* :: DeveloperAPI ::
* Optionally overridden by subclasses to specify placement preferences.
*/
@DeveloperAPI
@@ -522,6 +526,7 @@ abstract class RDD[T: ClassTag](
}

/**
* :: DeveloperAPI ::
* Return a new RDD by applying a function to each partition of this RDD. This is a variant of
* mapPartitions that also passes the TaskContext into the closure.
*/
@@ -785,6 +790,7 @@ abstract class RDD[T: ClassTag](
def count(): Long = sc.runJob(this, Utils.getIteratorSize _).sum

/**
* :: Experimental ::
* Approximate version of count() that returns a potentially incomplete result
* within a timeout, even if not all tasks have finished.
*/
@@ -832,6 +838,7 @@ abstract class RDD[T: ClassTag](
}

/**
* :: Experimental ::
* Approximate version of countByValue().
*/
@Experimental
@@ -855,6 +862,7 @@ abstract class RDD[T: ClassTag](
}

/**
* :: Experimental ::
* Return approximate number of distinct elements in the RDD.
*
* The accuracy of approximation can be controlled through the relative standard deviation
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
@@ -29,6 +29,7 @@ private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition {
}

/**
* :: DeveloperAPI ::
* The resulting RDD from a shuffle (e.g. repartitioning of data).
* @param prev the parent RDD.
* @param part the partitioner used to partition the RDD
@@ -31,6 +31,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.deploy.SparkHadoopUtil

/**
* :: DeveloperAPI ::
* Parses and holds information about inputFormat (and files) specified as a parameter.
*/
@DeveloperAPI
@@ -29,6 +29,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.executor.TaskMetrics

/**
* :: DeveloperAPI ::
* A logger class to record runtime information for jobs in Spark. This class outputs one log file
* for each Spark job, containing tasks start/stop and shuffle information. JobLogger is a subclass
* of SparkListener, use addSparkListener to add JobLogger to a SparkContext after the SparkContext
@@ -39,7 +40,6 @@ import org.apache.spark.executor.TaskMetrics
* to log application information as SparkListenerEvents. To enable this functionality, set
* spark.eventLog.enabled to true.
*/

@DeveloperAPI
@deprecated("Log application information by setting spark.eventLog.enabled.", "1.0.0")
class JobLogger(val user: String, val logDirName: String) extends SparkListener with Logging {
@@ -20,6 +20,7 @@ package org.apache.spark.scheduler
import org.apache.spark.annotations.DeveloperAPI

/**
* :: DeveloperAPI ::
* A result of a job in the DAGScheduler.
*/
@DeveloperAPI
@@ -80,6 +80,7 @@ private[spark] case object SparkListenerShutdown extends SparkListenerEvent


/**
* :: DeveloperAPI ::
* Interface for listening to events from the Spark scheduler. Note that this is an internal
* interface which might change in different Spark releases.
*/
@@ -143,6 +144,7 @@ trait SparkListener {
}

/**
* :: DeveloperAPI ::
* Simple SparkListener that logs a few summary statistics when each stage completes
*/
@DeveloperAPI
