Add identifier tags in comments to work around scaladocs bug
The bug is that annotations and comments cannot both be displayed on the
summary page. The workaround is to add a special pattern to each comment,
which we grep for when post-processing the DOM tree in JavaScript.

Example: a @DeveloperAPI-annotated class's comment must begin with
":: DeveloperAPI ::"
andrewor14 committed Apr 8, 2014
1 parent 99192ef commit f3954e0
Showing 38 changed files with 109 additions and 25 deletions.
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/Aggregator.scala
@@ -21,6 +21,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.util.collection.{AppendOnlyMap, ExternalAppendOnlyMap}

/**
* :: DeveloperAPI ::
* A set of functions used to aggregate data.
*
* @param createCombiner function to create the initial value of the aggregation.
5 changes: 5 additions & 0 deletions core/src/main/scala/org/apache/spark/Dependency.scala
@@ -22,13 +22,15 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.Serializer

/**
* :: DeveloperAPI ::
* Base class for dependencies.
*/
@DeveloperAPI
abstract class Dependency[T](val rdd: RDD[T]) extends Serializable


/**
* :: DeveloperAPI ::
* Base class for dependencies where each partition of the parent RDD is used by at most one
* partition of the child RDD. Narrow dependencies allow for pipelined execution.
*/
@@ -44,6 +46,7 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) {


/**
* :: DeveloperAPI ::
* Represents a dependency on the output of a shuffle stage.
* @param rdd the parent RDD
* @param partitioner partitioner used to partition the shuffle output
@@ -63,6 +66,7 @@ class ShuffleDependency[K, V](


/**
* :: DeveloperAPI ::
* Represents a one-to-one dependency between partitions of the parent and child RDDs.
*/
@DeveloperAPI
@@ -72,6 +76,7 @@ class OneToOneDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) {


/**
* :: DeveloperAPI ::
* Represents a one-to-one dependency between ranges of partitions in the parent and child RDDs.
* @param rdd the parent RDD
* @param inStart the start of the range in the parent RDD
3 changes: 3 additions & 0 deletions core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.scheduler.{JobFailed, JobSucceeded, JobWaiter}

/**
* :: Experimental ::
* A future for the result of an action to support cancellation. This is an extension of the
* Scala Future interface to support cancellation.
*/
@@ -86,6 +87,7 @@ trait FutureAction[T] extends Future[T] {


/**
* :: Experimental ::
* A [[FutureAction]] holding the result of an action that triggers a single job. Examples include
* count, collect, reduce.
*/
@@ -151,6 +153,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc:


/**
* :: Experimental ::
* A [[FutureAction]] for actions that could trigger multiple Spark jobs. Examples include take,
* takeSample. Cancellation works by setting the cancelled flag to true and interrupting the
* action thread if it is being blocked by a job.
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/Logging.scala
@@ -24,6 +24,7 @@ import org.slf4j.impl.StaticLoggerBinder
import org.apache.spark.annotations.DeveloperAPI

/**
* :: DeveloperAPI ::
* Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows
* logging messages at different levels using methods that only evaluate parameters lazily if the
* log level is enabled.
5 changes: 5 additions & 0 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -49,6 +49,7 @@ import org.apache.spark.ui.SparkUI
import org.apache.spark.util.{ClosureCleaner, MetadataCleaner, MetadataCleanerType, TimeStampedHashMap, Utils}

/**
* :: DeveloperAPI ::
* Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
* cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster.
*
@@ -65,6 +66,7 @@ class SparkContext(config: SparkConf) extends Logging {
private[spark] var preferredNodeLocationData: Map[String, Set[SplitInfo]] = Map()

/**
* :: DeveloperAPI ::
* Alternative constructor for setting preferred locations where Spark will create executors.
*
* @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on. Ca
@@ -716,6 +718,7 @@ class SparkContext(config: SparkConf) extends Logging {
}

/**
* :: DeveloperAPI ::
* Register a listener to receive up-calls from events that happen during execution.
*/
@DeveloperAPI
@@ -1028,6 +1031,7 @@ class SparkContext(config: SparkConf) extends Logging {
}

/**
* :: DeveloperAPI ::
* Run a job that can return approximate results.
*/
@DeveloperAPI
@@ -1046,6 +1050,7 @@ class SparkContext(config: SparkConf) extends Logging {
}

/**
* :: Experimental ::
* Submit a job for execution and return a FutureJob holding the result.
*/
@Experimental
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -36,6 +36,7 @@ import org.apache.spark.storage._
import org.apache.spark.util.{AkkaUtils, Utils}

/**
* :: DeveloperAPI ::
* Holds all the runtime environment objects for a running Spark instance (either master or worker),
* including the serializer, Akka actor system, block manager, map output tracker, etc. Currently
* Spark code finds the SparkEnv through a thread-local variable, so each thread that accesses these
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/TaskContext.scala
@@ -23,6 +23,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.executor.TaskMetrics

/**
* :: DeveloperAPI ::
* Contextual information about a task which can be read or mutated during execution.
*/
@DeveloperAPI
4 changes: 4 additions & 0 deletions core/src/main/scala/org/apache/spark/TaskEndReason.scala
@@ -22,6 +22,7 @@ import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.BlockManagerId

/**
* :: DeveloperAPI ::
* Various possible reasons why a task ended. The low-level TaskScheduler is supposed to retry
* tasks several times for "ephemeral" failures, and only report back failures that require some
* old stages to be resubmitted, such as shuffle map fetch failures.
@@ -52,6 +53,7 @@ case class ExceptionFailure(
extends TaskEndReason

/**
* :: DeveloperAPI ::
* The task finished successfully, but the result was lost from the executor's block manager before
* it was fetched.
*/
@@ -62,13 +64,15 @@ case object TaskResultLost extends TaskEndReason
case object TaskKilled extends TaskEndReason

/**
* :: DeveloperAPI ::
* The task failed because the executor that it was running on was lost. This may happen because
* the task crashed the JVM.
*/
@DeveloperAPI
case object ExecutorLostFailure extends TaskEndReason

/**
* :: DeveloperAPI ::
* We don't know why the task ended -- for example, because of a ClassNotFound exception when
* deserializing the task result.
*/
@@ -22,7 +22,8 @@ import org.apache.spark.SparkConf
import org.apache.spark.annotations.DeveloperAPI

/**
* An interface for all the broadcast implementations in Spark (to allow
* :: DeveloperAPI ::
* An interface for all the broadcast implementations in Spark (to allow
* multiple broadcast implementations). SparkContext uses a user-specified
* BroadcastFactory implementation to instantiate a particular broadcast for the
* entire Spark job.
@@ -21,6 +21,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.storage.{BlockId, BlockStatus}

/**
* :: DeveloperAPI ::
* Metrics tracked during the execution of a task.
*/
@DeveloperAPI
@@ -88,6 +89,7 @@ private[spark] object TaskMetrics {


/**
* :: DeveloperAPI ::
* Metrics pertaining to shuffle data read in a given task.
*/
@DeveloperAPI
@@ -126,6 +128,7 @@ class ShuffleReadMetrics extends Serializable {
}

/**
* :: DeveloperAPI ::
* Metrics pertaining to shuffle data written in a given task.
*/
@DeveloperAPI
@@ -26,6 +26,7 @@ import org.apache.spark.SparkConf
import org.apache.spark.annotations.DeveloperAPI

/**
* :: DeveloperAPI ::
* CompressionCodec allows the customization of choosing different compression implementations
* to be used in block storage.
*
@@ -58,6 +59,7 @@ private[spark] object CompressionCodec {


/**
* :: DeveloperAPI ::
* LZF implementation of [[org.apache.spark.io.CompressionCodec]].
*
* Note: The wire protocol for this codec is not guaranteed to be compatible across versions
@@ -76,6 +78,7 @@ class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec {


/**
* :: DeveloperAPI ::
* Snappy implementation of [[org.apache.spark.io.CompressionCodec]].
* Block size can be configured by spark.io.compression.snappy.block.size.
*
@@ -20,6 +20,7 @@ package org.apache.spark.partial
import org.apache.spark.annotations.Experimental

/**
* :: Experimental ::
* A Double value with error bars and associated confidence.
*/
@Experimental
@@ -27,6 +27,7 @@ import org.apache.spark.{ComplexFutureAction, FutureAction, Logging}
import org.apache.spark.annotations.Experimental

/**
* :: Experimental ::
* A set of asynchronous RDD actions available through an implicit conversion.
* Import `org.apache.spark.SparkContext._` at the top of your program to use these functions.
*/
@@ -52,6 +52,7 @@ private[spark] class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]
}

/**
* :: DeveloperAPI ::
* A RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a
* tuple with the list of values for that key.
*
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -71,6 +71,7 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp
}

/**
* :: DeveloperAPI ::
* An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS,
* sources in HBase, or S3), using the older MapReduce API (`org.apache.hadoop.mapred`).
*
@@ -37,6 +37,7 @@ class NewHadoopPartition(rddId: Int, val index: Int, @transient rawSplit: InputS
}

/**
* :: DeveloperAPI ::
* An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS,
* sources in HBase, or S3), using the new MapReduce API (`org.apache.hadoop.mapreduce`).
*
@@ -47,6 +47,7 @@ private[spark] class PruneDependency[T](rdd: RDD[T], @transient partitionFilterF


/**
* :: DeveloperAPI ::
* A RDD used to prune RDD partitions/partitions so we can avoid launching tasks on
* all partitions. An example use case: If we know the RDD is partitioned by range,
* and the execution DAG has a filter on the key, we can avoid launching tasks
8 changes: 8 additions & 0 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -88,26 +88,30 @@ abstract class RDD[T: ClassTag](
// =======================================================================

/**
* :: DeveloperAPI ::
* Implemented by subclasses to compute a given partition.
*/
@DeveloperAPI
def compute(split: Partition, context: TaskContext): Iterator[T]

/**
* :: DeveloperAPI ::
* Implemented by subclasses to return the set of partitions in this RDD. This method will only
* be called once, so it is safe to implement a time-consuming computation in it.
*/
@DeveloperAPI
protected def getPartitions: Array[Partition]

/**
* :: DeveloperAPI ::
* Implemented by subclasses to return how this RDD depends on parent RDDs. This method will only
* be called once, so it is safe to implement a time-consuming computation in it.
*/
@DeveloperAPI
protected def getDependencies: Seq[Dependency[_]] = deps

/**
* :: DeveloperAPI ::
* Optionally overridden by subclasses to specify placement preferences.
*/
@DeveloperAPI
@@ -522,6 +526,7 @@ abstract class RDD[T: ClassTag](
}

/**
* :: DeveloperAPI ::
* Return a new RDD by applying a function to each partition of this RDD. This is a variant of
* mapPartitions that also passes the TaskContext into the closure.
*/
@@ -785,6 +790,7 @@ abstract class RDD[T: ClassTag](
def count(): Long = sc.runJob(this, Utils.getIteratorSize _).sum

/**
* :: Experimental ::
* Approximate version of count() that returns a potentially incomplete result
* within a timeout, even if not all tasks have finished.
*/
@@ -832,6 +838,7 @@ abstract class RDD[T: ClassTag](
}

/**
* :: Experimental ::
* Approximate version of countByValue().
*/
@Experimental
@@ -855,6 +862,7 @@ abstract class RDD[T: ClassTag](
}

/**
* :: Experimental ::
* Return approximate number of distinct elements in the RDD.
*
* The accuracy of approximation can be controlled through the relative standard deviation
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
@@ -29,6 +29,7 @@ private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition {
}

/**
* :: DeveloperAPI ::
* The resulting RDD from a shuffle (e.g. repartitioning of data).
* @param prev the parent RDD.
* @param part the partitioner used to partition the RDD
@@ -31,6 +31,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.deploy.SparkHadoopUtil

/**
* :: DeveloperAPI ::
* Parses and holds information about inputFormat (and files) specified as a parameter.
*/
@DeveloperAPI
@@ -29,6 +29,7 @@ import org.apache.spark.annotations.DeveloperAPI
import org.apache.spark.executor.TaskMetrics

/**
* :: DeveloperAPI ::
* A logger class to record runtime information for jobs in Spark. This class outputs one log file
* for each Spark job, containing tasks start/stop and shuffle information. JobLogger is a subclass
* of SparkListener, use addSparkListener to add JobLogger to a SparkContext after the SparkContext
@@ -39,7 +40,6 @@ import org.apache.spark.executor.TaskMetrics
* to log application information as SparkListenerEvents. To enable this functionality, set
* spark.eventLog.enabled to true.
*/

@DeveloperAPI
@deprecated("Log application information by setting spark.eventLog.enabled.", "1.0.0")
class JobLogger(val user: String, val logDirName: String) extends SparkListener with Logging {
@@ -20,6 +20,7 @@ package org.apache.spark.scheduler
import org.apache.spark.annotations.DeveloperAPI

/**
* :: DeveloperAPI ::
* A result of a job in the DAGScheduler.
*/
@DeveloperAPI
@@ -80,6 +80,7 @@ private[spark] case object SparkListenerShutdown extends SparkListenerEvent


/**
* :: DeveloperAPI ::
* Interface for listening to events from the Spark scheduler. Note that this is an internal
* interface which might change in different Spark releases.
*/
@@ -143,6 +144,7 @@ trait SparkListener {
}

/**
* :: DeveloperAPI ::
* Simple SparkListener that logs a few summary statistics when each stage completes
*/
@DeveloperAPI
