Throw exception on cache
sryza committed Jan 31, 2015
1 parent 0f6c4eb · commit 6e1932a
Showing 1 changed file with 10 additions and 1 deletion.
core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala (11 changes: 10 additions & 1 deletion)
@@ -42,10 +42,11 @@ import org.apache.spark._
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.executor.{DataReadMethod, InputMetrics}
+import org.apache.spark.executor.DataReadMethod
 import org.apache.spark.rdd.HadoopRDD.HadoopMapPartitionsWithSplitRDD
 import org.apache.spark.util.{NextIterator, Utils}
 import org.apache.spark.scheduler.{HostTaskLocation, HDFSCacheTaskLocation}
+import org.apache.spark.storage.StorageLevel
 
 /**
  * A Spark split class that wraps around a Hadoop InputSplit.
@@ -308,6 +309,14 @@ class HadoopRDD[K, V](
     // Do nothing. Hadoop RDD should not be checkpointed.
   }
 
+  override def persist(storageLevel: StorageLevel): this.type = {
+    if (storageLevel.deserialized) {
+      throw new SparkException("Can't cache HadoopRDDs as deserialized objects because Hadoop's" +
+        " RecordReader reuses the same Writable object for all records.")
+    }
+    super.persist(storageLevel)
+  }
+
   def getConf: Configuration = getJobConf()
 }
 
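For context (not part of the commit): a minimal sketch of the failure mode this guard prevents and the usual workaround. The input path, app name, and object name below are placeholders; the calls used (sc.hadoopFile, persist, StorageLevel, LongWritable, Text) are the public Spark and Hadoop APIs of this era.

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.{SparkConf, SparkContext, SparkException}
import org.apache.spark.storage.StorageLevel

object CacheHadoopRDDExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("cache-demo"))

    // sc.hadoopFile returns a HadoopRDD. Its RecordReader hands back the same
    // LongWritable and Text instances for every record, so caching the records
    // as deserialized objects would store many references to one mutable object.
    val raw = sc.hadoopFile[LongWritable, Text](
      "hdfs://example/input",  // hypothetical path
      classOf[TextInputFormat], classOf[LongWritable], classOf[Text])

    // After this commit, a deserialized storage level is rejected up front:
    try {
      raw.persist(StorageLevel.MEMORY_ONLY)  // MEMORY_ONLY is deserialized
    } catch {
      case e: SparkException => println(s"Rejected as expected: ${e.getMessage}")
    }

    // Workaround: copy each record into plain, immutable JVM objects first.
    val copied = raw.map { case (k, v) => (k.get, v.toString) }
    copied.persist(StorageLevel.MEMORY_ONLY)  // safe: records no longer alias

    // A serialized level such as MEMORY_ONLY_SER passes the new guard, since
    // serialization copies each record's bytes as it is cached; note that
    // actually materializing such a cache needs a serializer that can handle
    // Hadoop Writables (e.g. Kryo), as Writables are not java.io.Serializable.
    sc.stop()
  }
}

Failing fast in persist is preferable to the old behavior, where a deserialized cache could silently end up holding many references to the last record read in each partition.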
