Skip to content

Commit

Permalink
Added doctest and method description in context.py
Browse files Browse the repository at this point in the history
Added doctest for method textFile and description for methods _initialize_context and _ensure_initialized in context.py

Author: Jyotiska NK <jyotiska123@gmail.com>

Closes apache#187 from jyotiska/pyspark_context and squashes the following commits:

356f945 [Jyotiska NK] Added doctest for textFile method in context.py
5b23686 [Jyotiska NK] Updated context.py with method descriptions
  • Loading branch information
jyotiska authored and conviva-zz committed Sep 4, 2014
1 parent cc2c68f commit 95bef5b
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion python/pyspark/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,18 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
self._temp_dir = \
self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath()

# Initialize SparkContext in function to allow subclass specific initialization
def _initialize_context(self, jconf):
"""
Initialize SparkContext in function to allow subclass specific initialization
"""
return self._jvm.JavaSparkContext(jconf)

@classmethod
def _ensure_initialized(cls, instance=None, gateway=None):
"""
Checks whether a SparkContext is initialized or not.
Throws error if a SparkContext is already running.
"""
with SparkContext._lock:
if not SparkContext._gateway:
SparkContext._gateway = gateway or launch_gateway()
Expand Down Expand Up @@ -270,6 +276,13 @@ def textFile(self, name, minPartitions=None):
Read a text file from HDFS, a local file system (available on all
nodes), or any Hadoop-supported file system URI, and return it as an
RDD of Strings.
>>> path = os.path.join(tempdir, "sample-text.txt")
>>> with open(path, "w") as testFile:
... testFile.write("Hello world!")
>>> textFile = sc.textFile(path)
>>> textFile.collect()
[u'Hello world!']
"""
minPartitions = minPartitions or min(self.defaultParallelism, 2)
return RDD(self._jsc.textFile(name, minPartitions), self,
Expand Down

0 comments on commit 95bef5b

Please sign in to comment.