diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 8d8da1c7d0d6e..65d98bae7db3f 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -4616,13 +4616,13 @@ def rand(seed: Optional[int] = None) -> Column: Examples -------- - >>> df = spark.range(2) - >>> df.withColumn('rand', rand(seed=42) * 3).show() # doctest: +SKIP + >>> from pyspark.sql import functions as F + >>> spark.range(0, 2, 1, 1).withColumn('rand', F.rand(seed=42) * 3).show() +---+------------------+ | id| rand| +---+------------------+ - | 0|1.4385751892400076| - | 1|1.7082186019706387| + | 0|1.8575681106759028| + | 1|1.5288056527339444| +---+------------------+ """ if seed is not None: @@ -4657,14 +4657,14 @@ def randn(seed: Optional[int] = None) -> Column: Examples -------- - >>> df = spark.range(2) - >>> df.withColumn('randn', randn(seed=42)).show() # doctest: +SKIP - +---+--------------------+ - | id| randn| - +---+--------------------+ - | 0|-0.04167221574820542| - | 1| 0.15241403986452778| - +---+--------------------+ + >>> from pyspark.sql import functions as F + >>> spark.range(0, 2, 1, 1).withColumn('randn', F.randn(seed=42)).show() + +---+------------------+ + | id| randn| + +---+------------------+ + | 0| 2.384479054241165| + | 1|0.1920934041293524| + +---+------------------+ """ if seed is not None: return _invoke_function("randn", seed) @@ -5159,26 +5159,27 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = Non Examples -------- - >>> df = spark.createDataFrame([10, 100, 1000], "INT") - >>> df.select(log(10.0, df.value).alias('ten')).show() # doctest: +SKIP - +---+ - |ten| - +---+ - |1.0| - |2.0| - |3.0| - +---+ + >>> from pyspark.sql import functions as F + >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (4) AS t(value)") + >>> df.select(F.log(2.0, df.value).alias('log2_value')).show() + +----------+ + |log2_value| + +----------+ + | 0.0| + | 1.0| + | 2.0| + +----------+ And Natural logarithm - >>> df.select(log(df.value)).show() # doctest: +SKIP - +-----------------+ - | ln(value)| - +-----------------+ - |2.302585092994046| - |4.605170185988092| - |4.605170185988092| - +-----------------+ + >>> df.select(F.log(df.value).alias('ln_value')).show() + +------------------+ + | ln_value| + +------------------+ + | 0.0| + |0.6931471805599453| + |1.3862943611198906| + +------------------+ """ if arg2 is None: return _invoke_function_over_columns("log", cast("ColumnOrName", arg1))