forked from spark-examples/pyspark-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyspark-array-string.py
31 lines (25 loc) · 1.15 KB
/
pyspark-array-string.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# -*- coding: utf-8 -*-
r"""Convert a PySpark array-of-strings column into one delimited string.

Builds a three-row DataFrame whose ``languagesAtSchool`` values are lists of
strings, then joins each list into a comma-separated string twice: once with
the DataFrame API (``concat_ws``) and once with the equivalent Spark SQL query.

How to run it:
- Single-server version (pandas_normal): python C:\DATOS\GITHUB_REPOS\pyspark-examples\SCRIPT.py
- Multi-server version (pandas_api_on_spark): cmd /k C:\apps\spark-3.4.0-bin-hadoop3\bin\spark-submit --master "local[*]" C:\DATOS\GITHUB_REPOS\pyspark-examples\SCRIPT.py

This docstring is a raw string on purpose: the Windows paths above contain
backslashes that would otherwise be parsed as escape sequences.
"""
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, concat_ws

# NOTE(review): a master set on the builder normally takes precedence over the
# --master flag passed to spark-submit, so the "local[*]" shown in the
# docstring is probably ignored -- confirm whether "local[1]" is intended.
spark = (
    SparkSession.builder
    .master("local[1]")
    .appName("SparkByExamples.com")
    .getOrCreate()
)

# Sample rows: (name, languages learned at school, current state).
columns = ["name", "languagesAtSchool", "currentState"]
data = [
    ("James,,Smith", ["Java", "Scala", "C++"], "CA"),
    ("Michael,Rose,", ["Spark", "Java", "C++"], "NJ"),
    ("Robert,,Williams", ["CSharp", "VB"], "NV"),
]

df = spark.createDataFrame(data=data, schema=columns)
df.printSchema()
df.show(truncate=False)

# DataFrame API: overwrite the array column with its elements joined by ",".
df2 = df.withColumn(
    "languagesAtSchool",
    concat_ws(",", col("languagesAtSchool")),
)
df2.printSchema()
df2.show(truncate=False)

# Spark SQL: same conversion, run against the original (array-typed) DataFrame
# registered as a temporary view.
df.createOrReplaceTempView("ARRAY_STRING")
spark.sql(
    "select name, concat_ws(',',languagesAtSchool) as languagesAtSchool,currentState from ARRAY_STRING"
).show(truncate=False)