From 47cf0b1bf9ae5617fb552edef36d8e43a2b76c90 Mon Sep 17 00:00:00 2001 From: shuigedeng <981376577@qq.com> Date: Fri, 17 Sep 2021 09:40:28 +0800 Subject: [PATCH] [taotao-cloud-project-1236] update spark --- README.md | 60 ++++---- config/bigdata/spark3_0_1/install.sh | 10 +- gradle.properties | 2 +- scripts/build_starters.sh | 2 +- scripts/clean_starters.sh | 2 +- scripts/deploy_aliyun_starters.sh | 4 +- scripts/deploy_github_starters.sh | 4 +- scripts/deploy_sonatype_starters.sh | 4 +- .../taotao-cloud-spark/build.gradle | 1 - .../bigdata/spark/atguigu/core/Test.scala | 10 -- .../spark/atguigu/core/acc/Spark01_Acc.scala | 28 ---- .../spark/atguigu/core/acc/Spark02_Acc.scala | 34 ----- .../spark/atguigu/core/acc/Spark03_Acc.scala | 40 ------ .../core/acc/Spark04_Acc_WordCount.scala | 87 ------------ .../spark/atguigu/core/acc/Spark05_Bc.scala | 42 ------ .../spark/atguigu/core/acc/Spark06_Bc.scala | 36 ----- .../core/rdd/builder/Spark01_RDD_Memory.scala | 28 ---- .../rdd/builder/Spark01_RDD_Memory_Par1.scala | 28 ---- .../core/rdd/builder/Spark02_RDD_File.scala | 30 ---- .../core/rdd/builder/Spark02_RDD_File1.scala | 26 ---- .../rdd/builder/Spark02_RDD_File_Par.scala | 35 ----- .../rdd/builder/Spark02_RDD_File_Par1.scala | 38 ----- .../rdd/builder/Spark03_RDD_File_Par2.scala | 37 ----- .../core/rdd/dep/Spark01_RDD_Dep.scala | 31 ---- .../core/rdd/dep/Spark02_RDD_Dep.scala | 31 ---- .../core/rdd/io/Spark01_RDD_IO_Load.scala | 22 --- .../core/rdd/io/Spark01_RDD_IO_Save.scala | 25 ---- .../action/Spark01_RDD_Operator_Action.scala | 23 --- .../action/Spark02_RDD_Operator_Action.scala | 44 ------ .../action/Spark03_RDD_Operator_Action.scala | 27 ---- .../action/Spark04_RDD_Operator_Action.scala | 27 ---- .../action/Spark05_RDD_Operator_Action.scala | 26 ---- .../action/Spark06_RDD_Operator_Action.scala | 30 ---- .../action/Spark07_RDD_Operator_Action.scala | 36 ----- .../Spark01_RDD_Operator_Transform.scala | 36 ----- .../Spark01_RDD_Operator_Transform_Par.scala | 39 ------ .../Spark01_RDD_Operator_Transform_Part.scala | 24 ---- ...park01_Req1_HotCategoryTop10Analysis.scala | 108 -------------- ...ark02_Req1_HotCategoryTop10Analysis1.scala | 118 ---------------- ...ark03_Req1_HotCategoryTop10Analysis2.scala | 60 -------- ...ark04_Req1_HotCategoryTop10Analysis3.scala | 131 ----------------- ...Req2_HotCategoryTop10SessionAnalysis.scala | 90 ------------ .../req/Spark06_Req3_PageflowAnalysis.scala | 121 ---------------- .../spark/atguigu/core/test/Driver.scala | 39 ------ .../spark/atguigu/core/test/Executor2.scala | 25 ---- .../spark/atguigu/core/test/SubTask.scala | 11 -- .../spark/atguigu/core/test/Task.scala | 11 -- .../atguigu/core/wc/Spark01_WordCount.scala | 49 ------- .../atguigu/core/wc/Spark02_WordCount1.scala | 45 ------ .../atguigu/core/wc/Spark03_WordCount.scala | 117 ---------------- .../streaming/SparkStreaming13_Req31.scala | 92 ------------ .../bigdata/spark/atguigu/util/JDBCUtil.scala | 66 --------- .../src/main/python/PythonWordCount.py | 0 .../cloud/bigdata/spark/ScalaWordCount.scala | 22 +-- .../spark/atguigu/core/acc/Spark01_Acc.scala | 29 ++++ .../spark/atguigu/core/acc/Spark02_Acc.scala | 34 +++++ .../spark/atguigu/core/acc/Spark03_Acc.scala | 40 ++++++ .../core/acc/Spark04_Acc_WordCount.scala | 88 ++++++++++++ .../spark/atguigu/core/acc/Spark05_Bc.scala | 40 ++++++ .../spark/atguigu/core/acc/Spark06_Bc.scala | 35 +++++ .../application/WordCountApplication.scala | 5 +- .../core/framework/common/TApplication.scala | 3 +- .../core/framework/common/TController.scala | 0 
.../atguigu/core/framework/common/TDao.scala | 3 +- .../core/framework/common/TService.scala | 0 .../controller/WordCountController.scala | 4 +- .../core/framework/dao/WordCountDao.scala | 3 +- .../framework/service/WordCountService.scala | 4 +- .../atguigu/core/framework/util/EnvUtil.scala | 0 .../core/rdd/builder/Spark01_RDD_Memory.scala | 28 ++++ .../rdd/builder/Spark01_RDD_Memory_Par.scala | 1 - .../rdd/builder/Spark01_RDD_Memory_Par1.scala | 28 ++++ .../core/rdd/builder/Spark02_RDD_File.scala | 29 ++++ .../core/rdd/builder/Spark02_RDD_File1.scala | 26 ++++ .../rdd/builder/Spark02_RDD_File_Par.scala | 35 +++++ .../rdd/builder/Spark02_RDD_File_Par1.scala | 38 +++++ .../rdd/builder/Spark03_RDD_File_Par2.scala | 37 +++++ .../core/rdd/dep/Spark01_RDD_Dep.scala | 31 ++++ .../core/rdd/dep/Spark02_RDD_Dep.scala | 31 ++++ .../core/rdd/io/Spark01_RDD_IO_Load.scala | 22 +++ .../core/rdd/io/Spark01_RDD_IO_Save.scala | 25 ++++ .../action/Spark01_RDD_Operator_Action.scala | 23 +++ .../action/Spark02_RDD_Operator_Action.scala | 44 ++++++ .../action/Spark03_RDD_Operator_Action.scala | 27 ++++ .../action/Spark04_RDD_Operator_Action.scala | 27 ++++ .../action/Spark05_RDD_Operator_Action.scala | 26 ++++ .../action/Spark06_RDD_Operator_Action.scala | 30 ++++ .../action/Spark07_RDD_Operator_Action.scala | 37 +++++ .../Spark01_RDD_Operator_Transform.scala | 36 +++++ .../Spark01_RDD_Operator_Transform_Par.scala | 38 +++++ .../Spark01_RDD_Operator_Transform_Part.scala | 23 +++ .../Spark01_RDD_Operator_Transform_Test.scala | 0 .../Spark02_RDD_Operator_Transform.scala | 0 .../Spark02_RDD_Operator_Transform_Test.scala | 0 .../Spark03_RDD_Operator_Transform.scala | 0 .../Spark03_RDD_Operator_Transform1.scala | 0 .../Spark04_RDD_Operator_Transform.scala | 0 .../Spark04_RDD_Operator_Transform1.scala | 0 .../Spark04_RDD_Operator_Transform2.scala | 0 .../Spark05_RDD_Operator_Transform.scala | 0 .../Spark05_RDD_Operator_Transform_Test.scala | 0 .../Spark06_RDD_Operator_Transform.scala | 0 .../Spark06_RDD_Operator_Transform1.scala | 0 .../Spark06_RDD_Operator_Transform_Test.scala | 0 .../Spark07_RDD_Operator_Transform.scala | 0 .../Spark07_RDD_Operator_Transform_Test.scala | 0 .../Spark08_RDD_Operator_Transform.scala | 0 .../Spark09_RDD_Operator_Transform.scala | 0 .../Spark10_RDD_Operator_Transform.scala | 0 .../Spark11_RDD_Operator_Transform.scala | 0 .../Spark12_RDD_Operator_Transform.scala | 0 .../Spark12_RDD_Operator_Transform1.scala | 0 .../Spark13_RDD_Operator_Transform.scala | 0 .../Spark13_RDD_Operator_Transform1.scala | 0 .../Spark14_RDD_Operator_Transform.scala | 0 .../Spark15_RDD_Operator_Transform.scala | 0 .../Spark16_RDD_Operator_Transform.scala | 0 .../Spark17_RDD_Operator_Transform.scala | 0 .../Spark17_RDD_Operator_Transform1.scala | 0 .../Spark17_RDD_Operator_Transform2.scala | 0 .../Spark18_RDD_Operator_Transform3.scala | 0 .../Spark19_RDD_Operator_Transform.scala | 0 .../Spark20_RDD_Operator_Transform.scala | 0 .../Spark21_RDD_Operator_Transform.scala | 0 .../Spark22_RDD_Operator_Transform.scala | 0 .../Spark23_RDD_Operator_Transform.scala | 0 .../operator/transform/Spark24_RDD_Req.scala | 0 .../core/rdd/part/Spark01_RDD_Part.scala | 0 .../rdd/persist/Spark01_RDD_Persist.scala | 0 .../rdd/persist/Spark02_RDD_Persist.scala | 0 .../rdd/persist/Spark03_RDD_Persist.scala | 0 .../rdd/persist/Spark04_RDD_Persist.scala | 0 .../rdd/persist/Spark05_RDD_Persist.scala | 0 .../rdd/persist/Spark06_RDD_Persist.scala | 0 .../core/rdd/serial/Spark01_RDD_Serial.scala | 0 
...park01_Req1_HotCategoryTop10Analysis.scala | 108 ++++++++++++++ ...ark02_Req1_HotCategoryTop10Analysis1.scala | 118 ++++++++++++++++ ...ark03_Req1_HotCategoryTop10Analysis2.scala | 60 ++++++++ ...ark04_Req1_HotCategoryTop10Analysis3.scala | 132 ++++++++++++++++++ ...Req2_HotCategoryTop10SessionAnalysis.scala | 91 ++++++++++++ .../req/Spark06_Req3_PageflowAnalysis.scala | 121 ++++++++++++++++ .../spark/atguigu/core/test/Driver.scala | 40 ++++++ .../spark/atguigu/core/test/Executor.scala | 1 - .../spark/atguigu/core/test/Executor2.scala | 25 ++++ .../spark/atguigu/core/test/SubTask.scala | 11 ++ .../spark/atguigu/core/test/Task.scala | 11 ++ .../atguigu/core/wc/Spark01_WordCount.scala | 49 +++++++ .../atguigu/core/wc/Spark02_WordCount1.scala | 45 ++++++ .../atguigu/core/wc/Spark03_WordCount.scala | 119 ++++++++++++++++ .../atguigu/sql/Spark01_SparkSQL_Basic.scala | 0 .../atguigu/sql/Spark02_SparkSQL_UDF.scala | 0 .../atguigu/sql/Spark03_SparkSQL_UDAF.scala | 0 .../atguigu/sql/Spark03_SparkSQL_UDAF1.scala | 0 .../atguigu/sql/Spark03_SparkSQL_UDAF2.scala | 0 .../atguigu/sql/Spark04_SparkSQL_JDBC.scala | 0 .../atguigu/sql/Spark05_SparkSQL_Hive.scala | 0 .../atguigu/sql/Spark06_SparkSQL_Test.scala | 0 .../atguigu/sql/Spark06_SparkSQL_Test1.scala | 0 .../atguigu/sql/Spark06_SparkSQL_Test2.scala | 0 .../SparkStreaming01_WordCount.scala | 0 .../streaming/SparkStreaming02_Queue.scala | 0 .../streaming/SparkStreaming03_DIY.scala | 0 .../streaming/SparkStreaming04_Kafka.scala | 0 .../streaming/SparkStreaming05_State.scala | 0 .../SparkStreaming06_State_Join.scala | 0 .../SparkStreaming06_State_Transform.scala | 0 .../SparkStreaming06_State_Window.scala | 0 .../SparkStreaming06_State_Window1.scala | 0 .../streaming/SparkStreaming07_Output.scala | 0 .../streaming/SparkStreaming07_Output1.scala | 0 .../streaming/SparkStreaming08_Close.scala | 0 .../streaming/SparkStreaming09_Resume.scala | 0 .../streaming/SparkStreaming10_MockData.scala | 0 .../streaming/SparkStreaming11_Req1.scala | 0 .../SparkStreaming11_Req1_BlackList.scala | 0 .../SparkStreaming11_Req1_BlackList1.scala | 0 .../streaming/SparkStreaming12_Req2.scala | 0 .../streaming/SparkStreaming13_Req3.scala | 0 .../streaming/SparkStreaming13_Req31.scala | 93 ++++++++++++ .../bigdata/spark/atguigu/util/JDBCUtil.scala | 65 +++++++++ 180 files changed, 2066 insertions(+), 2067 deletions(-) delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/Test.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark01_Acc.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark02_Acc.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark03_Acc.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark04_Acc_WordCount.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark05_Bc.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark06_Bc.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory.scala delete mode 100644 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par1.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File1.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par1.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark03_RDD_File_Par2.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark01_RDD_Dep.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark02_RDD_Dep.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Load.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Save.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark01_RDD_Operator_Action.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark02_RDD_Operator_Action.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark03_RDD_Operator_Action.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark04_RDD_Operator_Action.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark05_RDD_Operator_Action.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark06_RDD_Operator_Action.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark07_RDD_Operator_Action.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Par.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Part.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark01_Req1_HotCategoryTop10Analysis.scala delete mode 100644 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark02_Req1_HotCategoryTop10Analysis1.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark03_Req1_HotCategoryTop10Analysis2.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark04_Req1_HotCategoryTop10Analysis3.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark05_Req2_HotCategoryTop10SessionAnalysis.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark06_Req3_PageflowAnalysis.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Driver.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor2.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/SubTask.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Task.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark01_WordCount.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark02_WordCount1.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark03_WordCount.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req31.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/util/JDBCUtil.scala delete mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/python/PythonWordCount.py create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark01_Acc.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark02_Acc.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark03_Acc.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark04_Acc_WordCount.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark05_Bc.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark06_Bc.scala rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/application/WordCountApplication.scala (60%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TApplication.scala (80%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TController.scala (100%) rename 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TDao.scala (68%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TService.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/controller/WordCountController.scala (69%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/dao/WordCountDao.scala (61%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/service/WordCountService.scala (79%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/framework/util/EnvUtil.scala (100%) create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory.scala rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par.scala (97%) create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par1.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File1.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par1.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark03_RDD_File_Par2.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark01_RDD_Dep.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark02_RDD_Dep.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Load.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Save.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark01_RDD_Operator_Action.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark02_RDD_Operator_Action.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark03_RDD_Operator_Action.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark04_RDD_Operator_Action.scala create mode 100644 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark05_RDD_Operator_Action.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark06_RDD_Operator_Action.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark07_RDD_Operator_Action.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Par.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Part.scala rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Test.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform_Test.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform1.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform1.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform2.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform_Test.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform1.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform_Test.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => 
scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform_Test.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark08_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark09_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark10_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark11_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform1.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform1.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark14_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark15_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark16_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform1.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform2.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark18_RDD_Operator_Transform3.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark19_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark20_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => 
scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark21_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark22_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark23_RDD_Operator_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark24_RDD_Req.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/part/Spark01_RDD_Part.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark01_RDD_Persist.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark02_RDD_Persist.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark03_RDD_Persist.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark04_RDD_Persist.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark05_RDD_Persist.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark06_RDD_Persist.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/serial/Spark01_RDD_Serial.scala (100%) create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark01_Req1_HotCategoryTop10Analysis.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark02_Req1_HotCategoryTop10Analysis1.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark03_Req1_HotCategoryTop10Analysis2.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark04_Req1_HotCategoryTop10Analysis3.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark05_Req2_HotCategoryTop10SessionAnalysis.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark06_Req3_PageflowAnalysis.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Driver.scala rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor.scala (99%) create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor2.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/SubTask.scala create mode 100644 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Task.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark01_WordCount.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark02_WordCount1.scala create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark03_WordCount.scala rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark01_SparkSQL_Basic.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark02_SparkSQL_UDF.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF1.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF2.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark04_SparkSQL_JDBC.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark05_SparkSQL_Hive.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test1.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test2.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming01_WordCount.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming02_Queue.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming03_DIY.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming04_Kafka.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming05_State.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Join.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Transform.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window.scala (100%) rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window1.scala (100%) rename 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output1.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming08_Close.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming09_Resume.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming10_MockData.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList1.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming12_Req2.scala (100%)
rename taotao-cloud-bigdata/taotao-cloud-spark/src/main/{java => scala}/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req3.scala (100%)
create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req31.scala
create mode 100644 taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/util/JDBCUtil.scala

diff --git a/README.md b/README.md
index 2b928b90a9..5986738fb9 100644
--- a/README.md
+++ b/README.md
@@ -20,23 +20,23 @@
- **大数据模块** 集成日志数据处理和分析、用户行为分析、推荐系统、离线/流式计算、数据仓库、数据湖等大数据处理
-- **微服务模块** 基于spring cloud alibab微服务基础脚手架框架,用于基础服务的集成和跟业务无关的基础技术集成,
- 提供大量的starter作为技术底层支持,同时基础框架集中统一优化中间件相关服务及使用,
+- **微服务模块** 基于**spring cloud alibaba**微服务基础脚手架框架,用于基础服务的集成和跟业务无关的基础技术集成,
+ 提供大量的**starters**作为技术底层支持,同时基础框架集中统一优化中间件相关服务及使用,
 提供高性能,更方便的基础服务接口及工具,完全可以在实际工作中使用
-- **前端模块** 主要使用react进行前端开发、集成以taro为主的多端合一框架。以react antd 框架进行快速后台管理平台开发
+- **前端模块** 主要使用**react**进行前端开发、集成以**taro**为主的多端合一框架。以**react antd**框架进行快速后台管理平台开发
-- **python模块** 主要是集成了Django的web开发、家庭自动化框架原理的分析
+- **python模块** 主要是集成了**Django**的web开发、**homeassistant**家庭自动化框架原理的分析
-总之基于Spring Cloud Alibaba的微服务架构。旨在提供技术框架的基础能力的封装,减少开发工作,只关注业务
+总之基于**spring cloud alibaba**的微服务架构。旨在提供技术框架的基础能力的封装,减少开发工作,只关注业务
-## 2. springcloud微服务架构图
+## 2. spring cloud 微服务架构图
 ![mark](./snapshot/springcloud微服务架构图.jpeg)
-## 3. springcloud微服务分层图
+## 3. spring cloud 微服务分层图
 ![mark](./snapshot/springcloud微服务分层图.png)
@@ -45,7 +45,7 @@ Gradle:
```
dependencyManagement{
 imports {
- mavenBom "io.github.shuigedeng:taotao-cloud-dependencies:2021.9.2"
+ mavenBom "io.github.shuigedeng:taotao-cloud-dependencies:2021.9.3"
 }
}
@@ -59,7 +59,7 @@ Maven:
 <groupId>io.github.shuigedeng</groupId>
 <artifactId>taotao-cloud-dependencies</artifactId>
- <version>2021.9.2</version>
+ <version>2021.9.3</version>
 <type>pom</type>
 <scope>import</scope>
@@ -110,27 +110,27 @@ Guava | 29.0-jre
## 7. 功能特点
-* 微服务技术框架: 前后端分离的企业级微服务架构、主要针对解决微服务和业务开发时常见的**非功能性需求**
-* 主体框架:采用最新的Spring Boot 2.5.4、Spring Cloud 2020.0.3、Spring Cloud Alibaba 2021.1版本进行设计
-* 统一注册:支持Nacos作为注册中心,实现多配置、分群组、分命名空间、多业务模块的注册和发现功能
-* 统一认证:统一Oauth2认证协议,采用jwt的方式,实现统一认证,完备的RBAC权限管理、数据权限处理、网关统一鉴权、灰度发布
-* 业务监控:利用Spring Boot Admin 监控各个独立服务的运行状态
-* 日志分析:集成kafka、ELK、prometheus实时监控日志(请求日志、系统日志、数据变更日志、用户日志)
-* 分布式事务:集成spring cloud alibaba seata分布式事务处理
-* 业务熔断:采用spring cloud alibaba Sentinel实现业务熔断处理,避免服务之间出现雪崩
-* 链路追踪:自定义traceId的方式,实现简单的链路追踪功能、集成skywalking、sleuth、zipkin链路监控
-* 分布式任务:集成xxl-job分布式定时任务处理
-* 内部调用:集成了Feign和Dubbo两种模式支持内部调用,并且可以实现无缝切换
-* 身份注入:通过注解的方式,实现用户登录信息的快速注入
-* 在线文档:通过接入Knife4j,实现在线API文档的查看与调试
-* 消息中心:集成消息中间件RocketMQ、kafka,对业务进行异步处理
-* 业务分离:采用前后端分离的框架设计,前端采用react antd脚手架快速开放
-* 多租户功能:集成Mybatis Plus、jpa,实现saas多租户功能
-* 容器化支持: Docker、Kubernetes、Rancher2 支持
-* webflux支持: lambda、stream api、webflux 的生产实践
-* 开放平台: 提供应用管理,方便第三方系统接入,**支持多租户(应用隔离)**
-* 组件化: 引入组件化的思想实现高内聚低耦合并且高度可配置化
-* 代码规范: 注重代码规范,严格控制包依赖
+* **微服务技术框架**: 前后端分离的企业级微服务架构、主要针对解决微服务和业务开发时常见的**非功能性需求**
+* **主体框架**:采用最新的Spring Boot 2.5.4、Spring Cloud 2020.0.3、Spring Cloud Alibaba 2021.1版本进行设计
+* **统一注册**:支持Nacos作为注册中心,实现多配置、分群组、分命名空间、多业务模块的注册和发现功能
+* **统一认证**:统一Oauth2认证协议,采用jwt的方式,实现统一认证,完备的RBAC权限管理、数据权限处理、网关统一鉴权、灰度发布
+* **业务监控**:利用Spring Boot Admin 监控各个独立服务的运行状态
+* **日志分析**:集成kafka、ELK、prometheus实时监控日志(请求日志、系统日志、数据变更日志、用户日志)
+* **分布式事务**:集成spring cloud alibaba seata分布式事务处理
+* **业务熔断**:采用spring cloud alibaba Sentinel实现业务熔断处理,避免服务之间出现雪崩
+* **链路追踪**:自定义traceId的方式,实现简单的链路追踪功能、集成skywalking、sleuth、zipkin链路监控
+* **分布式任务**:集成xxl-job分布式定时任务处理
+* **内部调用**:集成了Feign和Dubbo两种模式支持内部调用,并且可以实现无缝切换
+* **身份注入**:通过注解的方式,实现用户登录信息的快速注入
+* **在线文档**:通过接入Knife4j,实现在线API文档的查看与调试
+* **消息中心**:集成消息中间件RocketMQ、kafka,对业务进行异步处理
+* **业务分离**:采用前后端分离的框架设计,前端采用react antd脚手架快速开放
+* **多租户功能**:集成Mybatis Plus、jpa,实现saas多租户功能
+* **容器化支持**: Docker、Kubernetes、Rancher2 支持
+* **webflux**支持: lambda、stream api、webflux 的生产实践
+* **开放平台**: 提供应用管理,方便第三方系统接入,**支持多租户(应用隔离)**
+* **组件化**: 引入组件化的思想实现高内聚低耦合并且高度可配置化
+* **代码规范**: 注重代码规范,严格控制包依赖
> PS: 借鉴了其他开源项目

diff --git a/config/bigdata/spark3_0_1/install.sh b/config/bigdata/spark3_0_1/install.sh
index 9530f4ed88..fd26153453 100644
--- a/config/bigdata/spark3_0_1/install.sh
+++ b/config/bigdata/spark3_0_1/install.sh
@@ -35,14 +35,22 @@
spark.master.taotaocloud.com:8080
spark.worker.taotaocloud.com:8080
spark.task.taotaocloud.com:8080
+ ######### 测试
+-- master local[2]
+-- master spark://172.16.6.151:7077
+-- master yarn
+
+--deploy-mode client
+--deploy-mode cluster
+
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master yarn \
--deploy-mode cluster \
--driver-memory 4g \
--executor-memory 2g \
- --executor-cores 1 \
+ --executor-cores 2 \
--queue default \
examples/jars/spark-examples*.jar \
10

diff --git a/gradle.properties b/gradle.properties
index 0c24ec06dd..321958e659 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -2,7 +2,7 @@ org.gradle.daemon=true
org.gradle.jvmargs=-Xmx4096m -XX:MaxPermSize=4096m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
org.gradle.parallel=true
kotlin.version=1.3.70
-version=2021.9.2
+version=2021.9.3
# GRADLE_USER_HOME/gradle.properties
#signing.keyId=xxxxxx

diff --git a/scripts/build_starters.sh b/scripts/build_starters.sh
index 92dd98beba..fee0d6d374 100755
--- a/scripts/build_starters.sh
+++ b/scripts/build_starters.sh
@@ -5,7 +5,7 @@ function clean_starters() {
do
if [ -d $1"/"$file ];then
cd $1"/"$file
- gradle build
+ gradle build -Dorg.gradle.java.home='/Users/dengtao/software/jdk-11.0.7/Contents/Home'
fi
done
}

diff --git a/scripts/clean_starters.sh b/scripts/clean_starters.sh
index 8059e5ce44..dcdb765847 100755
--- a/scripts/clean_starters.sh
+++ b/scripts/clean_starters.sh
@@ -5,7 +5,7 @@ function clean_starters() {
do
if [ -d $1"/"$file ];then
cd $1"/"$file
- gradle clean
+ gradle clean -Dorg.gradle.java.home='/Users/dengtao/software/jdk-11.0.7/Contents/Home'
fi
done
}

diff --git a/scripts/deploy_aliyun_starters.sh b/scripts/deploy_aliyun_starters.sh
index 17555d61da..42daae38e5 100755
--- a/scripts/deploy_aliyun_starters.sh
+++ b/scripts/deploy_aliyun_starters.sh
@@ -2,7 +2,7 @@ function deploy_dependencies() {
cd $1
- gradle publishMavenJavaPublicationToAliyunRepository -Dorg.gradle.java.home='/Library/Java/JavaVirtualMachines/jdk1.8.0_181.jdk/Contents/Home'
+ gradle publishMavenJavaPublicationToAliyunRepository -Dorg.gradle.java.home='/Users/dengtao/software/jdk-11.0.7/Contents/Home'
}
function deploy_starters() {
@@ -10,7 +10,7 @@ function deploy_starters() {
do
if [ -d $1"/"$file ];then
cd $1"/"$file
- gradle publishMavenJavaPublicationToAliyunRepository -Dorg.gradle.java.home='/Library/Java/JavaVirtualMachines/jdk1.8.0_181.jdk/Contents/Home'
+ gradle publishMavenJavaPublicationToAliyunRepository -Dorg.gradle.java.home='/Users/dengtao/software/jdk-11.0.7/Contents/Home'
fi
done
}

diff --git a/scripts/deploy_github_starters.sh b/scripts/deploy_github_starters.sh
index bc090893f1..dca9ec4841 100755
--- a/scripts/deploy_github_starters.sh
+++ b/scripts/deploy_github_starters.sh
@@ -2,7 +2,7 @@ function deploy_dependencies() {
cd $1
- gradle publishMavenJavaPublicationToGitHubRepository -Dorg.gradle.java.home='/Library/Java/JavaVirtualMachines/jdk1.8.0_181.jdk/Contents/Home'
+ gradle publishMavenJavaPublicationToGitHubRepository -Dorg.gradle.java.home='/Users/dengtao/software/jdk-11.0.7/Contents/Home'
}
function deploy_starters() {
@@ -10,7 +10,7 @@ function deploy_starters() {
do
if [ -d $1"/"$file ];then
cd $1"/"$file
- gradle publishMavenJavaPublicationToGitHubRepository -Dorg.gradle.java.home='/Library/Java/JavaVirtualMachines/jdk1.8.0_181.jdk/Contents/Home'
+ gradle publishMavenJavaPublicationToGitHubRepository -Dorg.gradle.java.home='/Users/dengtao/software/jdk-11.0.7/Contents/Home'
fi
done
}

diff --git a/scripts/deploy_sonatype_starters.sh b/scripts/deploy_sonatype_starters.sh
index 0cd476777d..db38c869ec 100755
--- a/scripts/deploy_sonatype_starters.sh
+++ b/scripts/deploy_sonatype_starters.sh
@@ -2,7 +2,7 @@ function deploy_dependencies() {
cd $1
- gradle publishAllPublicationsToSonatypeRepository -Dorg.gradle.java.home='/Library/Java/JavaVirtualMachines/jdk1.8.0_181.jdk/Contents/Home'
+ gradle publishAllPublicationsToSonatypeRepository -Dorg.gradle.java.home='/Users/dengtao/software/jdk-11.0.7/Contents/Home'
}
function deploy_starters() {
@@ -10,7 +10,7 @@ function deploy_starters() {
do
if [ -d $1"/"$file ];then
cd $1"/"$file
- gradle publishAllPublicationsToSonatypeRepository -Dorg.gradle.java.home='/Library/Java/JavaVirtualMachines/jdk1.8.0_181.jdk/Contents/Home'
+ gradle publishAllPublicationsToSonatypeRepository -Dorg.gradle.java.home='/Users/dengtao/software/jdk-11.0.7/Contents/Home'
fi
done
}

diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/build.gradle b/taotao-cloud-bigdata/taotao-cloud-spark/build.gradle
index 9cd39997b5..3e4372659f 100644
--- a/taotao-cloud-bigdata/taotao-cloud-spark/build.gradle
+++ b/taotao-cloud-bigdata/taotao-cloud-spark/build.gradle
@@ -24,7 +24,6 @@ dependencies {
implementation "org.scala-lang:scala-library:2.12.8" implementation "org.scala-lang:scala-reflect:2.12.8" - implementation "org.slf4j:slf4j-api:1.7.25" implementation "org.slf4j:slf4j-log4j12:1.7.25" } diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/Test.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/Test.scala deleted file mode 100644 index e9f89e2fd9..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/Test.scala +++ /dev/null @@ -1,10 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core - -object Test { - - def main(args: Array[String]): Unit = { - - println("Hello Spark") - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark01_Acc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark01_Acc.scala deleted file mode 100644 index aceeae98b2..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark01_Acc.scala +++ /dev/null @@ -1,28 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.acc - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_Acc { - - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("Acc") - val sc = new SparkContext(sparConf) - - val rdd = sc.makeRDD(List(1,2,3,4)) - - // reduce : 分区内计算,分区间计算 - //val i: Int = rdd.reduce(_+_) - //println(i) - var sum = 0 - rdd.foreach( - num => { - sum += num - } - ) - println("sum = " + sum) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark02_Acc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark02_Acc.scala deleted file mode 100644 index 560fe80e3a..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark02_Acc.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.acc - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark02_Acc { - - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("Acc") - val sc = new SparkContext(sparConf) - - val rdd = sc.makeRDD(List(1,2,3,4)) - - // 获取系统累加器 - // Spark默认就提供了简单数据聚合的累加器 - val sumAcc = sc.longAccumulator("sum") - - //sc.doubleAccumulator - //sc.collectionAccumulator - - rdd.foreach( - num => { - // 使用累加器 - sumAcc.add(num) - } - ) - - // 获取累加器的值 - println(sumAcc.value) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark03_Acc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark03_Acc.scala deleted file mode 100644 index a2237bdb3c..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark03_Acc.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.acc - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark03_Acc { - - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("Acc") - val sc = 
new SparkContext(sparConf) - - val rdd = sc.makeRDD(List(1,2,3,4)) - - // 获取系统累加器 - // Spark默认就提供了简单数据聚合的累加器 - val sumAcc = sc.longAccumulator("sum") - - //sc.doubleAccumulator - //sc.collectionAccumulator - - val mapRDD = rdd.map( - num => { - // 使用累加器 - sumAcc.add(num) - num - } - ) - - // 获取累加器的值 - // 少加:转换算子中调用累加器,如果没有行动算子的话,那么不会执行 - // 多加:转换算子中调用累加器,如果没有行动算子的话,那么不会执行 - // 一般情况下,累加器会放置在行动算子进行操作 - mapRDD.collect() - mapRDD.collect() - println(sumAcc.value) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark04_Acc_WordCount.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark04_Acc_WordCount.scala deleted file mode 100644 index 9bf3b365d6..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark04_Acc_WordCount.scala +++ /dev/null @@ -1,87 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.acc - -import org.apache.spark.util.AccumulatorV2 -import org.apache.spark.{SparkConf, SparkContext} - -import scala.collection.mutable - -object Spark04_Acc_WordCount { - - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("Acc") - val sc = new SparkContext(sparConf) - - val rdd = sc.makeRDD(List("hello", "spark", "hello")) - - // 累加器 : WordCount - // 创建累加器对象 - val wcAcc = new MyAccumulator() - // 向Spark进行注册 - sc.register(wcAcc, "wordCountAcc") - - rdd.foreach( - word => { - // 数据的累加(使用累加器) - wcAcc.add(word) - } - ) - - // 获取累加器累加的结果 - println(wcAcc.value) - - sc.stop() - - } - /* - 自定义数据累加器:WordCount - - 1. 继承AccumulatorV2, 定义泛型 - IN : 累加器输入的数据类型 String - OUT : 累加器返回的数据类型 mutable.Map[String, Long] - - 2. 
重写方法(6) - */ - class MyAccumulator extends AccumulatorV2[String, mutable.Map[String, Long]] { - - private var wcMap = mutable.Map[String, Long]() - - // 判断是否初始状态 - override def isZero: Boolean = { - wcMap.isEmpty - } - - override def copy(): AccumulatorV2[String, mutable.Map[String, Long]] = { - new MyAccumulator() - } - - override def reset(): Unit = { - wcMap.clear() - } - - // 获取累加器需要计算的值 - override def add(word: String): Unit = { - val newCnt = wcMap.getOrElse(word, 0L) + 1 - wcMap.update(word, newCnt) - } - - // Driver合并多个累加器 - override def merge(other: AccumulatorV2[String, mutable.Map[String, Long]]): Unit = { - - val map1 = this.wcMap - val map2 = other.value - - map2.foreach{ - case ( word, count ) => { - val newCount = map1.getOrElse(word, 0L) + count - map1.update(word, newCount) - } - } - } - - // 累加器结果 - override def value: mutable.Map[String, Long] = { - wcMap - } - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark05_Bc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark05_Bc.scala deleted file mode 100644 index 7b078dc26e..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark05_Bc.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.acc - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -import scala.collection.mutable - -object Spark05_Bc { - - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("Acc") - val sc = new SparkContext(sparConf) - - val rdd1 = sc.makeRDD(List( - ("a", 1),("b", 2),("c", 3) - )) -// val rdd2 = sc.makeRDD(List( -// ("a", 4),("b", 5),("c", 6) -// )) - val map = mutable.Map(("a", 4),("b", 5),("c", 6)) - - - - // join会导致数据量几何增长,并且会影响shuffle的性能,不推荐使用 - //val joinRDD: RDD[(String, (Int, Int))] = rdd1.join(rdd2) - //joinRDD.collect().foreach(println) - // (a, 1), (b, 2), (c, 3) - // (a, (1,4)),(b, (2,5)),(c, (3,6)) - rdd1.map { - case (w, c) => { - val l: Int = map.getOrElse(w, 0) - (w, (c, l)) - } - }.collect().foreach(println) - - - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark06_Bc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark06_Bc.scala deleted file mode 100644 index 5cdcd4762e..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark06_Bc.scala +++ /dev/null @@ -1,36 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.acc - -import org.apache.spark.broadcast.Broadcast -import org.apache.spark.{SparkConf, SparkContext} - -import scala.collection.mutable - -object Spark06_Bc { - - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("Acc") - val sc = new SparkContext(sparConf) - - val rdd1 = sc.makeRDD(List( - ("a", 1),("b", 2),("c", 3) - )) - val map = mutable.Map(("a", 4),("b", 5),("c", 6)) - - // 封装广播变量 - val bc: Broadcast[mutable.Map[String, Int]] = sc.broadcast(map) - - rdd1.map { - case (w, c) => { - // 方法广播变量 - val l: Int = bc.value.getOrElse(w, 0) - (w, (c, l)) - } - }.collect().foreach(println) - - - - sc.stop() - - } -} diff --git 
a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory.scala deleted file mode 100644 index 8b72c1c004..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory.scala +++ /dev/null @@ -1,28 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_RDD_Memory { - - def main(args: Array[String]): Unit = { - - // TODO 准备环境 - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") - val sc = new SparkContext(sparkConf) - - // TODO 创建RDD - // 从内存中创建RDD,将内存中集合的数据作为处理的数据源 - val seq = Seq[Int](1,2,3,4) - - // parallelize : 并行 - //val rdd: RDD[Int] = sc.parallelize(seq) - // makeRDD方法在底层实现时其实就是调用了rdd对象的parallelize方法。 - val rdd: RDD[Int] = sc.makeRDD(seq) - - rdd.collect().foreach(println) - - // TODO 关闭环境 - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par1.scala deleted file mode 100644 index d58be1cab1..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par1.scala +++ /dev/null @@ -1,28 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_RDD_Memory_Par1 { - - def main(args: Array[String]): Unit = { - - // TODO 准备环境 - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") - val sc = new SparkContext(sparkConf) - - // TODO 创建RDD - - // 【1,2】,【3,4】 - //val rdd = sc.makeRDD(List(1,2,3,4), 2) - // 【1】,【2】,【3,4】 - //val rdd = sc.makeRDD(List(1,2,3,4), 3) - // 【1】,【2,3】,【4,5】 - val rdd = sc.makeRDD(List(1,2,3,4,5), 3) - - // 将处理的数据保存成分区文件 - rdd.saveAsTextFile("output") - - // TODO 关闭环境 - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File.scala deleted file mode 100644 index 120b6ca828..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark02_RDD_File { - - def main(args: Array[String]): Unit = { - - // TODO 准备环境 - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") - val sc = new SparkContext(sparkConf) - - // TODO 创建RDD - // 从文件中创建RDD,将文件中的数据作为处理的数据源 - // path路径默认以当前环境的根路径为基准。可以写绝对路径,也可以写相对路径 - //sc.textFile("D:\\mineworkspace\\idea\\classes\\atguigu-classes\\datas\\1.txt") - //val rdd: RDD[String] = sc.textFile("datas/1.txt") - // path路径可以是文件的具体路径,也可以目录名称 - //val rdd = sc.textFile("datas") - // path路径还可以使用通配符 * - //val rdd = sc.textFile("datas/1*.txt") - // path还可以是分布式存储系统路径:HDFS 
- val rdd = sc.textFile("hdfs://linux1:8020/test.txt") - rdd.collect().foreach(println) - - // TODO 关闭环境 - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File1.scala deleted file mode 100644 index bde1d72076..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File1.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark02_RDD_File1 { - - def main(args: Array[String]): Unit = { - - // TODO 准备环境 - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") - val sc = new SparkContext(sparkConf) - - // TODO 创建RDD - // 从文件中创建RDD,将文件中的数据作为处理的数据源 - - // textFile : 以行为单位来读取数据,读取的数据都是字符串 - // wholeTextFiles : 以文件为单位读取数据 - // 读取的结果表示为元组,第一个元素表示文件路径,第二个元素表示文件内容 - val rdd = sc.wholeTextFiles("datas") - - rdd.collect().foreach(println) - - // TODO 关闭环境 - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par.scala deleted file mode 100644 index 0db9e32bd8..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par.scala +++ /dev/null @@ -1,35 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark02_RDD_File_Par { - - def main(args: Array[String]): Unit = { - - // TODO 准备环境 - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") - val sc = new SparkContext(sparkConf) - - // TODO 创建RDD - // textFile可以将文件作为数据处理的数据源,默认也可以设定分区。 - // minPartitions : 最小分区数量 - // math.min(defaultParallelism, 2) - //val rdd = sc.textFile("datas/1.txt") - // 如果不想使用默认的分区数量,可以通过第二个参数指定分区数 - // Spark读取文件,底层其实使用的就是Hadoop的读取方式 - // 分区数量的计算方式: - // totalSize = 7 - // goalSize = 7 / 2 = 3(byte) - - // 7 / 3 = 2...1 (1.1) + 1 = 3(分区) - - // - val rdd = sc.textFile("datas/1.txt", 2) - - rdd.saveAsTextFile("output") - - - // TODO 关闭环境 - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par1.scala deleted file mode 100644 index 0f68f2fb40..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par1.scala +++ /dev/null @@ -1,38 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark02_RDD_File_Par1 { - - def main(args: Array[String]): Unit = { - - // TODO 准备环境 - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") - val sc = new SparkContext(sparkConf) - - // TODO 创建RDD - // TODO 数据分区的分配 - // 1. 数据以行为单位进行读取 - // spark读取文件,采用的是hadoop的方式读取,所以一行一行读取,和字节数没有关系 - // 2. 
数据读取时以偏移量为单位,偏移量不会被重复读取 - /* - 1@@ => 012 - 2@@ => 345 - 3 => 6 - - */ - // 3. 数据分区的偏移量范围的计算 - // 0 => [0, 3] => 12 - // 1 => [3, 6] => 3 - // 2 => [6, 7] => - - // 【1,2】,【3】,【】 - val rdd = sc.textFile("datas/1.txt", 2) - - rdd.saveAsTextFile("output") - - - // TODO 关闭环境 - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark03_RDD_File_Par2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark03_RDD_File_Par2.scala deleted file mode 100644 index 90ca94b957..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark03_RDD_File_Par2.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark03_RDD_File_Par2 { - - def main(args: Array[String]): Unit = { - - // TODO 准备环境 - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") - val sc = new SparkContext(sparkConf) - - // TODO 创建RDD - - // 14byte / 2 = 7byte - // 14 / 7 = 2(分区) - - /* - 1234567@@ => 012345678 - 89@@ => 9101112 - 0 => 13 - - [0, 7] => 1234567 - [7, 14] => 890 - - */ - - // 如果数据源为多个文件,那么计算分区时以文件为单位进行分区 - val rdd = sc.textFile("datas/word.txt", 2) - - rdd.saveAsTextFile("output") - - - // TODO 关闭环境 - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark01_RDD_Dep.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark01_RDD_Dep.scala deleted file mode 100644 index 2f3c23b31a..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark01_RDD_Dep.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.dep - -import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.rdd.RDD - -object Spark01_RDD_Dep { - - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") - val sc = new SparkContext(sparConf) - - val lines: RDD[String] = sc.textFile("datas/word.txt") - println(lines.toDebugString) - println("*************************") - val words: RDD[String] = lines.flatMap(_.split(" ")) - println(words.toDebugString) - println("*************************") - val wordToOne = words.map(word=>(word,1)) - println(wordToOne.toDebugString) - println("*************************") - val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey(_+_) - println(wordToSum.toDebugString) - println("*************************") - val array: Array[(String, Int)] = wordToSum.collect() - array.foreach(println) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark02_RDD_Dep.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark02_RDD_Dep.scala deleted file mode 100644 index 3fc0d36c19..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark02_RDD_Dep.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.dep - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object 
Spark02_RDD_Dep { - - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("Dep") - val sc = new SparkContext(sparConf) - - val lines: RDD[String] = sc.textFile("datas/word.txt") - println(lines.dependencies) - println("*************************") - val words: RDD[String] = lines.flatMap(_.split(" ")) - println(words.dependencies) - println("*************************") - val wordToOne = words.map(word=>(word,1)) - println(wordToOne.dependencies) - println("*************************") - val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey(_+_) - println(wordToSum.dependencies) - println("*************************") - val array: Array[(String, Int)] = wordToSum.collect() - array.foreach(println) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Load.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Load.scala deleted file mode 100644 index 889365fbe6..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Load.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.io - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_RDD_IO_Load { - - def main(args: Array[String]): Unit = { - val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") - val sc = new SparkContext(sparConf) - - val rdd = sc.textFile("output1") - println(rdd.collect().mkString(",")) - - val rdd1 = sc.objectFile[(String, Int)]("output2") - println(rdd1.collect().mkString(",")) - - val rdd2 = sc.sequenceFile[String, Int]("output3") - println(rdd2.collect().mkString(",")) - - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Save.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Save.scala deleted file mode 100644 index 38844a03b3..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Save.scala +++ /dev/null @@ -1,25 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.io - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_RDD_IO_Save { - - def main(args: Array[String]): Unit = { - val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") - val sc = new SparkContext(sparConf) - - val rdd = sc.makeRDD( - List( - ("a", 1), - ("b", 2), - ("c", 3) - ) - ) - - rdd.saveAsTextFile("output1") - rdd.saveAsObjectFile("output2") - rdd.saveAsSequenceFile("output3") - - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark01_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark01_RDD_Operator_Action.scala deleted file mode 100644 index 2161bdc88d..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark01_RDD_Operator_Action.scala +++ /dev/null @@ -1,23 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action - -import org.apache.spark.{SparkConf, 
SparkContext} - -object Spark01_RDD_Operator_Action { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - val rdd = sc.makeRDD(List(1,2,3,4)) - - // TODO - 行动算子 - // 所谓的行动算子,其实就是触发作业(Job)执行的方法 - // 底层代码调用的是环境对象的runJob方法 - // 底层代码中会创建ActiveJob,并提交执行。 - rdd.collect() - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark02_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark02_RDD_Operator_Action.scala deleted file mode 100644 index c2fe83baff..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark02_RDD_Operator_Action.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark02_RDD_Operator_Action { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - val rdd = sc.makeRDD(List(1,2,3,4)) - - // TODO - 行动算子 - - // reduce - //val i: Int = rdd.reduce(_+_) - //println(i) - - // collect : 方法会将不同分区的数据按照分区顺序采集到Driver端内存中,形成数组 - //val ints: Array[Int] = rdd.collect() - //println(ints.mkString(",")) - - // count : 数据源中数据的个数 - val cnt = rdd.count() - println(cnt) - - // first : 获取数据源中数据的第一个 - val first = rdd.first() - println(first) - - // take : 获取N个数据 - val ints: Array[Int] = rdd.take(3) - println(ints.mkString(",")) - - // takeOrdered : 数据排序后,取N个数据 - val rdd1 = sc.makeRDD(List(4,2,3,1)) - val ints1: Array[Int] = rdd1.takeOrdered(3) - println(ints1.mkString(",")) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark03_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark03_RDD_Operator_Action.scala deleted file mode 100644 index a438393ba1..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark03_RDD_Operator_Action.scala +++ /dev/null @@ -1,27 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark03_RDD_Operator_Action { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - val rdd = sc.makeRDD(List(1,2,3,4),2) - - // TODO - 行动算子 - - //10 + 13 + 17 = 40 - // aggregateByKey : 初始值只会参与分区内计算 - // aggregate : 初始值会参与分区内计算,并且和参与分区间计算 - //val result = rdd.aggregate(10)(_+_, _+_) - val result = rdd.fold(10)(_+_) - - println(result) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark04_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark04_RDD_Operator_Action.scala deleted file mode 100644 index 2ddc0cb895..0000000000 --- 
a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark04_RDD_Operator_Action.scala +++ /dev/null @@ -1,27 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark04_RDD_Operator_Action { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - //val rdd = sc.makeRDD(List(1,1,1,4),2) - val rdd = sc.makeRDD(List( - ("a", 1),("a", 2),("a", 3) - )) - - // TODO - 行动算子 - - //val intToLong: collection.Map[Int, Long] = rdd.countByValue() - //println(intToLong) - val stringToLong: collection.Map[String, Long] = rdd.countByKey() - println(stringToLong) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark05_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark05_RDD_Operator_Action.scala deleted file mode 100644 index 03209c79fb..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark05_RDD_Operator_Action.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark05_RDD_Operator_Action { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - //val rdd = sc.makeRDD(List(1,1,1,4),2) - val rdd = sc.makeRDD(List( - ("a", 1),("a", 2),("a", 3) - )) - - // TODO - 行动算子 - rdd.saveAsTextFile("output") - rdd.saveAsObjectFile("output1") - // saveAsSequenceFile方法要求数据的格式必须为K-V类型 - rdd.saveAsSequenceFile("output2") - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark06_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark06_RDD_Operator_Action.scala deleted file mode 100644 index f19e08dff1..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark06_RDD_Operator_Action.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark06_RDD_Operator_Action { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - val rdd = sc.makeRDD(List(1,2,3,4)) - - // foreach 其实是Driver端内存集合的循环遍历方法 - rdd.collect().foreach(println) - println("******************") - // foreach 其实是Executor端内存数据打印 - rdd.foreach(println) - - // 算子 : Operator(操作) - // RDD的方法和Scala集合对象的方法不一样 - // 集合对象的方法都是在同一个节点的内存中完成的。 - // RDD的方法可以将计算逻辑发送到Executor端(分布式节点)执行 - // 为了区分不同的处理效果,所以将RDD的方法称之为算子。 - // RDD的方法外部的操作都是在Driver端执行的,而方法内部的逻辑代码是在Executor端执行。 - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark07_RDD_Operator_Action.scala 
b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark07_RDD_Operator_Action.scala deleted file mode 100644 index 700f1c9cfd..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark07_RDD_Operator_Action.scala +++ /dev/null @@ -1,36 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action - -import org.apache.spark.{SparkConf, SparkContext} - -object Spark07_RDD_Operator_Action { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - val rdd = sc.makeRDD(List[Int]()) - - val user = new User() - - // SparkException: Task not serializable - // NotSerializableException: com.atguigu.bigdata.spark.core.rdd.operator.action.Spark07_RDD_Operator_Action$User - - // RDD算子中传递的函数是会包含闭包操作,那么就会进行检测功能 - // 闭包检测 - rdd.foreach( - num => { - println("age = " + (user.age + num)) - } - ) - - sc.stop() - - } - //class User extends Serializable { - // 样例类在编译时,会自动混入序列化特质(实现可序列化接口) - //case class User() { - class User { - var age : Int = 30 - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform.scala deleted file mode 100644 index dd17e077c6..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform.scala +++ /dev/null @@ -1,36 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.transform - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_RDD_Operator_Transform { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - // TODO 算子 - map - - val rdd = sc.makeRDD(List(1,2,3,4)) - // 1,2,3,4 - // 2,4,6,8 - - // 转换函数 - def mapFunction(num:Int): Int = { - num * 2 - } - - //val mapRDD: RDD[Int] = rdd.map(mapFunction) - //val mapRDD: RDD[Int] = rdd.map((num:Int)=>{num*2}) - //val mapRDD: RDD[Int] = rdd.map((num:Int)=>num*2) - //val mapRDD: RDD[Int] = rdd.map((num)=>num*2) - //val mapRDD: RDD[Int] = rdd.map(num=>num*2) - val mapRDD: RDD[Int] = rdd.map(_*2) - - mapRDD.collect().foreach(println) - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Par.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Par.scala deleted file mode 100644 index 6774ad5bed..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Par.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.transform - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_RDD_Operator_Transform_Par { - - def main(args: Array[String]): Unit = { - - val sparkConf = new 
SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - // TODO 算子 - map - - // 1. rdd的计算一个分区内的数据是一个一个执行逻辑 - // 只有前面一个数据全部的逻辑执行完毕后,才会执行下一个数据。 - // 分区内数据的执行是有序的。 - // 2. 不同分区数据计算是无序的。 - val rdd = sc.makeRDD(List(1,2,3,4),2) - - val mapRDD = rdd.map( - num => { - println(">>>>>>>> " + num) - num - } - ) - val mapRDD1 = mapRDD.map( - num => { - println("######" + num) - num - } - ) - - mapRDD1.collect() - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Part.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Part.scala deleted file mode 100644 index 637f86fab5..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Part.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.transform - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_RDD_Operator_Transform_Part { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") - val sc = new SparkContext(sparkConf) - - // TODO 算子 - map - val rdd = sc.makeRDD(List(1,2,3,4),2) - // 【1,2】,【3,4】 - rdd.saveAsTextFile("output") - val mapRDD = rdd.map(_*2) - // 【2,4】,【6,8】 - mapRDD.saveAsTextFile("output1") - - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark01_Req1_HotCategoryTop10Analysis.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark01_Req1_HotCategoryTop10Analysis.scala deleted file mode 100644 index 57a2912179..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark01_Req1_HotCategoryTop10Analysis.scala +++ /dev/null @@ -1,108 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.req - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_Req1_HotCategoryTop10Analysis { - - def main(args: Array[String]): Unit = { - - // TODO : Top10热门品类 - val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") - val sc = new SparkContext(sparConf) - - // 1. 读取原始日志数据 - val actionRDD = sc.textFile("datas/user_visit_action.txt") - - // 2. 统计品类的点击数量:(品类ID,点击数量) - val clickActionRDD = actionRDD.filter( - action => { - val datas = action.split("_") - datas(6) != "-1" - } - ) - - val clickCountRDD: RDD[(String, Int)] = clickActionRDD.map( - action => { - val datas = action.split("_") - (datas(6), 1) - } - ).reduceByKey(_ + _) - - // 3. 统计品类的下单数量:(品类ID,下单数量) - val orderActionRDD = actionRDD.filter( - action => { - val datas = action.split("_") - datas(8) != "null" - } - ) - - // orderid => 1,2,3 - // 【(1,1),(2,1),(3,1)】 - val orderCountRDD = orderActionRDD.flatMap( - action => { - val datas = action.split("_") - val cid = datas(8) - val cids = cid.split(",") - cids.map(id=>(id, 1)) - } - ).reduceByKey(_+_) - - // 4. 
统计品类的支付数量:(品类ID,支付数量) - val payActionRDD = actionRDD.filter( - action => { - val datas = action.split("_") - datas(10) != "null" - } - ) - - // orderid => 1,2,3 - // 【(1,1),(2,1),(3,1)】 - val payCountRDD = payActionRDD.flatMap( - action => { - val datas = action.split("_") - val cid = datas(10) - val cids = cid.split(",") - cids.map(id=>(id, 1)) - } - ).reduceByKey(_+_) - - // 5. 将品类进行排序,并且取前10名 - // 点击数量排序,下单数量排序,支付数量排序 - // 元组排序:先比较第一个,再比较第二个,再比较第三个,依此类推 - // ( 品类ID, ( 点击数量, 下单数量, 支付数量 ) ) - // - // cogroup = connect + group - val cogroupRDD: RDD[(String, (Iterable[Int], Iterable[Int], Iterable[Int]))] = - clickCountRDD.cogroup(orderCountRDD, payCountRDD) - val analysisRDD = cogroupRDD.mapValues{ - case ( clickIter, orderIter, payIter ) => { - - var clickCnt = 0 - val iter1 = clickIter.iterator - if ( iter1.hasNext ) { - clickCnt = iter1.next() - } - var orderCnt = 0 - val iter2 = orderIter.iterator - if ( iter2.hasNext ) { - orderCnt = iter2.next() - } - var payCnt = 0 - val iter3 = payIter.iterator - if ( iter3.hasNext ) { - payCnt = iter3.next() - } - - ( clickCnt, orderCnt, payCnt ) - } - } - - val resultRDD = analysisRDD.sortBy(_._2, false).take(10) - - // 6. 将结果采集到控制台打印出来 - resultRDD.foreach(println) - - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark02_Req1_HotCategoryTop10Analysis1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark02_Req1_HotCategoryTop10Analysis1.scala deleted file mode 100644 index 57263b3a46..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark02_Req1_HotCategoryTop10Analysis1.scala +++ /dev/null @@ -1,118 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.req - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark02_Req1_HotCategoryTop10Analysis1 { - - def main(args: Array[String]): Unit = { - - // TODO : Top10热门品类 - val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") - val sc = new SparkContext(sparConf) - - // Q : actionRDD重复使用 - // Q : cogroup性能可能较低 - - // 1. 读取原始日志数据 - val actionRDD = sc.textFile("datas/user_visit_action.txt") - actionRDD.cache() - - // 2. 统计品类的点击数量:(品类ID,点击数量) - val clickActionRDD = actionRDD.filter( - action => { - val datas = action.split("_") - datas(6) != "-1" - } - ) - - val clickCountRDD: RDD[(String, Int)] = clickActionRDD.map( - action => { - val datas = action.split("_") - (datas(6), 1) - } - ).reduceByKey(_ + _) - - // 3. 统计品类的下单数量:(品类ID,下单数量) - val orderActionRDD = actionRDD.filter( - action => { - val datas = action.split("_") - datas(8) != "null" - } - ) - - // orderid => 1,2,3 - // 【(1,1),(2,1),(3,1)】 - val orderCountRDD = orderActionRDD.flatMap( - action => { - val datas = action.split("_") - val cid = datas(8) - val cids = cid.split(",") - cids.map(id=>(id, 1)) - } - ).reduceByKey(_+_) - - // 4. 
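The ranking in the cogroup version above leans on the default tuple ordering: (click, order, pay) triples compare field by field, so sortBy(_._2, false) already means "click count first, then order count, then pay count". A tiny sketch with invented numbers (object name and data are hypothetical):

package com.taotao.cloud.bigdata.spark.atguigu.core.req

import org.apache.spark.{SparkConf, SparkContext}

object Spark00_TupleSort_Sketch {

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("TupleSortSketch"))

    // (categoryId, (clickCnt, orderCnt, payCnt)) with made-up counts
    val stats = sc.makeRDD(List(
      ("12", (10, 4, 1)),
      ("7",  (10, 5, 0)),
      ("3",  ( 9, 9, 9))
    ))

    // Tuples are compared element by element, so this sorts by click count first,
    // breaking ties with order count and then pay count
    val top = stats.sortBy(_._2, ascending = false).take(2)
    top.foreach(println)   // ("7",(10,5,0)) then ("12",(10,4,1))

    sc.stop()
  }
}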
统计品类的支付数量:(品类ID,支付数量) - val payActionRDD = actionRDD.filter( - action => { - val datas = action.split("_") - datas(10) != "null" - } - ) - - // orderid => 1,2,3 - // 【(1,1),(2,1),(3,1)】 - val payCountRDD = payActionRDD.flatMap( - action => { - val datas = action.split("_") - val cid = datas(10) - val cids = cid.split(",") - cids.map(id=>(id, 1)) - } - ).reduceByKey(_+_) - - // (品类ID, 点击数量) => (品类ID, (点击数量, 0, 0)) - // (品类ID, 下单数量) => (品类ID, (0, 下单数量, 0)) - // => (品类ID, (点击数量, 下单数量, 0)) - // (品类ID, 支付数量) => (品类ID, (0, 0, 支付数量)) - // => (品类ID, (点击数量, 下单数量, 支付数量)) - // ( 品类ID, ( 点击数量, 下单数量, 支付数量 ) ) - - // 5. 将品类进行排序,并且取前10名 - // 点击数量排序,下单数量排序,支付数量排序 - // 元组排序:先比较第一个,再比较第二个,再比较第三个,依此类推 - // ( 品类ID, ( 点击数量, 下单数量, 支付数量 ) ) - // - val rdd1 = clickCountRDD.map{ - case ( cid, cnt ) => { - (cid, (cnt, 0, 0)) - } - } - val rdd2 = orderCountRDD.map{ - case ( cid, cnt ) => { - (cid, (0, cnt, 0)) - } - } - val rdd3 = payCountRDD.map{ - case ( cid, cnt ) => { - (cid, (0, 0, cnt)) - } - } - - // 将三个数据源合并在一起,统一进行聚合计算 - val soruceRDD: RDD[(String, (Int, Int, Int))] = rdd1.union(rdd2).union(rdd3) - - val analysisRDD = soruceRDD.reduceByKey( - ( t1, t2 ) => { - ( t1._1+t2._1, t1._2 + t2._2, t1._3 + t2._3 ) - } - ) - - val resultRDD = analysisRDD.sortBy(_._2, false).take(10) - - // 6. 将结果采集到控制台打印出来 - resultRDD.foreach(println) - - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark03_Req1_HotCategoryTop10Analysis2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark03_Req1_HotCategoryTop10Analysis2.scala deleted file mode 100644 index d11a4362ab..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark03_Req1_HotCategoryTop10Analysis2.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.req - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark03_Req1_HotCategoryTop10Analysis2 { - - def main(args: Array[String]): Unit = { - - // TODO : Top10热门品类 - val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") - val sc = new SparkContext(sparConf) - - // Q : 存在大量的shuffle操作(reduceByKey) - // reduceByKey 聚合算子,spark会提供优化,缓存 - - // 1. 读取原始日志数据 - val actionRDD = sc.textFile("datas/user_visit_action.txt") - - // 2. 将数据转换结构 - // 点击的场合 : ( 品类ID,( 1, 0, 0 ) ) - // 下单的场合 : ( 品类ID,( 0, 1, 0 ) ) - // 支付的场合 : ( 品类ID,( 0, 0, 1 ) ) - val flatRDD: RDD[(String, (Int, Int, Int))] = actionRDD.flatMap( - action => { - val datas = action.split("_") - if (datas(6) != "-1") { - // 点击的场合 - List((datas(6), (1, 0, 0))) - } else if (datas(8) != "null") { - // 下单的场合 - val ids = datas(8).split(",") - ids.map(id => (id, (0, 1, 0))) - } else if (datas(10) != "null") { - // 支付的场合 - val ids = datas(10).split(",") - ids.map(id => (id, (0, 0, 1))) - } else { - Nil - } - } - ) - - // 3. 将相同的品类ID的数据进行分组聚合 - // ( 品类ID,( 点击数量, 下单数量, 支付数量 ) ) - val analysisRDD = flatRDD.reduceByKey( - (t1, t2) => { - ( t1._1+t2._1, t1._2 + t2._2, t1._3 + t2._3 ) - } - ) - - // 4. 将统计结果根据数量进行降序处理,取前10名 - val resultRDD = analysisRDD.sortBy(_._2, false).take(10) - - // 5. 
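The single-pass variant above comes down to one reduceByKey over three-element count tuples. A minimal sketch of just that merge, with hypothetical records and an illustrative object name:

package com.taotao.cloud.bigdata.spark.atguigu.core.req

import org.apache.spark.{SparkConf, SparkContext}

object Spark00_TupleReduce_Sketch {

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("TupleReduceSketch"))

    // One record per action, already shaped as (categoryId, (click, order, pay))
    val actions = sc.makeRDD(List(
      ("3", (1, 0, 0)),
      ("3", (0, 1, 0)),
      ("3", (0, 0, 1)),
      ("7", (1, 0, 0))
    ))

    // reduceByKey merges the three counters per category in a single shuffle
    val totals = actions.reduceByKey(
      (t1, t2) => (t1._1 + t2._1, t1._2 + t2._2, t1._3 + t2._3)
    )

    totals.collect().foreach(println)   // ("3",(1,1,1)) and ("7",(1,0,0))

    sc.stop()
  }
}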
将结果采集到控制台打印出来 - resultRDD.foreach(println) - - sc.stop() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark04_Req1_HotCategoryTop10Analysis3.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark04_Req1_HotCategoryTop10Analysis3.scala deleted file mode 100644 index 30f451088d..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark04_Req1_HotCategoryTop10Analysis3.scala +++ /dev/null @@ -1,131 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.req - -import org.apache.spark.rdd.RDD -import org.apache.spark.util.AccumulatorV2 -import org.apache.spark.{SparkConf, SparkContext} - -import scala.collection.mutable - -object Spark04_Req1_HotCategoryTop10Analysis3 { - - def main(args: Array[String]): Unit = { - - // TODO : Top10热门品类 - val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") - val sc = new SparkContext(sparConf) - - // 1. 读取原始日志数据 - val actionRDD = sc.textFile("datas/user_visit_action.txt") - - val acc = new HotCategoryAccumulator - sc.register(acc, "hotCategory") - - // 2. 将数据转换结构 - actionRDD.foreach( - action => { - val datas = action.split("_") - if (datas(6) != "-1") { - // 点击的场合 - acc.add((datas(6), "click")) - } else if (datas(8) != "null") { - // 下单的场合 - val ids = datas(8).split(",") - ids.foreach( - id => { - acc.add( (id, "order") ) - } - ) - } else if (datas(10) != "null") { - // 支付的场合 - val ids = datas(10).split(",") - ids.foreach( - id => { - acc.add( (id, "pay") ) - } - ) - } - } - ) - - val accVal: mutable.Map[String, HotCategory] = acc.value - val categories: mutable.Iterable[HotCategory] = accVal.map(_._2) - - val sort = categories.toList.sortWith( - (left, right) => { - if ( left.clickCnt > right.clickCnt ) { - true - } else if (left.clickCnt == right.clickCnt) { - if ( left.orderCnt > right.orderCnt ) { - true - } else if (left.orderCnt == right.orderCnt) { - left.payCnt > right.payCnt - } else { - false - } - } else { - false - } - } - ) - - // 5. 将结果采集到控制台打印出来 - sort.take(10).foreach(println) - - sc.stop() - } - case class HotCategory( cid:String, var clickCnt : Int, var orderCnt : Int, var payCnt : Int ) - /** - * 自定义累加器 - * 1. 继承AccumulatorV2,定义泛型 - * IN : ( 品类ID, 行为类型 ) - * OUT : mutable.Map[String, HotCategory] - * 2. 
重写方法(6) - */ - class HotCategoryAccumulator extends AccumulatorV2[(String, String), mutable.Map[String, HotCategory]]{ - - private val hcMap = mutable.Map[String, HotCategory]() - - override def isZero: Boolean = { - hcMap.isEmpty - } - - override def copy(): AccumulatorV2[(String, String), mutable.Map[String, HotCategory]] = { - new HotCategoryAccumulator() - } - - override def reset(): Unit = { - hcMap.clear() - } - - override def add(v: (String, String)): Unit = { - val cid = v._1 - val actionType = v._2 - val category: HotCategory = hcMap.getOrElse(cid, HotCategory(cid, 0,0,0)) - if ( actionType == "click" ) { - category.clickCnt += 1 - } else if (actionType == "order") { - category.orderCnt += 1 - } else if (actionType == "pay") { - category.payCnt += 1 - } - hcMap.update(cid, category) - } - - override def merge(other: AccumulatorV2[(String, String), mutable.Map[String, HotCategory]]): Unit = { - val map1 = this.hcMap - val map2 = other.value - - map2.foreach{ - case ( cid, hc ) => { - val category: HotCategory = map1.getOrElse(cid, HotCategory(cid, 0,0,0)) - category.clickCnt += hc.clickCnt - category.orderCnt += hc.orderCnt - category.payCnt += hc.payCnt - map1.update(cid, category) - } - } - } - - override def value: mutable.Map[String, HotCategory] = hcMap - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark05_Req2_HotCategoryTop10SessionAnalysis.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark05_Req2_HotCategoryTop10SessionAnalysis.scala deleted file mode 100644 index 5e6828cb9e..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark05_Req2_HotCategoryTop10SessionAnalysis.scala +++ /dev/null @@ -1,90 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.req - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark05_Req2_HotCategoryTop10SessionAnalysis { - - def main(args: Array[String]): Unit = { - - // TODO : Top10热门品类 - val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") - val sc = new SparkContext(sparConf) - - val actionRDD = sc.textFile("datas/user_visit_action.txt") - actionRDD.cache() - val top10Ids: Array[String] = top10Category(actionRDD) - - // 1. 过滤原始数据,保留点击和前10品类ID - val filterActionRDD = actionRDD.filter( - action => { - val datas = action.split("_") - if ( datas(6) != "-1" ) { - top10Ids.contains(datas(6)) - } else { - false - } - } - ) - - // 2. 根据品类ID和sessionid进行点击量的统计 - val reduceRDD: RDD[((String, String), Int)] = filterActionRDD.map( - action => { - val datas = action.split("_") - ((datas(6), datas(2)), 1) - } - ).reduceByKey(_ + _) - - // 3. 将统计的结果进行结构的转换 - // (( 品类ID,sessionId ),sum) => ( 品类ID,(sessionId, sum) ) - val mapRDD = reduceRDD.map{ - case ( (cid, sid), sum ) => { - ( cid, (sid, sum) ) - } - } - - // 4. 相同的品类进行分组 - val groupRDD: RDD[(String, Iterable[(String, Int)])] = mapRDD.groupByKey() - - // 5. 
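The per-category session ranking being built here is an instance of the common top-N-per-key pattern: group by key, sort each value list locally, keep the head. A toy sketch with invented session counts (object name is hypothetical):

package com.taotao.cloud.bigdata.spark.atguigu.core.req

import org.apache.spark.{SparkConf, SparkContext}

object Spark00_TopNPerKey_Sketch {

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("TopNPerKeySketch"))

    // (categoryId, (sessionId, clickCount)) with made-up values
    val clicks = sc.makeRDD(List(
      ("3", ("s1", 5)), ("3", ("s2", 9)), ("3", ("s3", 7)),
      ("7", ("s1", 2)), ("7", ("s4", 8))
    ))

    // Group by category, then sort each session list by count and keep the top 2
    val top2 = clicks.groupByKey().mapValues(
      iter => iter.toList.sortBy(_._2)(Ordering.Int.reverse).take(2)
    )

    top2.collect().foreach(println)
    // ("3", List((s2,9), (s3,7))) and ("7", List((s4,8), (s1,2)))

    sc.stop()
  }
}

groupByKey materialises every session of a category in one task, which is fine for a sketch; when per-key lists grow large, aggregateByKey with a bounded sorted buffer is the usual alternative.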
将分组后的数据进行点击量的排序,取前10名 - val resultRDD = groupRDD.mapValues( - iter => { - iter.toList.sortBy(_._2)(Ordering.Int.reverse).take(10) - } - ) - - resultRDD.collect().foreach(println) - - - sc.stop() - } - def top10Category(actionRDD:RDD[String]) = { - val flatRDD: RDD[(String, (Int, Int, Int))] = actionRDD.flatMap( - action => { - val datas = action.split("_") - if (datas(6) != "-1") { - // 点击的场合 - List((datas(6), (1, 0, 0))) - } else if (datas(8) != "null") { - // 下单的场合 - val ids = datas(8).split(",") - ids.map(id => (id, (0, 1, 0))) - } else if (datas(10) != "null") { - // 支付的场合 - val ids = datas(10).split(",") - ids.map(id => (id, (0, 0, 1))) - } else { - Nil - } - } - ) - - val analysisRDD = flatRDD.reduceByKey( - (t1, t2) => { - ( t1._1+t2._1, t1._2 + t2._2, t1._3 + t2._3 ) - } - ) - - analysisRDD.sortBy(_._2, false).take(10).map(_._1) - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark06_Req3_PageflowAnalysis.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark06_Req3_PageflowAnalysis.scala deleted file mode 100644 index dd46d6e2c9..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark06_Req3_PageflowAnalysis.scala +++ /dev/null @@ -1,121 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.req - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark06_Req3_PageflowAnalysis { - - def main(args: Array[String]): Unit = { - - // TODO : Top10热门品类 - val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") - val sc = new SparkContext(sparConf) - - val actionRDD = sc.textFile("datas/user_visit_action.txt") - - val actionDataRDD = actionRDD.map( - action => { - val datas = action.split("_") - UserVisitAction( - datas(0), - datas(1).toLong, - datas(2), - datas(3).toLong, - datas(4), - datas(5), - datas(6).toLong, - datas(7).toLong, - datas(8), - datas(9), - datas(10), - datas(11), - datas(12).toLong - ) - } - ) - actionDataRDD.cache() - - // TODO 对指定的页面连续跳转进行统计 - // 1-2,2-3,3-4,4-5,5-6,6-7 - val ids = List[Long](1,2,3,4,5,6,7) - val okflowIds: List[(Long, Long)] = ids.zip(ids.tail) - - // TODO 计算分母 - val pageidToCountMap: Map[Long, Long] = actionDataRDD.filter( - action => { - ids.init.contains(action.page_id) - } - ).map( - action => { - (action.page_id, 1L) - } - ).reduceByKey(_ + _).collect().toMap - - // TODO 计算分子 - - // 根据session进行分组 - val sessionRDD: RDD[(String, Iterable[UserVisitAction])] = actionDataRDD.groupBy(_.session_id) - - // 分组后,根据访问时间进行排序(升序) - val mvRDD: RDD[(String, List[((Long, Long), Int)])] = sessionRDD.mapValues( - iter => { - val sortList: List[UserVisitAction] = iter.toList.sortBy(_.action_time) - - // 【1,2,3,4】 - // 【1,2】,【2,3】,【3,4】 - // 【1-2,2-3,3-4】 - // Sliding : 滑窗 - // 【1,2,3,4】 - // 【2,3,4】 - // zip : 拉链 - val flowIds: List[Long] = sortList.map(_.page_id) - val pageflowIds: List[(Long, Long)] = flowIds.zip(flowIds.tail) - - // 将不合法的页面跳转进行过滤 - pageflowIds.filter( - t => { - okflowIds.contains(t) - } - ).map( - t => { - (t, 1) - } - ) - } - ) - // ((1,2),1) - val flatRDD: RDD[((Long, Long), Int)] = mvRDD.map(_._2).flatMap(list=>list) - // ((1,2),1) => ((1,2),sum) - val dataRDD = flatRDD.reduceByKey(_+_) - - // TODO 计算单跳转换率 - // 分子除以分母 - dataRDD.foreach{ - case ( (pageid1, pageid2), sum ) => { - val lon: Long = pageidToCountMap.getOrElse(pageid1, 0L) - - 
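The page-flow job above builds single jumps by zipping the visited page list with its own tail, and the conversion rate is simply the jump count divided by the source page's visit count. A plain-Scala sketch of that arithmetic with invented figures:

object Spark00_PageFlow_Sketch {

  def main(args: Array[String]): Unit = {
    // Visit sequence of one session (hypothetical)
    val pages = List(1L, 2L, 3L, 4L)

    // zip with tail turns [1,2,3,4] into the single jumps [(1,2),(2,3),(3,4)]
    val jumps = pages.zip(pages.tail)
    println(jumps)

    // With, say, 100 visits to page 1 and 40 jumps 1 -> 2,
    // the single-jump conversion rate for 1 -> 2 is 40 / 100 = 0.4
    val pageVisits = Map(1L -> 100L, 2L -> 80L, 3L -> 60L)
    val jumpCounts = Map((1L, 2L) -> 40L, (2L, 3L) -> 30L)

    jumpCounts.foreach { case ((from, to), cnt) =>
      val rate = cnt.toDouble / pageVisits.getOrElse(from, 1L)
      println(s"$from -> $to : $rate")
    }
  }
}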
println(s"页面${pageid1}跳转到页面${pageid2}单跳转换率为:" + ( sum.toDouble/lon )) - } - } - - - sc.stop() - } - - //用户访问动作表 - case class UserVisitAction( - date: String,//用户点击行为的日期 - user_id: Long,//用户的ID - session_id: String,//Session的ID - page_id: Long,//某个页面的ID - action_time: String,//动作的时间点 - search_keyword: String,//用户搜索的关键词 - click_category_id: Long,//某一个商品品类的ID - click_product_id: Long,//某一个商品的ID - order_category_ids: String,//一次订单中所有品类的ID集合 - order_product_ids: String,//一次订单中所有商品的ID集合 - pay_category_ids: String,//一次支付中所有品类的ID集合 - pay_product_ids: String,//一次支付中所有商品的ID集合 - city_id: Long - )//城市 id -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Driver.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Driver.scala deleted file mode 100644 index ec56656476..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Driver.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.test - -import java.io.{ObjectOutputStream, OutputStream} -import java.net.Socket - -object Driver { - - def main(args: Array[String]): Unit = { - // 连接服务器 - val client1 = new Socket("localhost", 9999) - val client2 = new Socket("localhost", 8888) - - val task = new Task() - - val out1: OutputStream = client1.getOutputStream - val objOut1 = new ObjectOutputStream(out1) - - val subTask = new SubTask() - subTask.logic = task.logic - subTask.datas = task.datas.take(2) - - objOut1.writeObject(subTask) - objOut1.flush() - objOut1.close() - client1.close() - - val out2: OutputStream = client2.getOutputStream - val objOut2 = new ObjectOutputStream(out2) - - val subTask1 = new SubTask() - subTask1.logic = task.logic - subTask1.datas = task.datas.takeRight(2) - objOut2.writeObject(subTask1) - objOut2.flush() - objOut2.close() - client2.close() - println("客户端数据发送完毕") - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor2.scala deleted file mode 100644 index 05ea3fb88b..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor2.scala +++ /dev/null @@ -1,25 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.test - -import java.io.{InputStream, ObjectInputStream} -import java.net.{ServerSocket, Socket} - -object Executor2 { - - def main(args: Array[String]): Unit = { - - // 启动服务器,接收数据 - val server = new ServerSocket(8888) - println("服务器启动,等待接收数据") - - // 等待客户端的连接 - val client: Socket = server.accept() - val in: InputStream = client.getInputStream - val objIn = new ObjectInputStream(in) - val task: SubTask = objIn.readObject().asInstanceOf[SubTask] - val ints: List[Int] = task.compute() - println("计算节点[8888]计算的结果为:" + ints) - objIn.close() - client.close() - server.close() - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/SubTask.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/SubTask.scala deleted file mode 100644 index 6c92bdd3d5..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/SubTask.scala +++ /dev/null @@ -1,11 +0,0 @@ -package 
com.taotao.cloud.bigdata.spark.atguigu.core.test - -class SubTask extends Serializable { - var datas : List[Int] = _ - var logic : (Int)=>Int = _ - - // 计算 - def compute() = { - datas.map(logic) - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Task.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Task.scala deleted file mode 100644 index 18c31c5e47..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Task.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.test - -class Task extends Serializable { - - val datas = List(1,2,3,4) - - //val logic = ( num:Int )=>{ num * 2 } - val logic : (Int)=>Int = _ * 2 - - -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark01_WordCount.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark01_WordCount.scala deleted file mode 100644 index 196db1f3fe..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark01_WordCount.scala +++ /dev/null @@ -1,49 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.wc - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark01_WordCount { - - def main(args: Array[String]): Unit = { - - // Application - // Spark框架 - // TODO 建立和Spark框架的连接 - // JDBC : Connection - val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") - val sc = new SparkContext(sparConf) - - // TODO 执行业务操作 - - // 1. 读取文件,获取一行一行的数据 - // hello world - val lines: RDD[String] = sc.textFile("datas") - - // 2. 将一行数据进行拆分,形成一个一个的单词(分词) - // 扁平化:将整体拆分成个体的操作 - // "hello world" => hello, world, hello, world - val words: RDD[String] = lines.flatMap(_.split(" ")) - - // 3. 将数据根据单词进行分组,便于统计 - // (hello, hello, hello), (world, world) - val wordGroup: RDD[(String, Iterable[String])] = words.groupBy(word=>word) - - // 4. 对分组后的数据进行转换 - // (hello, hello, hello), (world, world) - // (hello, 3), (world, 2) - val wordToCount = wordGroup.map { - case ( word, list ) => { - (word, list.size) - } - } - - // 5. 将转换结果采集到控制台打印出来 - val array: Array[(String, Int)] = wordToCount.collect() - array.foreach(println) - - // TODO 关闭连接 - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark02_WordCount1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark02_WordCount1.scala deleted file mode 100644 index 760a7a114e..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark02_WordCount1.scala +++ /dev/null @@ -1,45 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.wc - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -object Spark02_WordCount1 { - - def main(args: Array[String]): Unit = { - - // Application - // Spark框架 - // TODO 建立和Spark框架的连接 - // JDBC : Connection - val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") - val sc = new SparkContext(sparConf) - - // TODO 执行业务操作 - - // 1. 读取文件,获取一行一行的数据 - // hello world - val lines: RDD[String] = sc.textFile("datas") - - // 2. 
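The Driver/Executor demo above only works because SubTask — data plus function — is serializable; the same requirement is what Spark's closure check enforces on RDD operators. A socket-free sketch that round-trips such a task through Java serialization (class name and values are made up):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

class MiniTask extends Serializable {
  var datas: List[Int] = _
  var logic: Int => Int = _
  def compute(): List[Int] = datas.map(logic)
}

object Spark00_SerializableTask_Sketch {

  def main(args: Array[String]): Unit = {
    val task = new MiniTask()
    task.datas = List(1, 2, 3, 4)
    task.logic = _ * 2

    // Serialize, exactly as it would be written to the executor's socket
    val bytes = new ByteArrayOutputStream()
    new ObjectOutputStream(bytes).writeObject(task)

    // Deserialize on the "executor" side and run the shipped logic
    val in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
    val received = in.readObject().asInstanceOf[MiniTask]
    println(received.compute())   // List(2, 4, 6, 8)
  }
}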
将一行数据进行拆分,形成一个一个的单词(分词) - // 扁平化:将整体拆分成个体的操作 - // "hello world" => hello, world, hello, world - val words: RDD[String] = lines.flatMap(_.split(" ")) - - // 3. 将单词进行结构的转换,方便统计 - // word => (word, 1) - val wordToOne = words.map(word=>(word,1)) - - // 4. 将转换后的数据进行分组聚合 - // 相同key的value进行聚合操作 - // (word, 1) => (word, sum) - val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey(_+_) - - // 5. 将转换结果采集到控制台打印出来 - val array: Array[(String, Int)] = wordToSum.collect() - array.foreach(println) - - // TODO 关闭连接 - sc.stop() - - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark03_WordCount.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark03_WordCount.scala deleted file mode 100644 index 96e265e9cb..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark03_WordCount.scala +++ /dev/null @@ -1,117 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.core.wc - -import org.apache.spark.rdd.RDD -import org.apache.spark.{SparkConf, SparkContext} - -import scala.collection.mutable - -object Spark03_WordCount { - def main(args: Array[String]): Unit = { - - val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") - val sc = new SparkContext(sparConf) - - wordcount91011(sc) - - sc.stop() - - } - - // groupBy - def wordcount1(sc : SparkContext): Unit = { - - val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - val group: RDD[(String, Iterable[String])] = words.groupBy(word=>word) - val wordCount: RDD[(String, Int)] = group.mapValues(iter=>iter.size) - } - - // groupByKey - def wordcount2(sc : SparkContext): Unit = { - val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - val wordOne = words.map((_,1)) - val group: RDD[(String, Iterable[Int])] = wordOne.groupByKey() - val wordCount: RDD[(String, Int)] = group.mapValues(iter=>iter.size) - } - - // reduceByKey - def wordcount3(sc : SparkContext): Unit = { - val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - val wordOne = words.map((_,1)) - val wordCount: RDD[(String, Int)] = wordOne.reduceByKey(_+_) - } - - // aggregateByKey - def wordcount4(sc : SparkContext): Unit = { - val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - val wordOne = words.map((_,1)) - val wordCount: RDD[(String, Int)] = wordOne.aggregateByKey(0)(_+_, _+_) - } - - // foldByKey - def wordcount5(sc : SparkContext): Unit = { - val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - val wordOne = words.map((_,1)) - val wordCount: RDD[(String, Int)] = wordOne.foldByKey(0)(_+_) - } - - // combineByKey - def wordcount6(sc : SparkContext): Unit = { - val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - val wordOne = words.map((_,1)) - val wordCount: RDD[(String, Int)] = wordOne.combineByKey( - v=>v, - (x:Int, y) => x + y, - (x:Int, y:Int) => x + y - ) - } - - // countByKey - def wordcount7(sc : SparkContext): Unit = { - val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - val wordOne = words.map((_,1)) - val wordCount: collection.Map[String, Long] = wordOne.countByKey() - } - - // countByValue - def wordcount8(sc : SparkContext): Unit = { - val rdd = 
sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - val wordCount: collection.Map[String, Long] = words.countByValue() - } - - // reduce, aggregate, fold - def wordcount91011(sc : SparkContext): Unit = { - val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) - val words = rdd.flatMap(_.split(" ")) - - // 【(word, count),(word, count)】 - // word => Map[(word,1)] - val mapWord = words.map( - word => { - mutable.Map[String, Long]((word,1)) - } - ) - - val wordCount = mapWord.reduce( - (map1, map2) => { - map2.foreach{ - case (word, count) => { - val newCount = map1.getOrElse(word, 0L) + count - map1.update(word, newCount) - } - } - map1 - } - ) - - println(wordCount) - } - -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req31.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req31.scala deleted file mode 100644 index 8419e01e7d..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req31.scala +++ /dev/null @@ -1,92 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.streaming - -import java.io.{File, FileWriter, PrintWriter} -import java.text.SimpleDateFormat - -import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord} -import org.apache.spark.SparkConf -import org.apache.spark.streaming.dstream.InputDStream -import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies} -import org.apache.spark.streaming.{Seconds, StreamingContext} - -import scala.collection.mutable.ListBuffer - -object SparkStreaming13_Req31 { - - def main(args: Array[String]): Unit = { - - val sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkStreaming") - val ssc = new StreamingContext(sparkConf, Seconds(5)) - - val kafkaPara: Map[String, Object] = Map[String, Object]( - ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "linux1:9092,linux2:9092,linux3:9092", - ConsumerConfig.GROUP_ID_CONFIG -> "atguigu", - "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer", - "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer" - ) - - val kafkaDataDS: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String]( - ssc, - LocationStrategies.PreferConsistent, - ConsumerStrategies.Subscribe[String, String](Set("atguiguNew"), kafkaPara) - ) - val adClickData = kafkaDataDS.map( - kafkaData => { - val data = kafkaData.value() - val datas = data.split(" ") - AdClickData(datas(0),datas(1),datas(2),datas(3),datas(4)) - } - ) - - // 最近一分钟,每10秒计算一次 - // 12:01 => 12:00 - // 12:11 => 12:10 - // 12:19 => 12:10 - // 12:25 => 12:20 - // 12:59 => 12:50 - - // 55 => 50, 49 => 40, 32 => 30 - // 55 / 10 * 10 => 50 - // 49 / 10 * 10 => 40 - // 32 / 10 * 10 => 30 - - // 这里涉及窗口的计算 - val reduceDS = adClickData.map( - data => { - val ts = data.ts.toLong - val newTS = ts / 10000 * 10000 - ( newTS, 1 ) - } - ).reduceByKeyAndWindow((x:Int,y:Int)=>{x+y}, Seconds(60), Seconds(10)) - - //reduceDS.print() - reduceDS.foreachRDD( - rdd => { - val list = ListBuffer[String]() - - val datas: Array[(Long, Int)] = rdd.sortByKey(true).collect() - datas.foreach{ - case ( time, cnt ) => { - - val timeString = new SimpleDateFormat("mm:ss").format(new java.util.Date(time.toLong)) - - list.append(s"""{"xtime":"${timeString}", 
"yval":"${cnt}"}""") - } - } - - // 输出文件 - val out = new PrintWriter(new FileWriter(new File("D:\\mineworkspace\\idea\\classes\\atguigu-classes\\datas\\adclick\\adclick.json"))) - out.println("["+list.mkString(",")+"]") - out.flush() - out.close() - } - ) - - - ssc.start() - ssc.awaitTermination() - } - // 广告点击数据 - case class AdClickData( ts:String, area:String, city:String, user:String, ad:String ) - -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/util/JDBCUtil.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/util/JDBCUtil.scala deleted file mode 100644 index 353862f5a8..0000000000 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/util/JDBCUtil.scala +++ /dev/null @@ -1,66 +0,0 @@ -package com.taotao.cloud.bigdata.spark.atguigu.util - -import java.sql.{Connection, PreparedStatement} -import java.util.Properties - -import com.alibaba.druid.pool.DruidDataSourceFactory -import javax.sql.DataSource - -object JDBCUtil { - //初始化连接池 - var dataSource: DataSource = init() - - //初始化连接池方法 - def init(): DataSource = { - val properties = new Properties() - properties.setProperty("driverClassName", "com.mysql.jdbc.Driver") - properties.setProperty("url", "jdbc:mysql://linux1:3306/spark-streaming?useUnicode=true&characterEncoding=UTF-8") - properties.setProperty("username", "root") - properties.setProperty("password", "123123") - properties.setProperty("maxActive", "50") - DruidDataSourceFactory.createDataSource(properties) - } - - //获取MySQL连接 - def getConnection: Connection = { - dataSource.getConnection - } - - //执行SQL语句,单条数据插入 - def executeUpdate(connection: Connection, sql: String, params: Array[Any]): Int = { - var rtn = 0 - var pstmt: PreparedStatement = null - try { - connection.setAutoCommit(false) - pstmt = connection.prepareStatement(sql) - - if (params != null && params.length > 0) { - for (i <- params.indices) { - pstmt.setObject(i + 1, params(i)) - } - } - rtn = pstmt.executeUpdate() - connection.commit() - pstmt.close() - } catch { - case e: Exception => e.printStackTrace() - } - rtn - } - - def isExist(connection: Connection, sql: String, params: Array[Any]): Boolean = { - var flag: Boolean = false - var pstmt: PreparedStatement = null - try { - pstmt = connection.prepareStatement(sql) - for (i <- params.indices) { - pstmt.setObject(i + 1, params(i)) - } - flag = pstmt.executeQuery().next() - pstmt.close() - } catch { - case e: Exception => e.printStackTrace() - } - flag - } -} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/python/PythonWordCount.py b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/python/PythonWordCount.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/ScalaWordCount.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/ScalaWordCount.scala index eed984a7a0..a13ea84967 100644 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/ScalaWordCount.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/ScalaWordCount.scala @@ -29,13 +29,16 @@ import org.apache.spark.{SparkConf, SparkContext} * 3.上传jar包提交集群运行 *

 * ./spark-submit \ - * --master spark://127.0.0.1:7077 \ + * --master yarn \ + * --deploy-mode cluster \ + * --class com.taotao.cloud.spark.ScalaWordCount \ - * --executor-memory 512m \ - * --total-executor-cores 2 \ - * /root/spark/jar/taotao-cloud-spark-1.0-all.jar \ - * hdfs://127.0.0.1/spark/wordcount/input \ - * hdfs://127.0.0.1/spark/wordcount/output + * --driver-memory 2g \ + * --executor-memory 1g \ + * --executor-cores 2 \ + * --queue default \ + * /opt/bigdata/spark-3.0.0-bin-hadoop3.2/test/jar/taotao-cloud-spark-1.0-all.jar \ + * /opt/bigdata/spark-3.0.0-bin-hadoop3.2/test/input \ + * /opt/bigdata/spark-3.0.0-bin-hadoop3.2/test/output *

* * @author shuigedeng @@ -47,14 +50,11 @@ object ScalaWordCount { def main(args: Array[String]): Unit = { val conf = new SparkConf() .setAppName("WordCountApp") - .setMaster("local[1]") +// .setMaster("local[1]") val context = new SparkContext(conf) - - val value = context.textFile("/") - - context.textFile("/Users/shuigedeng/spark/input") + context.textFile(args(0)) .flatMap(_.split(" ")) .map((_, 1)) .reduceByKey(_ + _) diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark01_Acc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark01_Acc.scala new file mode 100644 index 0000000000..2040bc0538 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark01_Acc.scala @@ -0,0 +1,29 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.acc + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_Acc { + + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf().setMaster("local").setAppName("Acc") + val sc = new SparkContext(sparConf) + + val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4)) + + // reduce : 分区内计算,分区间计算 + //val i: Int = rdd.reduce(_+_) + //println(i) + var sum = 0 + rdd.foreach( + num => { + sum += num + } + ) + println("sum = " + sum) + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark02_Acc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark02_Acc.scala new file mode 100644 index 0000000000..2c31de62d7 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark02_Acc.scala @@ -0,0 +1,34 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.acc + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_Acc { + + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf().setMaster("local").setAppName("Acc") + val sc = new SparkContext(sparConf) + + val rdd = sc.makeRDD(List(1, 2, 3, 4)) + + // 获取系统累加器 + // Spark默认就提供了简单数据聚合的累加器 + val sumAcc = sc.longAccumulator("sum") + + //sc.doubleAccumulator + //sc.collectionAccumulator + + rdd.foreach( + num => { + // 使用累加器 + sumAcc.add(num) + } + ) + + // 获取累加器的值 + println(sumAcc.value) + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark03_Acc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark03_Acc.scala new file mode 100644 index 0000000000..6a2cfd0477 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark03_Acc.scala @@ -0,0 +1,40 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.acc + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark03_Acc { + + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf().setMaster("local").setAppName("Acc") + val sc = new SparkContext(sparConf) + + val rdd = sc.makeRDD(List(1, 2, 3, 4)) + + // 获取系统累加器 + // Spark默认就提供了简单数据聚合的累加器 + val sumAcc = sc.longAccumulator("sum") + + //sc.doubleAccumulator + //sc.collectionAccumulator + + val mapRDD = rdd.map( + num => { + // 使用累加器 + sumAcc.add(num) + num + } + ) + + // 获取累加器的值 + // 
少加:转换算子中调用累加器,如果没有行动算子的话,那么不会执行 + // 多加:转换算子中调用累加器,如果没有行动算子的话,那么不会执行 + // 一般情况下,累加器会放置在行动算子进行操作 + mapRDD.collect() + mapRDD.collect() + println(sumAcc.value) + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark04_Acc_WordCount.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark04_Acc_WordCount.scala new file mode 100644 index 0000000000..9c169b4f9a --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark04_Acc_WordCount.scala @@ -0,0 +1,88 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.acc + +import org.apache.spark.util.AccumulatorV2 +import org.apache.spark.{SparkConf, SparkContext} + +import scala.collection.mutable + +object Spark04_Acc_WordCount { + + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf().setMaster("local").setAppName("Acc") + val sc = new SparkContext(sparConf) + + val rdd = sc.makeRDD(List("hello", "spark", "hello")) + + // 累加器 : WordCount + // 创建累加器对象 + val wcAcc = new MyAccumulator() + // 向Spark进行注册 + sc.register(wcAcc, "wordCountAcc") + + rdd.foreach( + word => { + // 数据的累加(使用累加器) + wcAcc.add(word) + } + ) + + // 获取累加器累加的结果 + println(wcAcc.value) + + sc.stop() + + } + + /* + 自定义数据累加器:WordCount + + 1. 继承AccumulatorV2, 定义泛型 + IN : 累加器输入的数据类型 String + OUT : 累加器返回的数据类型 mutable.Map[String, Long] + + 2. 重写方法(6) + */ + class MyAccumulator extends AccumulatorV2[String, mutable.Map[String, Long]] { + + private var wcMap = mutable.Map[String, Long]() + + // 判断是否初始状态 + override def isZero: Boolean = { + wcMap.isEmpty + } + + override def copy(): AccumulatorV2[String, mutable.Map[String, Long]] = { + new MyAccumulator() + } + + override def reset(): Unit = { + wcMap.clear() + } + + // 获取累加器需要计算的值 + override def add(word: String): Unit = { + val newCnt = wcMap.getOrElse(word, 0L) + 1 + wcMap.update(word, newCnt) + } + + // Driver合并多个累加器 + override def merge(other: AccumulatorV2[String, mutable.Map[String, Long]]): Unit = { + + val map1 = this.wcMap + val map2 = other.value + + map2.foreach { + case (word, count) => { + val newCount = map1.getOrElse(word, 0L) + count + map1.update(word, newCount) + } + } + } + + // 累加器结果 + override def value: mutable.Map[String, Long] = { + wcMap + } + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark05_Bc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark05_Bc.scala new file mode 100644 index 0000000000..7b50ed5423 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark05_Bc.scala @@ -0,0 +1,40 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.acc + +import org.apache.spark.{SparkConf, SparkContext} + +import scala.collection.mutable + +object Spark05_Bc { + + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf().setMaster("local").setAppName("Acc") + val sc = new SparkContext(sparConf) + + val rdd1 = sc.makeRDD(List( + ("a", 1), ("b", 2), ("c", 3) + )) + // val rdd2 = sc.makeRDD(List( + // ("a", 4),("b", 5),("c", 6) + // )) + val map = mutable.Map(("a", 4), ("b", 5), ("c", 6)) + + + + // join会导致数据量几何增长,并且会影响shuffle的性能,不推荐使用 + //val joinRDD: RDD[(String, (Int, Int))] = rdd1.join(rdd2) + //joinRDD.collect().foreach(println) + // (a, 1), (b, 2), (c, 3) 
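// A minimal, self-contained sketch of the under-/over-counting behaviour that the Spark03_Acc
// comments describe: with no action, an accumulator updated inside a transformation never runs
// ("少加"); if the action runs more than once, it is added more than once ("多加"). Not part of
// the patched files above; assumes a local master and illustrative data.
import org.apache.spark.{SparkConf, SparkContext}

object AccPitfallSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("AccPitfall"))
    val sumAcc = sc.longAccumulator("sum")

    // The accumulator is updated inside a transformation, so nothing has happened yet.
    val mapped = sc.makeRDD(List(1, 2, 3, 4)).map { n => sumAcc.add(n); n }
    println(sumAcc.value) // 0  -> under-count: no action has run

    mapped.collect()
    mapped.collect()
    println(sumAcc.value) // 20 -> over-count: the map ran twice (2 * 10)

    sc.stop()
  }
}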
+ // (a, (1,4)),(b, (2,5)),(c, (3,6)) + rdd1.map { + case (w, c) => { + val l: Int = map.getOrElse(w, 0) + (w, (c, l)) + } + }.collect().foreach(println) + + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark06_Bc.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark06_Bc.scala new file mode 100644 index 0000000000..fa9dc22a93 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/acc/Spark06_Bc.scala @@ -0,0 +1,35 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.acc + +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.{SparkConf, SparkContext} + +import scala.collection.mutable + +object Spark06_Bc { + + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf().setMaster("local").setAppName("Acc") + val sc = new SparkContext(sparConf) + + val rdd1 = sc.makeRDD(List( + ("a", 1), ("b", 2), ("c", 3) + )) + val map = mutable.Map(("a", 4), ("b", 5), ("c", 6)) + + // 封装广播变量 + val bc: Broadcast[mutable.Map[String, Int]] = sc.broadcast(map) + + rdd1.map { + case (w, c) => { + // 方法广播变量 + val l: Int = bc.value.getOrElse(w, 0) + (w, (c, l)) + } + }.collect().foreach(println) + + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/application/WordCountApplication.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/application/WordCountApplication.scala similarity index 60% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/application/WordCountApplication.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/application/WordCountApplication.scala index 892f6d9bdc..52a88f2887 100644 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/application/WordCountApplication.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/application/WordCountApplication.scala @@ -1,7 +1,8 @@ package com.taotao.cloud.bigdata.spark.atguigu.core.framework.application -import com.atguigu.bigdata.spark.core.framework.common.TApplication -import com.atguigu.bigdata.spark.core.framework.controller.WordCountController +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.common.TApplication +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.controller.WordCountController + object WordCountApplication extends App with TApplication{ diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TApplication.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TApplication.scala similarity index 80% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TApplication.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TApplication.scala index 71c14ad600..ced07d5d47 100644 --- 
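// A small sketch checking the point made in Spark05_Bc/Spark06_Bc above: broadcasting the small
// map and looking keys up inside map() gives the same per-key result as rdd1.join(rdd2), without
// the shuffle the join triggers. Not part of the patched files; data and names are illustrative.
import org.apache.spark.{SparkConf, SparkContext}

object BroadcastJoinSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("BcJoin"))

    val rdd1 = sc.makeRDD(List(("a", 1), ("b", 2), ("c", 3)))
    val rdd2 = sc.makeRDD(List(("a", 4), ("b", 5), ("c", 6)))

    // Shuffle-based join.
    val joined = rdd1.join(rdd2).collect().toMap

    // Broadcast the small side and look it up in a map() instead.
    val bc = sc.broadcast(Map("a" -> 4, "b" -> 5, "c" -> 6))
    val viaBroadcast = rdd1.map { case (k, v) => (k, (v, bc.value.getOrElse(k, 0))) }.collect().toMap

    println(joined == viaBroadcast) // true for this sample data

    sc.stop()
  }
}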
a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TApplication.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TApplication.scala @@ -1,7 +1,6 @@ package com.taotao.cloud.bigdata.spark.atguigu.core.framework.common -import com.atguigu.bigdata.spark.core.framework.controller.WordCountController -import com.atguigu.bigdata.spark.core.framework.util.EnvUtil +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.util.EnvUtil import org.apache.spark.{SparkConf, SparkContext} trait TApplication { diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TController.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TController.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TController.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TController.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TDao.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TDao.scala similarity index 68% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TDao.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TDao.scala index 4b91befa84..a8edf0a68b 100644 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TDao.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TDao.scala @@ -1,6 +1,7 @@ package com.taotao.cloud.bigdata.spark.atguigu.core.framework.common -import com.atguigu.bigdata.spark.core.framework.util.EnvUtil +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.util.EnvUtil + trait TDao { diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TService.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TService.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TService.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/common/TService.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/controller/WordCountController.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/controller/WordCountController.scala similarity index 69% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/controller/WordCountController.scala rename to 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/controller/WordCountController.scala index 5018104608..d79b72f70d 100644 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/controller/WordCountController.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/controller/WordCountController.scala @@ -1,7 +1,7 @@ package com.taotao.cloud.bigdata.spark.atguigu.core.framework.controller -import com.atguigu.bigdata.spark.core.framework.common.TController -import com.atguigu.bigdata.spark.core.framework.service.WordCountService +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.common.TController +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.service.WordCountService /** diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/dao/WordCountDao.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/dao/WordCountDao.scala similarity index 61% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/dao/WordCountDao.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/dao/WordCountDao.scala index 1aa11dee81..02cc7f3224 100644 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/dao/WordCountDao.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/dao/WordCountDao.scala @@ -1,6 +1,7 @@ package com.taotao.cloud.bigdata.spark.atguigu.core.framework.dao -import com.atguigu.bigdata.spark.core.framework.common.TDao +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.common.TDao + // 持久层 class WordCountDao extends TDao{ diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/service/WordCountService.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/service/WordCountService.scala similarity index 79% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/service/WordCountService.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/service/WordCountService.scala index db60c9a20f..1d1f2e88d5 100644 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/service/WordCountService.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/service/WordCountService.scala @@ -1,7 +1,7 @@ package com.taotao.cloud.bigdata.spark.atguigu.core.framework.service -import com.atguigu.bigdata.spark.core.framework.common.TService -import com.atguigu.bigdata.spark.core.framework.dao.WordCountDao +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.common.TService +import com.taotao.cloud.bigdata.spark.atguigu.core.framework.dao.WordCountDao import org.apache.spark.rdd.RDD /** diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/util/EnvUtil.scala 
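// The framework traits above (TApplication, TDao, ...) import an EnvUtil whose body is not shown
// in this diff. A common way to implement such a helper is a ThreadLocal holder for the
// SparkContext, so the dao/service layers can reach the context without it being passed through
// every constructor. This is only a sketch of that pattern, not the repository's actual EnvUtil.
import org.apache.spark.SparkContext

object EnvHolderSketch {
  private val scLocal = new ThreadLocal[SparkContext]()

  def put(sc: SparkContext): Unit = scLocal.set(sc)
  def take(): SparkContext = scLocal.get()
  def clear(): Unit = scLocal.remove()
}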
b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/util/EnvUtil.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/framework/util/EnvUtil.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/framework/util/EnvUtil.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory.scala new file mode 100644 index 0000000000..66a9bd007e --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory.scala @@ -0,0 +1,28 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_Memory { + + def main(args: Array[String]): Unit = { + + // TODO 准备环境 + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") + val sc = new SparkContext(sparkConf) + + // TODO 创建RDD + // 从内存中创建RDD,将内存中集合的数据作为处理的数据源 + val seq = Seq[Int](1, 2, 3, 4) + + // parallelize : 并行 + //val rdd: RDD[Int] = sc.parallelize(seq) + // makeRDD方法在底层实现时其实就是调用了rdd对象的parallelize方法。 + val rdd: RDD[Int] = sc.makeRDD(seq) + + rdd.collect().foreach(println) + + // TODO 关闭环境 + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par.scala similarity index 97% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par.scala index 1b7c2b9679..608c467bfb 100644 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par.scala @@ -1,6 +1,5 @@ package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder -import org.apache.spark.rdd.RDD import org.apache.spark.{SparkConf, SparkContext} object Spark01_RDD_Memory_Par { diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par1.scala new file mode 100644 index 0000000000..819ba51c22 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark01_RDD_Memory_Par1.scala @@ -0,0 +1,28 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_Memory_Par1 { + + def main(args: Array[String]): Unit = { + + // TODO 准备环境 + val sparkConf = new 
SparkConf().setMaster("local[*]").setAppName("RDD") + val sc = new SparkContext(sparkConf) + + // TODO 创建RDD + + // 【1,2】,【3,4】 + //val rdd = sc.makeRDD(List(1,2,3,4), 2) + // 【1】,【2】,【3,4】 + //val rdd = sc.makeRDD(List(1,2,3,4), 3) + // 【1】,【2,3】,【4,5】 + val rdd = sc.makeRDD(List(1, 2, 3, 4, 5), 3) + + // 将处理的数据保存成分区文件 + rdd.saveAsTextFile("output") + + // TODO 关闭环境 + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File.scala new file mode 100644 index 0000000000..82781e5a7d --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File.scala @@ -0,0 +1,29 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_RDD_File { + + def main(args: Array[String]): Unit = { + + // TODO 准备环境 + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") + val sc = new SparkContext(sparkConf) + + // TODO 创建RDD + // 从文件中创建RDD,将文件中的数据作为处理的数据源 + // path路径默认以当前环境的根路径为基准。可以写绝对路径,也可以写相对路径 + //sc.textFile("D:\\mineworkspace\\idea\\classes\\atguigu-classes\\datas\\1.txt") + //val rdd: RDD[String] = sc.textFile("datas/1.txt") + // path路径可以是文件的具体路径,也可以目录名称 + //val rdd = sc.textFile("datas") + // path路径还可以使用通配符 * + //val rdd = sc.textFile("datas/1*.txt") + // path还可以是分布式存储系统路径:HDFS + val rdd = sc.textFile("hdfs://linux1:8020/test.txt") + rdd.collect().foreach(println) + + // TODO 关闭环境 + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File1.scala new file mode 100644 index 0000000000..d8b71b464a --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File1.scala @@ -0,0 +1,26 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_RDD_File1 { + + def main(args: Array[String]): Unit = { + + // TODO 准备环境 + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") + val sc = new SparkContext(sparkConf) + + // TODO 创建RDD + // 从文件中创建RDD,将文件中的数据作为处理的数据源 + + // textFile : 以行为单位来读取数据,读取的数据都是字符串 + // wholeTextFiles : 以文件为单位读取数据 + // 读取的结果表示为元组,第一个元素表示文件路径,第二个元素表示文件内容 + val rdd = sc.wholeTextFiles("datas") + + rdd.collect().foreach(println) + + // TODO 关闭环境 + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par.scala new file mode 100644 index 0000000000..ec5893def3 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par.scala @@ -0,0 +1,35 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_RDD_File_Par { + + def main(args: Array[String]): Unit = { + + // TODO 准备环境 + 
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") + val sc = new SparkContext(sparkConf) + + // TODO 创建RDD + // textFile可以将文件作为数据处理的数据源,默认也可以设定分区。 + // minPartitions : 最小分区数量 + // math.min(defaultParallelism, 2) + //val rdd = sc.textFile("datas/1.txt") + // 如果不想使用默认的分区数量,可以通过第二个参数指定分区数 + // Spark读取文件,底层其实使用的就是Hadoop的读取方式 + // 分区数量的计算方式: + // totalSize = 7 + // goalSize = 7 / 2 = 3(byte) + + // 7 / 3 = 2...1 (1.1) + 1 = 3(分区) + + // + val rdd = sc.textFile("datas/1.txt", 2) + + rdd.saveAsTextFile("output") + + + // TODO 关闭环境 + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par1.scala new file mode 100644 index 0000000000..8739352b65 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark02_RDD_File_Par1.scala @@ -0,0 +1,38 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_RDD_File_Par1 { + + def main(args: Array[String]): Unit = { + + // TODO 准备环境 + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") + val sc = new SparkContext(sparkConf) + + // TODO 创建RDD + // TODO 数据分区的分配 + // 1. 数据以行为单位进行读取 + // spark读取文件,采用的是hadoop的方式读取,所以一行一行读取,和字节数没有关系 + // 2. 数据读取时以偏移量为单位,偏移量不会被重复读取 + /* + 1@@ => 012 + 2@@ => 345 + 3 => 6 + + */ + // 3. 数据分区的偏移量范围的计算 + // 0 => [0, 3] => 12 + // 1 => [3, 6] => 3 + // 2 => [6, 7] => + + // 【1,2】,【3】,【】 + val rdd = sc.textFile("datas/1.txt", 2) + + rdd.saveAsTextFile("output") + + + // TODO 关闭环境 + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark03_RDD_File_Par2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark03_RDD_File_Par2.scala new file mode 100644 index 0000000000..0aa871ee59 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/builder/Spark03_RDD_File_Par2.scala @@ -0,0 +1,37 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.builder + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark03_RDD_File_Par2 { + + def main(args: Array[String]): Unit = { + + // TODO 准备环境 + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RDD") + val sc = new SparkContext(sparkConf) + + // TODO 创建RDD + + // 14byte / 2 = 7byte + // 14 / 7 = 2(分区) + + /* + 1234567@@ => 012345678 + 89@@ => 9101112 + 0 => 13 + + [0, 7] => 1234567 + [7, 14] => 890 + + */ + + // 如果数据源为多个文件,那么计算分区时以文件为单位进行分区 + val rdd = sc.textFile("datas/word.txt", 2) + + rdd.saveAsTextFile("output") + + + // TODO 关闭环境 + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark01_RDD_Dep.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark01_RDD_Dep.scala new file mode 100644 index 0000000000..6dee4b0413 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark01_RDD_Dep.scala @@ -0,0 +1,31 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.dep + +import 
org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_Dep { + + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") + val sc = new SparkContext(sparConf) + + val lines: RDD[String] = sc.textFile("datas/word.txt") + println(lines.toDebugString) + println("*************************") + val words: RDD[String] = lines.flatMap(_.split(" ")) + println(words.toDebugString) + println("*************************") + val wordToOne = words.map(word => (word, 1)) + println(wordToOne.toDebugString) + println("*************************") + val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey(_ + _) + println(wordToSum.toDebugString) + println("*************************") + val array: Array[(String, Int)] = wordToSum.collect() + array.foreach(println) + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark02_RDD_Dep.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark02_RDD_Dep.scala new file mode 100644 index 0000000000..43a64c022b --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/dep/Spark02_RDD_Dep.scala @@ -0,0 +1,31 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.dep + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_RDD_Dep { + + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf().setMaster("local").setAppName("Dep") + val sc = new SparkContext(sparConf) + + val lines: RDD[String] = sc.textFile("datas/word.txt") + println(lines.dependencies) + println("*************************") + val words: RDD[String] = lines.flatMap(_.split(" ")) + println(words.dependencies) + println("*************************") + val wordToOne = words.map(word => (word, 1)) + println(wordToOne.dependencies) + println("*************************") + val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey(_ + _) + println(wordToSum.dependencies) + println("*************************") + val array: Array[(String, Int)] = wordToSum.collect() + array.foreach(println) + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Load.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Load.scala new file mode 100644 index 0000000000..f21f8fd413 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Load.scala @@ -0,0 +1,22 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.io + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_IO_Load { + + def main(args: Array[String]): Unit = { + val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") + val sc = new SparkContext(sparConf) + + val rdd = sc.textFile("output1") + println(rdd.collect().mkString(",")) + + val rdd1 = sc.objectFile[(String, Int)]("output2") + println(rdd1.collect().mkString(",")) + + val rdd2 = sc.sequenceFile[String, Int]("output3") + println(rdd2.collect().mkString(",")) + + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Save.scala 
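// A pure-Scala sketch of the split-count arithmetic walked through in the comments of
// Spark02_RDD_File_Par and Spark03_RDD_File_Par2 above: textFile delegates to Hadoop's
// FileInputFormat, which cuts the input into chunks of goalSize = totalSize / minPartitions
// bytes and lets the last chunk be up to 10% larger (the 1.1 factor). Simplified: block size
// and multi-file inputs are ignored; not part of the patched files.
object SplitMathSketch {
  def splitCount(totalSize: Long, minPartitions: Int): Int = {
    val goalSize = math.max(1L, totalSize / minPartitions) // bytes per split (integer division)
    var remaining = totalSize
    var splits = 0
    while (remaining.toDouble / goalSize > 1.1) {          // full-size splits
      splits += 1
      remaining -= goalSize
    }
    if (remaining > 0) splits += 1                         // the (possibly smaller) last split
    splits
  }

  def main(args: Array[String]): Unit = {
    println(splitCount(7, 2))  // 3 partitions, as in the 7-byte example above
    println(splitCount(14, 2)) // 2 partitions, as in the 14-byte example above
  }
}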
b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Save.scala new file mode 100644 index 0000000000..2741824eb2 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/io/Spark01_RDD_IO_Save.scala @@ -0,0 +1,25 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.io + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_IO_Save { + + def main(args: Array[String]): Unit = { + val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") + val sc = new SparkContext(sparConf) + + val rdd = sc.makeRDD( + List( + ("a", 1), + ("b", 2), + ("c", 3) + ) + ) + + rdd.saveAsTextFile("output1") + rdd.saveAsObjectFile("output2") + rdd.saveAsSequenceFile("output3") + + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark01_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark01_RDD_Operator_Action.scala new file mode 100644 index 0000000000..292e2ae80a --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark01_RDD_Operator_Action.scala @@ -0,0 +1,23 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_Operator_Action { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + val rdd = sc.makeRDD(List(1, 2, 3, 4)) + + // TODO - 行动算子 + // 所谓的行动算子,其实就是触发作业(Job)执行的方法 + // 底层代码调用的是环境对象的runJob方法 + // 底层代码中会创建ActiveJob,并提交执行。 + rdd.collect() + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark02_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark02_RDD_Operator_Action.scala new file mode 100644 index 0000000000..b15d1a3ec0 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark02_RDD_Operator_Action.scala @@ -0,0 +1,44 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_RDD_Operator_Action { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + val rdd = sc.makeRDD(List(1, 2, 3, 4)) + + // TODO - 行动算子 + + // reduce + //val i: Int = rdd.reduce(_+_) + //println(i) + + // collect : 方法会将不同分区的数据按照分区顺序采集到Driver端内存中,形成数组 + //val ints: Array[Int] = rdd.collect() + //println(ints.mkString(",")) + + // count : 数据源中数据的个数 + val cnt = rdd.count() + println(cnt) + + // first : 获取数据源中数据的第一个 + val first = rdd.first() + println(first) + + // take : 获取N个数据 + val ints: Array[Int] = rdd.take(3) + println(ints.mkString(",")) + + // takeOrdered : 数据排序后,取N个数据 + val rdd1 = sc.makeRDD(List(4, 2, 3, 1)) + val ints1: Array[Int] = rdd1.takeOrdered(3) + println(ints1.mkString(",")) + + sc.stop() + + } +} diff --git 
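// A short sketch of the "actions trigger jobs" point from Spark01_RDD_Operator_Action above:
// transformations such as map only build the lineage, and nothing executes until an action like
// collect or count submits a job via runJob. Not part of the patched files; assumes a local master.
import org.apache.spark.{SparkConf, SparkContext}

object LazyEvaluationSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("Lazy"))

    val mapped = sc.makeRDD(List(1, 2, 3, 4)).map { n =>
      println(s"computing $n")   // not printed yet: map is a transformation
      n * 2
    }

    println("before any action")  // printed first
    val result = mapped.collect() // the job runs here; the "computing ..." lines appear now
    println(result.mkString(","))

    sc.stop()
  }
}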
a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark03_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark03_RDD_Operator_Action.scala new file mode 100644 index 0000000000..d9357467c3 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark03_RDD_Operator_Action.scala @@ -0,0 +1,27 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark03_RDD_Operator_Action { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + val rdd = sc.makeRDD(List(1, 2, 3, 4), 2) + + // TODO - 行动算子 + + //10 + 13 + 17 = 40 + // aggregateByKey : 初始值只会参与分区内计算 + // aggregate : 初始值会参与分区内计算,并且和参与分区间计算 + //val result = rdd.aggregate(10)(_+_, _+_) + val result = rdd.fold(10)(_ + _) + + println(result) + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark04_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark04_RDD_Operator_Action.scala new file mode 100644 index 0000000000..2b84fa2577 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark04_RDD_Operator_Action.scala @@ -0,0 +1,27 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark04_RDD_Operator_Action { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + //val rdd = sc.makeRDD(List(1,1,1,4),2) + val rdd = sc.makeRDD(List( + ("a", 1), ("a", 2), ("a", 3) + )) + + // TODO - 行动算子 + + //val intToLong: collection.Map[Int, Long] = rdd.countByValue() + //println(intToLong) + val stringToLong: collection.Map[String, Long] = rdd.countByKey() + println(stringToLong) + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark05_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark05_RDD_Operator_Action.scala new file mode 100644 index 0000000000..596c754b9e --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark05_RDD_Operator_Action.scala @@ -0,0 +1,26 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark05_RDD_Operator_Action { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + //val rdd = sc.makeRDD(List(1,1,1,4),2) + val rdd = sc.makeRDD(List( + ("a", 1), ("a", 2), ("a", 3) + )) + + // TODO - 行动算子 + rdd.saveAsTextFile("output") + rdd.saveAsObjectFile("output1") + // saveAsSequenceFile方法要求数据的格式必须为K-V类型 + rdd.saveAsSequenceFile("output2") + 
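// A quick check of the arithmetic in Spark03_RDD_Operator_Action's comment: with List(1,2,3,4)
// in 2 partitions and an initial value of 10, each partition folds to 10+1+2=13 and 10+3+4=17,
// and the inter-partition step adds the initial value once more: 10+13+17=40. aggregate and fold
// both behave this way, whereas aggregateByKey uses the initial value only inside partitions.
// Not part of the patched files; assumes a local master.
import org.apache.spark.{SparkConf, SparkContext}

object AggregateFoldSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("AggFold"))
    val rdd = sc.makeRDD(List(1, 2, 3, 4), 2)

    println(rdd.aggregate(10)(_ + _, _ + _)) // 40
    println(rdd.fold(10)(_ + _))             // 40

    sc.stop()
  }
}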
+ sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark06_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark06_RDD_Operator_Action.scala new file mode 100644 index 0000000000..3819ed36ce --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark06_RDD_Operator_Action.scala @@ -0,0 +1,30 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark06_RDD_Operator_Action { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + val rdd = sc.makeRDD(List(1, 2, 3, 4)) + + // foreach 其实是Driver端内存集合的循环遍历方法 + rdd.collect().foreach(println) + println("******************") + // foreach 其实是Executor端内存数据打印 + rdd.foreach(println) + + // 算子 : Operator(操作) + // RDD的方法和Scala集合对象的方法不一样 + // 集合对象的方法都是在同一个节点的内存中完成的。 + // RDD的方法可以将计算逻辑发送到Executor端(分布式节点)执行 + // 为了区分不同的处理效果,所以将RDD的方法称之为算子。 + // RDD的方法外部的操作都是在Driver端执行的,而方法内部的逻辑代码是在Executor端执行。 + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark07_RDD_Operator_Action.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark07_RDD_Operator_Action.scala new file mode 100644 index 0000000000..0e37f493d4 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/action/Spark07_RDD_Operator_Action.scala @@ -0,0 +1,37 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.action + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark07_RDD_Operator_Action { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + val rdd = sc.makeRDD(List[Int]()) + + val user = new User() + + // SparkException: Task not serializable + // NotSerializableException: com.atguigu.bigdata.spark.core.rdd.operator.action.Spark07_RDD_Operator_Action$User + + // RDD算子中传递的函数是会包含闭包操作,那么就会进行检测功能 + // 闭包检测 + rdd.foreach( + num => { + println("age = " + (user.age + num)) + } + ) + + sc.stop() + + } + + //class User extends Serializable { + // 样例类在编译时,会自动混入序列化特质(实现可序列化接口) + //case class User() { + class User { + var age: Int = 30 + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform.scala new file mode 100644 index 0000000000..45b5d4373f --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform.scala @@ -0,0 +1,36 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.transform + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_Operator_Transform { + + def main(args: 
Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + // TODO 算子 - map + + val rdd = sc.makeRDD(List(1, 2, 3, 4)) + // 1,2,3,4 + // 2,4,6,8 + + // 转换函数 + def mapFunction(num: Int): Int = { + num * 2 + } + + //val mapRDD: RDD[Int] = rdd.map(mapFunction) + //val mapRDD: RDD[Int] = rdd.map((num:Int)=>{num*2}) + //val mapRDD: RDD[Int] = rdd.map((num:Int)=>num*2) + //val mapRDD: RDD[Int] = rdd.map((num)=>num*2) + //val mapRDD: RDD[Int] = rdd.map(num=>num*2) + val mapRDD: RDD[Int] = rdd.map(_ * 2) + + mapRDD.collect().foreach(println) + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Par.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Par.scala new file mode 100644 index 0000000000..25ae3149b7 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Par.scala @@ -0,0 +1,38 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.transform + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_Operator_Transform_Par { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + // TODO 算子 - map + + // 1. rdd的计算一个分区内的数据是一个一个执行逻辑 + // 只有前面一个数据全部的逻辑执行完毕后,才会执行下一个数据。 + // 分区内数据的执行是有序的。 + // 2. 不同分区数据计算是无序的。 + val rdd = sc.makeRDD(List(1, 2, 3, 4), 2) + + val mapRDD = rdd.map( + num => { + println(">>>>>>>> " + num) + num + } + ) + val mapRDD1 = mapRDD.map( + num => { + println("######" + num) + num + } + ) + + mapRDD1.collect() + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Part.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Part.scala new file mode 100644 index 0000000000..f2de94f633 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Part.scala @@ -0,0 +1,23 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.rdd.operator.transform + +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_RDD_Operator_Transform_Part { + + def main(args: Array[String]): Unit = { + + val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator") + val sc = new SparkContext(sparkConf) + + // TODO 算子 - map + val rdd = sc.makeRDD(List(1, 2, 3, 4), 2) + // 【1,2】,【3,4】 + rdd.saveAsTextFile("output") + val mapRDD = rdd.map(_ * 2) + // 【2,4】,【6,8】 + mapRDD.saveAsTextFile("output1") + + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Test.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Test.scala similarity index 100% rename from 
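// A sketch of the serialization fix hinted at in Spark07_RDD_Operator_Action above: the plain
// `class User` captured in the foreach closure fails the closure check with
// NotSerializableException; marking it Serializable, or using a case class (which mixes
// Serializable in automatically), lets the task ship to executors. Not part of the patched files.
import org.apache.spark.{SparkConf, SparkContext}

object ClosureSerializationSketch {
  case class User(age: Int = 30) // case classes are Serializable, so the closure check passes

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("Closure"))
    val user = User()

    sc.makeRDD(List(1, 2, 3, 4)).foreach { num =>
      println("age = " + (user.age + num)) // user is serialized and shipped with the task
    }

    sc.stop()
  }
}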
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Test.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark01_RDD_Operator_Transform_Test.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform_Test.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform_Test.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform_Test.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark02_RDD_Operator_Transform_Test.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform1.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform1.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark03_RDD_Operator_Transform1.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform.scala similarity index 100% rename from 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform1.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform1.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform1.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform2.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform2.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark04_RDD_Operator_Transform2.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform_Test.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform_Test.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform_Test.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark05_RDD_Operator_Transform_Test.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform.scala similarity index 100% rename from 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform1.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform1.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform1.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform_Test.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform_Test.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform_Test.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark06_RDD_Operator_Transform_Test.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform_Test.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform_Test.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform_Test.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark07_RDD_Operator_Transform_Test.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark08_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark08_RDD_Operator_Transform.scala similarity index 100% rename from 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark08_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark08_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark09_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark09_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark09_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark09_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark10_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark10_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark10_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark10_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark11_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark11_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark11_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark11_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform1.scala similarity index 100% rename from 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform1.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark12_RDD_Operator_Transform1.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform1.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform1.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark13_RDD_Operator_Transform1.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark14_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark14_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark14_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark14_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark15_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark15_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark15_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark15_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark16_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark16_RDD_Operator_Transform.scala similarity index 100% rename from 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark16_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark16_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform1.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform1.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform1.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform2.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform2.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark17_RDD_Operator_Transform2.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark18_RDD_Operator_Transform3.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark18_RDD_Operator_Transform3.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark18_RDD_Operator_Transform3.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark18_RDD_Operator_Transform3.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark19_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark19_RDD_Operator_Transform.scala similarity index 100% rename from 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark19_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark19_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark20_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark20_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark20_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark20_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark21_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark21_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark21_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark21_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark22_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark22_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark22_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark22_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark23_RDD_Operator_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark23_RDD_Operator_Transform.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark23_RDD_Operator_Transform.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark23_RDD_Operator_Transform.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark24_RDD_Req.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark24_RDD_Req.scala similarity index 100% rename from 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark24_RDD_Req.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/operator/transform/Spark24_RDD_Req.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/part/Spark01_RDD_Part.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/part/Spark01_RDD_Part.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/part/Spark01_RDD_Part.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/part/Spark01_RDD_Part.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark01_RDD_Persist.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark01_RDD_Persist.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark01_RDD_Persist.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark01_RDD_Persist.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark02_RDD_Persist.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark02_RDD_Persist.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark02_RDD_Persist.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark02_RDD_Persist.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark03_RDD_Persist.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark03_RDD_Persist.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark03_RDD_Persist.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark03_RDD_Persist.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark04_RDD_Persist.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark04_RDD_Persist.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark04_RDD_Persist.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark04_RDD_Persist.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark05_RDD_Persist.scala 
b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark05_RDD_Persist.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark05_RDD_Persist.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark05_RDD_Persist.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark06_RDD_Persist.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark06_RDD_Persist.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark06_RDD_Persist.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/persist/Spark06_RDD_Persist.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/serial/Spark01_RDD_Serial.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/serial/Spark01_RDD_Serial.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/serial/Spark01_RDD_Serial.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/rdd/serial/Spark01_RDD_Serial.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark01_Req1_HotCategoryTop10Analysis.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark01_Req1_HotCategoryTop10Analysis.scala new file mode 100644 index 0000000000..01b440c1e3 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark01_Req1_HotCategoryTop10Analysis.scala @@ -0,0 +1,108 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.req + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_Req1_HotCategoryTop10Analysis { + + def main(args: Array[String]): Unit = { + + // TODO : Top10热门品类 + val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") + val sc = new SparkContext(sparConf) + + // 1. 读取原始日志数据 + val actionRDD = sc.textFile("datas/user_visit_action.txt") + + // 2. 统计品类的点击数量:(品类ID,点击数量) + val clickActionRDD = actionRDD.filter( + action => { + val datas = action.split("_") + datas(6) != "-1" + } + ) + + val clickCountRDD: RDD[(String, Int)] = clickActionRDD.map( + action => { + val datas = action.split("_") + (datas(6), 1) + } + ).reduceByKey(_ + _) + + // 3. 统计品类的下单数量:(品类ID,下单数量) + val orderActionRDD = actionRDD.filter( + action => { + val datas = action.split("_") + datas(8) != "null" + } + ) + + // orderid => 1,2,3 + // 【(1,1),(2,1),(3,1)】 + val orderCountRDD = orderActionRDD.flatMap( + action => { + val datas = action.split("_") + val cid = datas(8) + val cids = cid.split(",") + cids.map(id => (id, 1)) + } + ).reduceByKey(_ + _) + + // 4. 
统计品类的支付数量:(品类ID,支付数量) + val payActionRDD = actionRDD.filter( + action => { + val datas = action.split("_") + datas(10) != "null" + } + ) + + // orderid => 1,2,3 + // 【(1,1),(2,1),(3,1)】 + val payCountRDD = payActionRDD.flatMap( + action => { + val datas = action.split("_") + val cid = datas(10) + val cids = cid.split(",") + cids.map(id => (id, 1)) + } + ).reduceByKey(_ + _) + + // 5. 将品类进行排序,并且取前10名 + // 点击数量排序,下单数量排序,支付数量排序 + // 元组排序:先比较第一个,再比较第二个,再比较第三个,依此类推 + // ( 品类ID, ( 点击数量, 下单数量, 支付数量 ) ) + // + // cogroup = connect + group + val cogroupRDD: RDD[(String, (Iterable[Int], Iterable[Int], Iterable[Int]))] = + clickCountRDD.cogroup(orderCountRDD, payCountRDD) + val analysisRDD = cogroupRDD.mapValues { + case (clickIter, orderIter, payIter) => { + + var clickCnt = 0 + val iter1 = clickIter.iterator + if (iter1.hasNext) { + clickCnt = iter1.next() + } + var orderCnt = 0 + val iter2 = orderIter.iterator + if (iter2.hasNext) { + orderCnt = iter2.next() + } + var payCnt = 0 + val iter3 = payIter.iterator + if (iter3.hasNext) { + payCnt = iter3.next() + } + + (clickCnt, orderCnt, payCnt) + } + } + + val resultRDD = analysisRDD.sortBy(_._2, false).take(10) + + // 6. 将结果采集到控制台打印出来 + resultRDD.foreach(println) + + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark02_Req1_HotCategoryTop10Analysis1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark02_Req1_HotCategoryTop10Analysis1.scala new file mode 100644 index 0000000000..47ff9ee713 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark02_Req1_HotCategoryTop10Analysis1.scala @@ -0,0 +1,118 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.req + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_Req1_HotCategoryTop10Analysis1 { + + def main(args: Array[String]): Unit = { + + // TODO : Top10热门品类 + val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") + val sc = new SparkContext(sparConf) + + // Q : actionRDD重复使用 + // Q : cogroup性能可能较低 + + // 1. 读取原始日志数据 + val actionRDD = sc.textFile("datas/user_visit_action.txt") + actionRDD.cache() + + // 2. 统计品类的点击数量:(品类ID,点击数量) + val clickActionRDD = actionRDD.filter( + action => { + val datas = action.split("_") + datas(6) != "-1" + } + ) + + val clickCountRDD: RDD[(String, Int)] = clickActionRDD.map( + action => { + val datas = action.split("_") + (datas(6), 1) + } + ).reduceByKey(_ + _) + + // 3. 统计品类的下单数量:(品类ID,下单数量) + val orderActionRDD = actionRDD.filter( + action => { + val datas = action.split("_") + datas(8) != "null" + } + ) + + // orderid => 1,2,3 + // 【(1,1),(2,1),(3,1)】 + val orderCountRDD = orderActionRDD.flatMap( + action => { + val datas = action.split("_") + val cid = datas(8) + val cids = cid.split(",") + cids.map(id => (id, 1)) + } + ).reduceByKey(_ + _) + + // 4. 
统计品类的支付数量:(品类ID,支付数量) + val payActionRDD = actionRDD.filter( + action => { + val datas = action.split("_") + datas(10) != "null" + } + ) + + // orderid => 1,2,3 + // 【(1,1),(2,1),(3,1)】 + val payCountRDD = payActionRDD.flatMap( + action => { + val datas = action.split("_") + val cid = datas(10) + val cids = cid.split(",") + cids.map(id => (id, 1)) + } + ).reduceByKey(_ + _) + + // (品类ID, 点击数量) => (品类ID, (点击数量, 0, 0)) + // (品类ID, 下单数量) => (品类ID, (0, 下单数量, 0)) + // => (品类ID, (点击数量, 下单数量, 0)) + // (品类ID, 支付数量) => (品类ID, (0, 0, 支付数量)) + // => (品类ID, (点击数量, 下单数量, 支付数量)) + // ( 品类ID, ( 点击数量, 下单数量, 支付数量 ) ) + + // 5. 将品类进行排序,并且取前10名 + // 点击数量排序,下单数量排序,支付数量排序 + // 元组排序:先比较第一个,再比较第二个,再比较第三个,依此类推 + // ( 品类ID, ( 点击数量, 下单数量, 支付数量 ) ) + // + val rdd1 = clickCountRDD.map { + case (cid, cnt) => { + (cid, (cnt, 0, 0)) + } + } + val rdd2 = orderCountRDD.map { + case (cid, cnt) => { + (cid, (0, cnt, 0)) + } + } + val rdd3 = payCountRDD.map { + case (cid, cnt) => { + (cid, (0, 0, cnt)) + } + } + + // 将三个数据源合并在一起,统一进行聚合计算 + val soruceRDD: RDD[(String, (Int, Int, Int))] = rdd1.union(rdd2).union(rdd3) + + val analysisRDD = soruceRDD.reduceByKey( + (t1, t2) => { + (t1._1 + t2._1, t1._2 + t2._2, t1._3 + t2._3) + } + ) + + val resultRDD = analysisRDD.sortBy(_._2, false).take(10) + + // 6. 将结果采集到控制台打印出来 + resultRDD.foreach(println) + + sc.stop() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark03_Req1_HotCategoryTop10Analysis2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark03_Req1_HotCategoryTop10Analysis2.scala new file mode 100644 index 0000000000..9508122b55 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark03_Req1_HotCategoryTop10Analysis2.scala @@ -0,0 +1,60 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.req + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark03_Req1_HotCategoryTop10Analysis2 { + + def main(args: Array[String]): Unit = { + + // TODO : Top10热门品类 + val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis") + val sc = new SparkContext(sparConf) + + // Q : 存在大量的shuffle操作(reduceByKey) + // reduceByKey 聚合算子,spark会提供优化,缓存 + + // 1. 读取原始日志数据 + val actionRDD = sc.textFile("datas/user_visit_action.txt") + + // 2. 将数据转换结构 + // 点击的场合 : ( 品类ID,( 1, 0, 0 ) ) + // 下单的场合 : ( 品类ID,( 0, 1, 0 ) ) + // 支付的场合 : ( 品类ID,( 0, 0, 1 ) ) + val flatRDD: RDD[(String, (Int, Int, Int))] = actionRDD.flatMap( + action => { + val datas = action.split("_") + if (datas(6) != "-1") { + // 点击的场合 + List((datas(6), (1, 0, 0))) + } else if (datas(8) != "null") { + // 下单的场合 + val ids = datas(8).split(",") + ids.map(id => (id, (0, 1, 0))) + } else if (datas(10) != "null") { + // 支付的场合 + val ids = datas(10).split(",") + ids.map(id => (id, (0, 0, 1))) + } else { + Nil + } + } + ) + + // 3. 将相同的品类ID的数据进行分组聚合 + // ( 品类ID,( 点击数量, 下单数量, 支付数量 ) ) + val analysisRDD = flatRDD.reduceByKey( + (t1, t2) => { + (t1._1 + t2._1, t1._2 + t2._2, t1._3 + t2._3) + } + ) + + // 4. 将统计结果根据数量进行降序处理,取前10名 + val resultRDD = analysisRDD.sortBy(_._2, false).take(10) + + // 5. 
将结果采集到控制台打印出来
+        resultRDD.foreach(println)
+
+        sc.stop()
+    }
+}
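The three versions above move from cogroup, to a three-way union, to this single pass: every log line is tagged as (categoryId, (click, order, pay)) and one reduceByKey does all of the counting, so only one shuffle remains. The tag-and-merge step itself is plain Scala and can be sanity-checked without a cluster. The sketch below is illustrative only — the object name and the two sample records are made up, following the underscore-separated layout the job expects (field 6 = click category or -1, field 8 = order category ids or "null", field 10 = pay category ids or "null"):

object HotCategorySinglePassSketch {
    def main(args: Array[String]): Unit = {
        // Hypothetical records in the user_visit_action format.
        val lines = List(
            "2019-07-17_95_session1_1_2019-07-17 00:00:02_phone_16_98_null_null_null_null_1",
            "2019-07-17_95_session1_3_2019-07-17 00:00:05_null_-1_-1_15,16_1,2_null_null_2"
        )

        // Same tagging as the flatMap above: click -> (1,0,0), order -> (0,1,0), pay -> (0,0,1).
        val tagged: List[(String, (Int, Int, Int))] = lines.flatMap { action =>
            val datas = action.split("_")
            if (datas(6) != "-1") List((datas(6), (1, 0, 0)))
            else if (datas(8) != "null") datas(8).split(",").toList.map(id => (id, (0, 1, 0)))
            else if (datas(10) != "null") datas(10).split(",").toList.map(id => (id, (0, 0, 1)))
            else Nil
        }

        // Local stand-in for reduceByKey: group by category id and add the three counters.
        val perCategory: Map[String, (Int, Int, Int)] = tagged
            .groupBy(_._1)
            .map { case (cid, list) =>
                cid -> list.map(_._2).reduce((t1, t2) => (t1._1 + t2._1, t1._2 + t2._2, t1._3 + t2._3))
            }

        // Tuple ordering compares click, then order, then pay - the same rule sortBy(_._2, false) applies on the RDD.
        perCategory.toList.sortBy(_._2)(Ordering[(Int, Int, Int)].reverse).take(10).foreach(println)
    }
}
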
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark04_Req1_HotCategoryTop10Analysis3.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark04_Req1_HotCategoryTop10Analysis3.scala
new file mode 100644
index 0000000000..53d57626ef
--- /dev/null
+++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark04_Req1_HotCategoryTop10Analysis3.scala
@@ -0,0 +1,132 @@
+package com.taotao.cloud.bigdata.spark.atguigu.core.req
+
+import org.apache.spark.util.AccumulatorV2
+import org.apache.spark.{SparkConf, SparkContext}
+
+import scala.collection.mutable
+
+object Spark04_Req1_HotCategoryTop10Analysis3 {
+
+    def main(args: Array[String]): Unit = {
+
+        // TODO : Top10热门品类
+        val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis")
+        val sc = new SparkContext(sparConf)
+
+        // 1. 读取原始日志数据
+        val actionRDD = sc.textFile("datas/user_visit_action.txt")
+
+        val acc = new HotCategoryAccumulator
+        sc.register(acc, "hotCategory")
+
+        // 2. 将数据转换结构
+        actionRDD.foreach(
+            action => {
+                val datas = action.split("_")
+                if (datas(6) != "-1") {
+                    // 点击的场合
+                    acc.add((datas(6), "click"))
+                } else if (datas(8) != "null") {
+                    // 下单的场合
+                    val ids = datas(8).split(",")
+                    ids.foreach(
+                        id => {
+                            acc.add((id, "order"))
+                        }
+                    )
+                } else if (datas(10) != "null") {
+                    // 支付的场合
+                    val ids = datas(10).split(",")
+                    ids.foreach(
+                        id => {
+                            acc.add((id, "pay"))
+                        }
+                    )
+                }
+            }
+        )
+
+        val accVal: mutable.Map[String, HotCategory] = acc.value
+        val categories: mutable.Iterable[HotCategory] = accVal.map(_._2)
+
+        val sort = categories.toList.sortWith(
+            (left, right) => {
+                if (left.clickCnt > right.clickCnt) {
+                    true
+                } else if (left.clickCnt == right.clickCnt) {
+                    if (left.orderCnt > right.orderCnt) {
+                        true
+                    } else if (left.orderCnt == right.orderCnt) {
+                        left.payCnt > right.payCnt
+                    } else {
+                        false
+                    }
+                } else {
+                    false
+                }
+            }
+        )
+
+        // 5. 将结果采集到控制台打印出来
+        sort.take(10).foreach(println)
+
+        sc.stop()
+    }
+
+    case class HotCategory(cid: String, var clickCnt: Int, var orderCnt: Int, var payCnt: Int)
+
+    /**
+     * 自定义累加器
+     * 1. 继承AccumulatorV2,定义泛型
+     *    IN : ( 品类ID, 行为类型 )
+     *    OUT : mutable.Map[String, HotCategory]
+     * 2. 重写方法(6)
+     */
+    class HotCategoryAccumulator extends AccumulatorV2[(String, String), mutable.Map[String, HotCategory]] {
+
+        private val hcMap = mutable.Map[String, HotCategory]()
+
+        override def isZero: Boolean = {
+            hcMap.isEmpty
+        }
+
+        override def copy(): AccumulatorV2[(String, String), mutable.Map[String, HotCategory]] = {
+            new HotCategoryAccumulator()
+        }
+
+        override def reset(): Unit = {
+            hcMap.clear()
+        }
+
+        override def add(v: (String, String)): Unit = {
+            val cid = v._1
+            val actionType = v._2
+            val category: HotCategory = hcMap.getOrElse(cid, HotCategory(cid, 0, 0, 0))
+            if (actionType == "click") {
+                category.clickCnt += 1
+            } else if (actionType == "order") {
+                category.orderCnt += 1
+            } else if (actionType == "pay") {
+                category.payCnt += 1
+            }
+            hcMap.update(cid, category)
+        }
+
+        override def merge(other: AccumulatorV2[(String, String), mutable.Map[String, HotCategory]]): Unit = {
+            val map1 = this.hcMap
+            val map2 = other.value
+
+            map2.foreach {
+                case (cid, hc) => {
+                    val category: HotCategory = map1.getOrElse(cid, HotCategory(cid, 0, 0, 0))
+                    category.clickCnt += hc.clickCnt
+                    category.orderCnt += hc.orderCnt
+                    category.payCnt += hc.payCnt
+                    map1.update(cid, category)
+                }
+            }
+        }
+
+        override def value: mutable.Map[String, HotCategory] = hcMap
+    }
+}
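Compared with the reduceByKey versions, the accumulator version above removes the shuffle entirely: each task updates its local HotCategoryAccumulator and Spark merges the per-task maps on the driver through merge, which is then read once via value. For readers new to AccumulatorV2, the same register -> add (on executors, inside an action) -> value (on the driver) life cycle can be seen with the built-in longAccumulator. A minimal sketch, with hypothetical names that are not part of this patch, assuming a local master:

import org.apache.spark.{SparkConf, SparkContext}

object AccumulatorLifecycleSketch {
    def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setMaster("local[*]").setAppName("AccumulatorLifecycleSketch")
        val sc = new SparkContext(conf)

        // Built-in accumulator: registered automatically by SparkContext.longAccumulator.
        val clickAcc = sc.longAccumulator("clickCount")

        // Updates happen on the executors inside an action, just like acc.add in Spark04 above.
        sc.makeRDD(Seq("click", "order", "click", "pay"))
            .foreach(kind => if (kind == "click") clickAcc.add(1L))

        // Reading value is only meaningful on the driver, after the action has finished.
        println(s"clicks = ${clickAcc.value}")

        sc.stop()
    }
}

One caveat that applies to the custom accumulator as well: updates made inside transformations can be re-applied if a task is retried, so counts are only guaranteed to be added exactly once when the accumulator is updated inside an action such as foreach.
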
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark05_Req2_HotCategoryTop10SessionAnalysis.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark05_Req2_HotCategoryTop10SessionAnalysis.scala
new file mode 100644
index 0000000000..408a98f40e
--- /dev/null
+++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark05_Req2_HotCategoryTop10SessionAnalysis.scala
@@ -0,0 +1,91 @@
+package com.taotao.cloud.bigdata.spark.atguigu.core.req
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.{SparkConf, SparkContext}
+
+object Spark05_Req2_HotCategoryTop10SessionAnalysis {
+
+    def main(args: Array[String]): Unit = {
+
+        // TODO : Top10热门品类
+        val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis")
+        val sc = new SparkContext(sparConf)
+
+        val actionRDD = sc.textFile("datas/user_visit_action.txt")
+        actionRDD.cache()
+        val top10Ids: Array[String] = top10Category(actionRDD)
+
+        // 1. 过滤原始数据,保留点击和前10品类ID
+        val filterActionRDD = actionRDD.filter(
+            action => {
+                val datas = action.split("_")
+                if (datas(6) != "-1") {
+                    top10Ids.contains(datas(6))
+                } else {
+                    false
+                }
+            }
+        )
+
+        // 2. 根据品类ID和sessionid进行点击量的统计
+        val reduceRDD: RDD[((String, String), Int)] = filterActionRDD.map(
+            action => {
+                val datas = action.split("_")
+                ((datas(6), datas(2)), 1)
+            }
+        ).reduceByKey(_ + _)
+
+        // 3. 将统计的结果进行结构的转换
+        // (( 品类ID,sessionId ),sum) => ( 品类ID,(sessionId, sum) )
+        val mapRDD = reduceRDD.map {
+            case ((cid, sid), sum) => {
+                (cid, (sid, sum))
+            }
+        }
+
+        // 4. 相同的品类进行分组
+        val groupRDD: RDD[(String, Iterable[(String, Int)])] = mapRDD.groupByKey()
+
+        // 5. 将分组后的数据进行点击量的排序,取前10名
+        val resultRDD = groupRDD.mapValues(
+            iter => {
+                iter.toList.sortBy(_._2)(Ordering.Int.reverse).take(10)
+            }
+        )
+
+        resultRDD.collect().foreach(println)
+
+
+        sc.stop()
+    }
+
+    def top10Category(actionRDD: RDD[String]) = {
+        val flatRDD: RDD[(String, (Int, Int, Int))] = actionRDD.flatMap(
+            action => {
+                val datas = action.split("_")
+                if (datas(6) != "-1") {
+                    // 点击的场合
+                    List((datas(6), (1, 0, 0)))
+                } else if (datas(8) != "null") {
+                    // 下单的场合
+                    val ids = datas(8).split(",")
+                    ids.map(id => (id, (0, 1, 0)))
+                } else if (datas(10) != "null") {
+                    // 支付的场合
+                    val ids = datas(10).split(",")
+                    ids.map(id => (id, (0, 0, 1)))
+                } else {
+                    Nil
+                }
+            }
+        )
+
+        val analysisRDD = flatRDD.reduceByKey(
+            (t1, t2) => {
+                (t1._1 + t2._1, t1._2 + t2._2, t1._3 + t2._3)
+            }
+        )
+
+        analysisRDD.sortBy(_._2, false).take(10).map(_._1)
+    }
+}
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark06_Req3_PageflowAnalysis.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark06_Req3_PageflowAnalysis.scala
new file mode 100644
index 0000000000..32d29d6b00
--- /dev/null
+++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/req/Spark06_Req3_PageflowAnalysis.scala
@@ -0,0 +1,121 @@
+package com.taotao.cloud.bigdata.spark.atguigu.core.req
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.{SparkConf, SparkContext}
+
+object Spark06_Req3_PageflowAnalysis {
+
+    def main(args: Array[String]): Unit = {
+
+        // TODO : Top10热门品类
+        val sparConf = new SparkConf().setMaster("local[*]").setAppName("HotCategoryTop10Analysis")
+        val sc = new SparkContext(sparConf)
+
+        val actionRDD = sc.textFile("datas/user_visit_action.txt")
+
+        val actionDataRDD = actionRDD.map(
+            action => {
+                val datas = action.split("_")
+                UserVisitAction(
+                    datas(0),
+                    datas(1).toLong,
+                    datas(2),
+                    datas(3).toLong,
+                    datas(4),
+                    datas(5),
+                    datas(6).toLong,
+                    datas(7).toLong,
+                    datas(8),
+                    datas(9),
+                    datas(10),
+                    datas(11),
+                    datas(12).toLong
+                )
+            }
+        )
+        actionDataRDD.cache()
+
+        // TODO 对指定的页面连续跳转进行统计
+        // 1-2,2-3,3-4,4-5,5-6,6-7
+        val ids = List[Long](1, 2, 3, 4, 5, 6, 7)
+        val okflowIds: List[(Long, Long)] = ids.zip(ids.tail)
+
+        // TODO 计算分母
+        val pageidToCountMap: Map[Long, Long] = actionDataRDD.filter(
+            action => {
+                ids.init.contains(action.page_id)
+            }
+        ).map(
+            action => {
+                (action.page_id, 1L)
+            }
+        ).reduceByKey(_ + _).collect().toMap
+
+        // TODO 计算分子
+
+        // 根据session进行分组
+        val sessionRDD: RDD[(String, Iterable[UserVisitAction])] = actionDataRDD.groupBy(_.session_id)
+
+        // 分组后,根据访问时间进行排序(升序)
+        val mvRDD: RDD[(String, List[((Long, Long), Int)])] = sessionRDD.mapValues(
+            iter => {
+                val sortList: List[UserVisitAction] = iter.toList.sortBy(_.action_time)
+
+                // 【1,2,3,4】
+                // 【1,2】,【2,3】,【3,4】
+                // 【1-2,2-3,3-4】
+                // Sliding : 滑窗
+                // 【1,2,3,4】
+                // 【2,3,4】
+                // zip : 拉链
+                val flowIds: List[Long] = sortList.map(_.page_id)
+                val pageflowIds: List[(Long, Long)] = flowIds.zip(flowIds.tail)
+
+                // 将不合法的页面跳转进行过滤
+                pageflowIds.filter(
+                    t => {
+                        okflowIds.contains(t)
+                    }
+                ).map(
+                    t => {
+                        (t, 1)
+                    }
+                )
+            }
+        )
+        // ((1,2),1)
+        val flatRDD: RDD[((Long, Long), Int)] = mvRDD.map(_._2).flatMap(list => list)
+        // ((1,2),1) => ((1,2),sum)
+        val dataRDD = flatRDD.reduceByKey(_ + _)
+
+        // TODO 计算单跳转换率
+        // 分子除以分母
+        dataRDD.foreach {
+            case ((pageid1, pageid2), sum) => {
+                val lon: Long = pageidToCountMap.getOrElse(pageid1, 0L)
+
+
println(s"页面${pageid1}跳转到页面${pageid2}单跳转换率为:" + (sum.toDouble / lon)) + } + } + + + sc.stop() + } + + //用户访问动作表 + case class UserVisitAction( + date: String, //用户点击行为的日期 + user_id: Long, //用户的ID + session_id: String, //Session的ID + page_id: Long, //某个页面的ID + action_time: String, //动作的时间点 + search_keyword: String, //用户搜索的关键词 + click_category_id: Long, //某一个商品品类的ID + click_product_id: Long, //某一个商品的ID + order_category_ids: String, //一次订单中所有品类的ID集合 + order_product_ids: String, //一次订单中所有商品的ID集合 + pay_category_ids: String, //一次支付中所有品类的ID集合 + pay_product_ids: String, //一次支付中所有商品的ID集合 + city_id: Long + ) //城市 id +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Driver.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Driver.scala new file mode 100644 index 0000000000..72f0eaf44c --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Driver.scala @@ -0,0 +1,40 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.test + +import java.io.{ObjectOutputStream, OutputStream} +import java.net.Socket + +object Driver { + + def main(args: Array[String]): Unit = { + // 连接服务器 + val client1 = new Socket("localhost", 9999) + val client2 = new Socket("localhost", 8888) + + val task = new Task() + + val out1: OutputStream = client1.getOutputStream + val objOut1 = new ObjectOutputStream(out1) + + val subTask = new SubTask() + subTask.logic = task.logic + subTask.datas = task.datas.take(2) + + objOut1.writeObject(subTask) + objOut1.flush() + objOut1.close() + client1.close() + + + val out2: OutputStream = client2.getOutputStream + val objOut2 = new ObjectOutputStream(out2) + + val subTask1 = new SubTask() + subTask1.logic = task.logic + subTask1.datas = task.datas.takeRight(2) + objOut2.writeObject(subTask1) + objOut2.flush() + objOut2.close() + client2.close() + println("客户端数据发送完毕") + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor.scala similarity index 99% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor.scala index caa20fc367..9140b57bbf 100644 --- a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor.scala +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor.scala @@ -6,7 +6,6 @@ import java.net.{ServerSocket, Socket} object Executor { def main(args: Array[String]): Unit = { - // 启动服务器,接收数据 val server = new ServerSocket(9999) println("服务器启动,等待接收数据") diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor2.scala new file mode 100644 index 0000000000..1164201492 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Executor2.scala @@ -0,0 +1,25 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.test + +import 
java.io.{InputStream, ObjectInputStream} +import java.net.{ServerSocket, Socket} + +object Executor2 { + + def main(args: Array[String]): Unit = { + + // 启动服务器,接收数据 + val server = new ServerSocket(8888) + println("服务器启动,等待接收数据") + + // 等待客户端的连接 + val client: Socket = server.accept() + val in: InputStream = client.getInputStream + val objIn = new ObjectInputStream(in) + val task: SubTask = objIn.readObject().asInstanceOf[SubTask] + val ints: List[Int] = task.compute() + println("计算节点[8888]计算的结果为:" + ints) + objIn.close() + client.close() + server.close() + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/SubTask.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/SubTask.scala new file mode 100644 index 0000000000..86be7aabc9 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/SubTask.scala @@ -0,0 +1,11 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.test + +class SubTask extends Serializable { + var datas: List[Int] = _ + var logic: (Int) => Int = _ + + // 计算 + def compute() = { + datas.map(logic) + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Task.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Task.scala new file mode 100644 index 0000000000..b34cc72bcc --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/test/Task.scala @@ -0,0 +1,11 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.test + +class Task extends Serializable { + + val datas = List(1, 2, 3, 4) + + //val logic = ( num:Int )=>{ num * 2 } + val logic: (Int) => Int = _ * 2 + + +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark01_WordCount.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark01_WordCount.scala new file mode 100644 index 0000000000..82557ad4ef --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark01_WordCount.scala @@ -0,0 +1,49 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.wc + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark01_WordCount { + + def main(args: Array[String]): Unit = { + + // Application + // Spark框架 + // 建立和Spark框架的连接 + // JDBC : Connection + val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") + val sc = new SparkContext(sparConf) + + // TODO 执行业务操作 + + // 1. 读取文件,获取一行一行的数据 + // hello world + val lines: RDD[String] = sc.textFile("datas") + + // 2. 将一行数据进行拆分,形成一个一个的单词(分词) + // 扁平化:将整体拆分成个体的操作 + // "hello world" => hello, world, hello, world + val words: RDD[String] = lines.flatMap(_.split(" ")) + + // 3. 将数据根据单词进行分组,便于统计 + // (hello, hello, hello), (world, world) + val wordGroup: RDD[(String, Iterable[String])] = words.groupBy(word => word) + + // 4. 对分组后的数据进行转换 + // (hello, hello, hello), (world, world) + // (hello, 3), (world, 2) + val wordToCount: RDD[(String, Int)] = wordGroup.map { + case (word, list) => { + (word, list.size) + } + } + + // 5. 
将转换结果采集到控制台打印出来 + val array: Array[(String, Int)] = wordToCount.collect() + array.foreach(println) + + // TODO 关闭连接 + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark02_WordCount1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark02_WordCount1.scala new file mode 100644 index 0000000000..96e2a1e4a9 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark02_WordCount1.scala @@ -0,0 +1,45 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.wc + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +object Spark02_WordCount1 { + + def main(args: Array[String]): Unit = { + + // Application + // Spark框架 + // TODO 建立和Spark框架的连接 + // JDBC : Connection + val sparConf = new SparkConf().setMaster("local").setAppName("WordCount") + val sc = new SparkContext(sparConf) + + // TODO 执行业务操作 + + // 1. 读取文件,获取一行一行的数据 + // hello world + val lines: RDD[String] = sc.textFile("datas") + + // 2. 将一行数据进行拆分,形成一个一个的单词(分词) + // 扁平化:将整体拆分成个体的操作 + // "hello world" => hello, world, hello, world + val words: RDD[String] = lines.flatMap(_.split(" ")) + + // 3. 将单词进行结构的转换,方便统计 + // word => (word, 1) + val wordToOne = words.map(word => (word, 1)) + + // 4. 将转换后的数据进行分组聚合 + // 相同key的value进行聚合操作 + // (word, 1) => (word, sum) + val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey(_ + _) + + // 5. 将转换结果采集到控制台打印出来 + val array: Array[(String, Int)] = wordToSum.collect() + array.foreach(println) + + // TODO 关闭连接 + sc.stop() + + } +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark03_WordCount.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark03_WordCount.scala new file mode 100644 index 0000000000..44d5e24658 --- /dev/null +++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/core/wc/Spark03_WordCount.scala @@ -0,0 +1,119 @@ +package com.taotao.cloud.bigdata.spark.atguigu.core.wc + +import org.apache.spark.rdd.RDD +import org.apache.spark.{SparkConf, SparkContext} + +import scala.collection.mutable + +object Spark03_WordCount { + def main(args: Array[String]): Unit = { + + val sparConf = new SparkConf() + .setMaster("local") + .setAppName("WordCount") + val sc = new SparkContext(sparConf) + + wordcount91011(sc) + + sc.stop() + + } + + // groupBy + def wordcount1(sc: SparkContext): Unit = { + + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + val group: RDD[(String, Iterable[String])] = words.groupBy(word => word) + val wordCount: RDD[(String, Int)] = group.mapValues(iter => iter.size) + } + + // groupByKey + def wordcount2(sc: SparkContext): Unit = { + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + val wordOne = words.map((_, 1)) + val group: RDD[(String, Iterable[Int])] = wordOne.groupByKey() + val wordCount: RDD[(String, Int)] = group.mapValues(iter => iter.size) + } + + // reduceByKey + def wordcount3(sc: SparkContext): Unit = { + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + val wordOne = words.map((_, 1)) + val wordCount: RDD[(String, Int)] = wordOne.reduceByKey(_ + _) + } + + // aggregateByKey + def wordcount4(sc: SparkContext): 
Unit = { + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + val wordOne = words.map((_, 1)) + val wordCount: RDD[(String, Int)] = wordOne.aggregateByKey(0)(_ + _, _ + _) + } + + // foldByKey + def wordcount5(sc: SparkContext): Unit = { + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + val wordOne = words.map((_, 1)) + val wordCount: RDD[(String, Int)] = wordOne.foldByKey(0)(_ + _) + } + + // combineByKey + def wordcount6(sc: SparkContext): Unit = { + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + val wordOne = words.map((_, 1)) + val wordCount: RDD[(String, Int)] = wordOne.combineByKey( + v => v, + (x: Int, y) => x + y, + (x: Int, y: Int) => x + y + ) + } + + // countByKey + def wordcount7(sc: SparkContext): Unit = { + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + val wordOne = words.map((_, 1)) + val wordCount: collection.Map[String, Long] = wordOne.countByKey() + } + + // countByValue + def wordcount8(sc: SparkContext): Unit = { + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + val wordCount: collection.Map[String, Long] = words.countByValue() + } + + // reduce, aggregate, fold + def wordcount91011(sc: SparkContext): Unit = { + val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark")) + val words = rdd.flatMap(_.split(" ")) + + // 【(word, count),(word, count)】 + // word => Map[(word,1)] + val mapWord = words.map( + word => { + mutable.Map[String, Long]((word, 1)) + } + ) + + val wordCount = mapWord.reduce( + (map1, map2) => { + map2.foreach { + case (word, count) => { + val newCount = map1.getOrElse(word, 0L) + count + map1.update(word, newCount) + } + } + map1 + } + ) + + println(wordCount) + } + +} diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark01_SparkSQL_Basic.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark01_SparkSQL_Basic.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark01_SparkSQL_Basic.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark01_SparkSQL_Basic.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark02_SparkSQL_UDF.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark02_SparkSQL_UDF.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark02_SparkSQL_UDF.scala rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark02_SparkSQL_UDF.scala diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF.scala similarity index 100% rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF.scala rename to 
taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF1.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF1.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF1.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF2.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF2.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark03_SparkSQL_UDAF2.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark04_SparkSQL_JDBC.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark04_SparkSQL_JDBC.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark04_SparkSQL_JDBC.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark04_SparkSQL_JDBC.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark05_SparkSQL_Hive.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark05_SparkSQL_Hive.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark05_SparkSQL_Hive.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark05_SparkSQL_Hive.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test1.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test1.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test1.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test2.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test2.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/sql/Spark06_SparkSQL_Test2.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming01_WordCount.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming01_WordCount.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming01_WordCount.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming01_WordCount.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming02_Queue.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming02_Queue.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming02_Queue.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming02_Queue.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming03_DIY.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming03_DIY.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming03_DIY.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming03_DIY.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming04_Kafka.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming04_Kafka.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming04_Kafka.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming04_Kafka.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming05_State.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming05_State.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming05_State.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming05_State.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Join.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Join.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Join.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Join.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Transform.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Transform.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Transform.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Transform.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window1.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window1.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming06_State_Window1.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output1.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output1.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming07_Output1.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming08_Close.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming08_Close.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming08_Close.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming08_Close.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming09_Resume.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming09_Resume.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming09_Resume.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming09_Resume.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming10_MockData.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming10_MockData.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming10_MockData.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming10_MockData.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList1.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList1.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList1.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming11_Req1_BlackList1.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming12_Req2.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming12_Req2.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming12_Req2.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming12_Req2.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req3.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req3.scala
similarity index 100%
rename from taotao-cloud-bigdata/taotao-cloud-spark/src/main/java/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req3.scala
rename to taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req3.scala
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req31.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req31.scala
new file mode 100644
index 0000000000..1cc582f3cb
--- /dev/null
+++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/streaming/SparkStreaming13_Req31.scala
@@ -0,0 +1,94 @@
+package com.taotao.cloud.bigdata.spark.atguigu.streaming
+
+import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming.dstream.InputDStream
+import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+
+import java.io.{File, FileWriter, PrintWriter}
+import java.text.SimpleDateFormat
+import scala.collection.mutable.ListBuffer
+
+object SparkStreaming13_Req31 {
+
+    def main(args: Array[String]): Unit = {
+
+        val sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkStreaming")
+        val ssc = new StreamingContext(sparkConf, Seconds(5))
+
+        val kafkaPara: Map[String, Object] = Map[String, Object](
+            ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "linux1:9092,linux2:9092,linux3:9092",
+            ConsumerConfig.GROUP_ID_CONFIG -> "atguigu",
+            "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
+            "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer"
+        )
+
+        val kafkaDataDS: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
+            ssc,
+            LocationStrategies.PreferConsistent,
+            ConsumerStrategies.Subscribe[String, String](Set("atguiguNew"), kafkaPara)
+        )
+        val adClickData = kafkaDataDS.map(
+            kafkaData => {
+                val data = kafkaData.value()
+                val datas = data.split(" ")
+                AdClickData(datas(0), datas(1), datas(2), datas(3), datas(4))
+            }
+        )
+
+        // Last one minute of data, recomputed every 10 seconds
+        // 12:01 => 12:00
+        // 12:11 => 12:10
+        // 12:19 => 12:10
+        // 12:25 => 12:20
+        // 12:59 => 12:50
+
+        // 55 => 50, 49 => 40, 32 => 30
+        // 55 / 10 * 10 => 50
+        // 49 / 10 * 10 => 40
+        // 32 / 10 * 10 => 30
+
+        // Sliding-window computation: bucket timestamps to 10-second boundaries
+        val reduceDS = adClickData.map(
+            data => {
+                val ts = data.ts.toLong
+                val newTS = ts / 10000 * 10000
+                (newTS, 1)
+            }
+        ).reduceByKeyAndWindow((x: Int, y: Int) => {
+            x + y
+        }, Seconds(60), Seconds(10))
+
+        //reduceDS.print()
+        reduceDS.foreachRDD(
+            rdd => {
+                val list = ListBuffer[String]()
+
+                val datas: Array[(Long, Int)] = rdd.sortByKey(true).collect()
+                datas.foreach {
+                    case (time, cnt) => {
+
+                        val timeString = new SimpleDateFormat("mm:ss").format(new java.util.Date(time))
+
+                        list.append(s"""{"xtime":"${timeString}", "yval":"${cnt}"}""")
+                    }
+                }
+
+                // Write the result to a JSON file
+                val out = new PrintWriter(new FileWriter(new File("D:\\mineworkspace\\idea\\classes\\atguigu-classes\\datas\\adclick\\adclick.json")))
+                out.println("[" + list.mkString(",") + "]")
+                out.flush()
+                out.close()
+            }
+        )
+
+
+        ssc.start()
+        ssc.awaitTermination()
+    }
+
+    // Ad click data
+    case class AdClickData(ts: String, area: String, city: String, user: String, ad: String)
+
+}
diff --git a/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/util/JDBCUtil.scala b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/util/JDBCUtil.scala
new file mode 100644
index 0000000000..9c8d9dd738
--- /dev/null
+++ b/taotao-cloud-bigdata/taotao-cloud-spark/src/main/scala/com/taotao/cloud/bigdata/spark/atguigu/util/JDBCUtil.scala
@@ -0,0 +1,65 @@
+package com.taotao.cloud.bigdata.spark.atguigu.util
+
+import java.sql.{Connection, PreparedStatement}
+import java.util.Properties
+import javax.sql.DataSource
+import com.alibaba.druid.pool.DruidDataSourceFactory
+
+object JDBCUtil {
+    // Connection pool
+    var dataSource: DataSource = init()
+
+    // Initialize the connection pool
+    def init(): DataSource = {
+        val properties = new Properties()
+        properties.setProperty("driverClassName", "com.mysql.jdbc.Driver")
+        properties.setProperty("url", "jdbc:mysql://linux1:3306/spark-streaming?useUnicode=true&characterEncoding=UTF-8")
+        properties.setProperty("username", "root")
+        properties.setProperty("password", "123123")
+        properties.setProperty("maxActive", "50")
+        DruidDataSourceFactory.createDataSource(properties) // assumes the com.alibaba:druid dependency is on the classpath
+    }
+
+    // Get a MySQL connection
+    def getConnection: Connection = {
+        dataSource.getConnection
+    }
+
+    // Execute an SQL statement (single-row insert/update)
+    def executeUpdate(connection: Connection, sql: String, params: Array[Any]): Int = {
+        var rtn = 0
+        var pstmt: PreparedStatement = null
+        try {
+            connection.setAutoCommit(false)
+            pstmt = connection.prepareStatement(sql)
+
+            if (params != null && params.length > 0) {
+                for (i <- params.indices) {
+                    pstmt.setObject(i + 1, params(i))
+                }
+            }
+            rtn = pstmt.executeUpdate()
+            connection.commit()
+            pstmt.close()
+        } catch {
+            case e: Exception => e.printStackTrace()
+        }
+        rtn
+    }
+
+    def isExist(connection: Connection, sql: String, params: Array[Any]): Boolean = {
+        var flag: Boolean = false
+        var pstmt: PreparedStatement = null
+        try {
+            pstmt = connection.prepareStatement(sql)
+            for (i <- params.indices) {
+                pstmt.setObject(i + 1, params(i))
+            }
+            flag = pstmt.executeQuery().next()
+            pstmt.close()
+        } catch {
+            case e: Exception => e.printStackTrace()
+        }
+        flag
+    }
+}