From 9c04e31030dda3baaa4ed6c27de47d7af35719b7 Mon Sep 17 00:00:00 2001 From: shivsood Date: Tue, 20 Oct 2020 12:30:33 -0700 Subject: [PATCH] Sample : SparkSQL usage with connectors --- samples/UsageWithSparkSQL.ipynb | 425 ++++++++++++++++++++++++++++++++ 1 file changed, 425 insertions(+) create mode 100644 samples/UsageWithSparkSQL.ipynb diff --git a/samples/UsageWithSparkSQL.ipynb b/samples/UsageWithSparkSQL.ipynb new file mode 100644 index 0000000..dfb7031 --- /dev/null +++ b/samples/UsageWithSparkSQL.ipynb @@ -0,0 +1,425 @@ +{ + "metadata": { + "kernelspec": { + "name": "pysparkkernel", + "display_name": "PySpark" + }, + "language_info": { + "name": "pyspark", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "python", + "version": 2 + }, + "pygments_lexer": "python2" + } + }, + "nbformat_minor": 2, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Demonstrates using Spark SQL with JDBC connectors" + ], + "metadata": { + "azdata_cell_guid": "68a65c32-290f-4490-93fe-ee3cbbe3bf96" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"Hello Spark SQL\")" + ], + "metadata": { + "azdata_cell_guid": "26c056ff-b9f4-41f9-aa49-627d23626fd5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Starting Spark application\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "\n
IDYARN Application IDKindStateSpark UIDriver logCurrent session?
240application_1602365873887_0017pysparkidleLinkLink
" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "de7863cc5a084be4b0172ed26fa573df" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "SparkSession available as 'spark'.\n" + }, + { + "output_type": "display_data", + "data": { + "text/plain": "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "180b430608b146f88bcd769b4ec3a34e" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": "hello" + } + ], + "execution_count": 3 + }, + { + "cell_type": "markdown", + "source": [ + "# Create a Spark table from a SQL Server table using SparkSQL and JDBC connector" + ], + "metadata": { + "azdata_cell_guid": "867e8c0c-1990-46a2-a364-3580da390e9e" + } + }, + { + "cell_type": "code", + "source": [ + "%%sql\r\n", + "\r\n", + "CREATE TABLE spark_table_1\r\n", + "USING org.apache.spark.sql.jdbc\r\n", + "OPTIONS (\r\n", + " url \"jdbc:sqlserver://master-0.master-svc\",\r\n", + " databasename \"connector_test_db\",\r\n", + " dbtable \"input_table\",\r\n", + " user \"connector_user\",\r\n", + " password \"password123!#\"\r\n", + ")\r\n", + "" + ], + "metadata": { + "azdata_cell_guid": "81e79dee-6a5e-4593-988b-517cf6851604" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "HTML(value='No results.')", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "94dfe7fd6988424186230dc77829a23a" + } + }, + "metadata": {} + } + ], + "execution_count": 16 + }, + { + "cell_type": "code", + "source": [ + "%%sql\r\n", + "select * from spark_table_1 limit 2\r\n", + "" + ], + "metadata": { + "azdata_cell_guid": "6535fc7f-b1c0-455b-83d3-e62a633c7263" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": " \n " + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "\n\n" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ss_sold_date_skss_sold_time_skss_item_skss_customer_skss_cdemo_skss_hdemo_skss_addr_skss_store_skss_promo_skss_ticket_numberss_quantityss_wholesale_costss_list_pricess_sales_pricess_ext_discount_amtss_ext_sales_pricess_ext_wholesale_costss_ext_list_pricess_ext_taxss_coupon_amtss_net_paidss_net_paid_inc_taxss_net_profit
0245147568697182857190340363492889582163284779.44128.6930.880.01451.363733.686048.43101.590.01451.361552.95-2282.32
124511695141917573015641399014371042815207897.36131.4378.850.06150.307594.0810251.54369.010.06150.306519.31-1443.78
\n
" + }, + "metadata": {} + } + ], + "execution_count": 18 + }, + { + "cell_type": "markdown", + "source": [ + "# Create a Spark VIEW from a SQL Server TABLE using SparkSQL and JDBC connector" + ], + "metadata": { + "azdata_cell_guid": "4f339cba-61f3-45d5-a18b-330050f09c59" + } + }, + { + "cell_type": "code", + "source": [ + "%%sql\n", + "\n", + "CREATE TEMPORARY VIEW spark_view_1\n", + "USING org.apache.spark.sql.jdbc\n", + "OPTIONS (\n", + " url \"jdbc:sqlserver://master-0.master-svc\",\n", + " databasename \"connector_test_db\",\n", + " dbtable \"input_table\",\n", + " user \"connector_user\",\n", + " password \"password123!#\"\n", + ")" + ], + "metadata": { + "azdata_cell_guid": "f742d67f-41f8-4cc4-8b8d-eb60d6ab825b" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "HTML(value='No results.')", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "271cc05452a34414bee3f014993ee151" + } + }, + "metadata": {} + } + ], + "execution_count": 19 + }, + { + "cell_type": "code", + "source": [ + "%%sql\r\n", + "select * from spark_view_1 limit 2" + ], + "metadata": { + "azdata_cell_guid": "90e52c53-6106-438c-b694-3d7cdcb5b3ca" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": " \n " + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "\n\n" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ss_sold_date_skss_sold_time_skss_item_skss_customer_skss_cdemo_skss_hdemo_skss_addr_skss_store_skss_promo_skss_ticket_numberss_quantityss_wholesale_costss_list_pricess_sales_pricess_ext_discount_amtss_ext_sales_pricess_ext_wholesale_costss_ext_list_pricess_ext_taxss_coupon_amtss_net_paidss_net_paid_inc_taxss_net_profit
0245147568697182857190340363492889582163284779.44128.6930.880.01451.363733.686048.43101.590.01451.361552.95-2282.32
124511695141917573015641399014371042815207897.36131.4378.850.06150.307594.0810251.54369.010.06150.306519.31-1443.78
\n
" + }, + "metadata": {} + } + ], + "execution_count": 20 + }, + { + "cell_type": "markdown", + "source": [ + "# Create a Spark table from a SQL Server table using SparkSQL and Spark SQL connector\r\n", + "\r\n", + "Note that for \"reads\" Spark SQL connector delegates control to JDBC connector" + ], + "metadata": { + "azdata_cell_guid": "7d31406c-ddd1-48de-90e9-1317407a259c" + } + }, + { + "cell_type": "code", + "source": [ + "%%sql\r\n", + "\r\n", + "CREATE TABLE spark_table_2\r\n", + "USING com.microsoft.sqlserver.jdbc.spark\r\n", + "OPTIONS (\r\n", + " url \"jdbc:sqlserver://master-0.master-svc\",\r\n", + " databasename \"connector_test_db\",\r\n", + " dbtable \"input_table\",\r\n", + " user \"connector_user\",\r\n", + " password \"password123!#\"\r\n", + ")\r\n", + "" + ], + "metadata": { + "azdata_cell_guid": "214d85a7-441a-4d8e-8eeb-785e02a4f5fc" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "HTML(value='No results.')", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "13b477e20cdb4141a15ed7b1033fbb51" + } + }, + "metadata": {} + } + ], + "execution_count": 21 + }, + { + "cell_type": "code", + "source": [ + "%%sql\r\n", + "select * from spark_table_2 limit 2" + ], + "metadata": { + "azdata_cell_guid": "e23fcb73-dd3b-49f1-a43b-704c9844cf55" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": " \n " + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "\n\n" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ss_sold_date_skss_sold_time_skss_item_skss_customer_skss_cdemo_skss_hdemo_skss_addr_skss_store_skss_promo_skss_ticket_numberss_quantityss_wholesale_costss_list_pricess_sales_pricess_ext_discount_amtss_ext_sales_pricess_ext_wholesale_costss_ext_list_pricess_ext_taxss_coupon_amtss_net_paidss_net_paid_inc_taxss_net_profit
0245147568697182857190340363492889582163284779.44128.6930.880.01451.363733.686048.43101.590.01451.361552.95-2282.32
124511695141917573015641399014371042815207897.36131.4378.850.06150.307594.0810251.54369.010.06150.306519.31-1443.78
\n
" + }, + "metadata": {} + } + ], + "execution_count": 22 + }, + { + "cell_type": "markdown", + "source": [ + "# Create a Spark VIEW from a SQLServer TABLE using SparkSQL and SparkSQL connector\r\n", + "\r\n", + "Note that for \"reads\" Spark SQL connector delegates control to JDBC connector" + ], + "metadata": { + "azdata_cell_guid": "f46206ce-8cdb-4840-953f-737353e3ea10" + } + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "azdata_cell_guid": "8069bdf2-d066-4a41-9251-9da1bd7d8797" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "source": [ + "%%sql\r\n", + "\r\n", + "CREATE TEMPORARY VIEW spark_view_2\r\n", + "USING com.microsoft.sqlserver.jdbc.spark\r\n", + "OPTIONS (\r\n", + " url \"jdbc:sqlserver://master-0.master-svc\",\r\n", + " databasename \"connector_test_db\",\r\n", + " dbtable \"input_table\",\r\n", + " user \"connector_user\",\r\n", + " password \"password123!#\"\r\n", + ")\r\n", + "\r\n", + "" + ], + "metadata": { + "azdata_cell_guid": "0a4dfbb7-2edc-4c68-899f-2f9d45be331b" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "HTML(value='No results.')", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "3018a69ad52a4d40bda380a52c62f18c" + } + }, + "metadata": {} + } + ], + "execution_count": 23 + }, + { + "cell_type": "code", + "source": [ + "%%sql\r\n", + "select * from spark_view_2 limit 2" + ], + "metadata": { + "azdata_cell_guid": "d35216bf-0333-4de1-8cfb-a471eb02f588" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": " \n " + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "\n\n" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ss_sold_date_skss_sold_time_skss_item_skss_customer_skss_cdemo_skss_hdemo_skss_addr_skss_store_skss_promo_skss_ticket_numberss_quantityss_wholesale_costss_list_pricess_sales_pricess_ext_discount_amtss_ext_sales_pricess_ext_wholesale_costss_ext_list_pricess_ext_taxss_coupon_amtss_net_paidss_net_paid_inc_taxss_net_profit
0245147568697182857190340363492889582163284779.44128.6930.880.01451.363733.686048.43101.590.01451.361552.95-2282.32
124511695141917573015641399014371042815207897.36131.4378.850.06150.307594.0810251.54369.010.06150.306519.31-1443.78
\n
" + }, + "metadata": {} + } + ], + "execution_count": 24 + } + ] +} \ No newline at end of file