From 32d500ac4d36e1b94bfbfd19a30127d5aded78a9 Mon Sep 17 00:00:00 2001
From: Shinnnyshinshin
Date: Tue, 2 Nov 2021 09:26:23 -0700
Subject: [PATCH] remove spark flag

---
 azure-pipelines-docs-integration.yml  |  2 +-
 .../in_memory/spark.md                | 20 +++++++++----------
 .../in_memory/spark_python_example.py |  8 ++------
 .../in_memory/spark_yaml_example.py   |  8 ++------
 4 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/azure-pipelines-docs-integration.yml b/azure-pipelines-docs-integration.yml
index 992ab215e32b..9cf8ea375304 100644
--- a/azure-pipelines-docs-integration.yml
+++ b/azure-pipelines-docs-integration.yml
@@ -96,7 +96,7 @@ stages:
       - script: |
           pip install pytest pytest-azurepipelines
           # TODO enable spark tests
-          pytest -v --docs-tests -m docs --no-spark --mysql --mssql tests/integration/test_script_runner.py
+          pytest -v --docs-tests -m docs --mysql --mssql tests/integration/test_script_runner.py
        displayName: 'pytest'
        env:
          # snowflake credentials
diff --git a/docs/guides/connecting_to_your_data/in_memory/spark.md b/docs/guides/connecting_to_your_data/in_memory/spark.md
index e84573a3fd1c..3f49e4ec4877 100644
--- a/docs/guides/connecting_to_your_data/in_memory/spark.md
+++ b/docs/guides/connecting_to_your_data/in_memory/spark.md
@@ -29,7 +29,7 @@ This will allow you to validate and explore your data.
 
 Import these necessary packages and modules.
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L1-L12
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L1-L10
 ```
 
@@ -47,23 +47,23 @@ Using this example configuration add in the path to a directory that contains so
   ]}>
 
 <TabItem value="yaml">
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L37-L47
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L33-L43
 ```
 
 Run this code to test your configuration.
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L49
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L45
 ```
 
 </TabItem>
 <TabItem value="python">
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py#L37-L47
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py#L33-L43
 ```
 
 Run this code to test your configuration.
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py#L49
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py#L45
 ```
 
@@ -84,13 +84,13 @@ Save the configuration into your `DataContext` by using the `add_datasource()` f
   ]}>
 
 <TabItem value="yaml">
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L51
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L47
 ```
 
 </TabItem>
 <TabItem value="python">
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py#L51
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py#L47
 ```
 
@@ -102,17 +102,17 @@ Verify your new Datasource by loading data from it into a `Validator` using a `B
 
 Add the variable containing your dataframe (`df` in this example) to the `batch_data` key under `runtime_parameters` in your `BatchRequest`.
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L54-L60
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L50-L56
 ```
 
 :::note Note this guide uses a toy dataframe that looks like this.
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L20-L25
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L16-L20
 ```
 :::
 
 Then load data into the `Validator`.
 
-```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L62-L67
+```python file=../../../../tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py#L58-L63
 ```
 
diff --git a/tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py b/tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py
index 931a6e5dccba..19c6b0d7808e 100644
--- a/tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py
+++ b/tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_python_example.py
@@ -1,10 +1,8 @@
-import findspark
-from pyspark import SparkContext
-from pyspark.sql import SparkSession
 from ruamel import yaml
 
 import great_expectations as ge
 from great_expectations.core.batch import BatchRequest, RuntimeBatchRequest
+from great_expectations.core.util import get_or_create_spark_session
 from great_expectations.data_context import BaseDataContext
 from great_expectations.data_context.types.base import (
     DataContextConfig,
@@ -12,9 +10,7 @@
 )
 
 # Set up a basic spark dataframe
-findspark.init()
-sc = SparkContext(appName="app")
-spark = SparkSession(sc)
+spark = get_or_create_spark_session()
 
 # basic dataframe
 data = [
diff --git a/tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py b/tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py
index 6b30394241c9..383b5c24b86b 100644
--- a/tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py
+++ b/tests/integration/docusaurus/connecting_to_your_data/in_memory/spark_yaml_example.py
@@ -1,10 +1,8 @@
-import findspark
-from pyspark import SparkContext
-from pyspark.sql import SparkSession
 from ruamel import yaml
 
 import great_expectations as ge
 from great_expectations.core.batch import BatchRequest, RuntimeBatchRequest
+from great_expectations.core.util import get_or_create_spark_session
 from great_expectations.data_context import BaseDataContext
 from great_expectations.data_context.types.base import (
     DataContextConfig,
@@ -12,9 +10,7 @@
 )
 
 # Set up a basic spark dataframe
-findspark.init()
-sc = SparkContext(appName="app")
-spark = SparkSession(sc)
+spark = get_or_create_spark_session()
 
 # basic dataframe
 data = [
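
For reference, below is a minimal sketch of the pattern the example scripts move to: build the Spark session with `get_or_create_spark_session()` instead of `findspark`/`SparkContext`, then hand an in-memory dataframe to a `RuntimeBatchRequest` via the `batch_data` key under `runtime_parameters`, as described in the guide being updated. Only `get_or_create_spark_session()` and the `batch_data`/`runtime_parameters` pattern come from the files in this patch; the Datasource, data connector, and asset names (`my_spark_datasource`, `my_runtime_data_connector`, `my_data_asset`), the toy dataframe contents, and the use of `ge.get_context()` with an existing project are illustrative assumptions.

```python
from ruamel import yaml

import great_expectations as ge
from great_expectations.core.batch import RuntimeBatchRequest
from great_expectations.core.util import get_or_create_spark_session

# Reuse or create a Spark session, as the updated example scripts do.
spark = get_or_create_spark_session()

# Hypothetical toy dataframe standing in for the one built in the example scripts.
df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["id", "col_1"])

# Assumes an existing Data Context (e.g. created with `great_expectations init`).
context = ge.get_context()

# Illustrative Datasource config: a Spark execution engine plus a
# RuntimeDataConnector so in-memory dataframes can be passed at runtime.
datasource_yaml = """
name: my_spark_datasource
class_name: Datasource
execution_engine:
  class_name: SparkDFExecutionEngine
data_connectors:
  my_runtime_data_connector:
    class_name: RuntimeDataConnector
    batch_identifiers:
      - batch_id
"""
context.add_datasource(**yaml.load(datasource_yaml))

# The in-memory dataframe goes under the batch_data key in runtime_parameters.
batch_request = RuntimeBatchRequest(
    datasource_name="my_spark_datasource",
    data_connector_name="my_runtime_data_connector",
    data_asset_name="my_data_asset",  # illustrative name
    runtime_parameters={"batch_data": df},
    batch_identifiers={"batch_id": "example_batch"},
)

validator = context.get_validator(
    batch_request=batch_request,
    create_expectation_suite_with_name="test_suite",
)
print(validator.head())
```

As its name suggests, `get_or_create_spark_session()` reuses an already-running Spark session when one exists, which is presumably why the example scripts no longer need `findspark` or a hand-built `SparkContext`.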