From fd886f4fa7468ed7ac5a6c4e4c13e98f6691e59f Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Tue, 30 Jan 2024 17:30:01 -0800 Subject: [PATCH] Add the ability to test examples from yaml docs. --- .gitignore | 1 + sdks/python/apache_beam/yaml/readme_test.py | 17 ++-- .../apache_beam/yaml/yaml_mapping_test.py | 6 -- sdks/python/setup.py | 86 ++++++++++++------- 4 files changed, 68 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index 0852e63dbd3e5..127a0f800e165 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,7 @@ sdks/python/LICENSE sdks/python/NOTICE sdks/python/README.md sdks/python/apache_beam/portability/api/* +sdks/python/apache_beam/yaml/docs/* sdks/python/nosetests*.xml sdks/python/pytest*.xml sdks/python/postcommit_requirements.txt diff --git a/sdks/python/apache_beam/yaml/readme_test.py b/sdks/python/apache_beam/yaml/readme_test.py index dca1dbbc73659..85ce47d0a3d3f 100644 --- a/sdks/python/apache_beam/yaml/readme_test.py +++ b/sdks/python/apache_beam/yaml/readme_test.py @@ -287,19 +287,24 @@ def expand(self, pcoll): return pcoll +# These are copied from $ROOT/website/www/site/content/en/documentation/sdks +# at build time. +YAML_DOCS_DIR = os.path.join(os.path.join(os.path.dirname(__file__), 'docs')) + ReadMeTest = createTestSuite( - 'ReadMeTest', os.path.join(os.path.dirname(__file__), 'README.md')) + 'ReadMeTest', os.path.join(YAML_DOCS_DIR, 'yaml.md')) ErrorHandlingTest = createTestSuite( - 'ErrorHandlingTest', - os.path.join(os.path.dirname(__file__), 'yaml_errors.md')) + 'ErrorHandlingTest', os.path.join(YAML_DOCS_DIR, 'yaml-errors.md')) + +MappingTest = createTestSuite( + 'MappingTest', os.path.join(YAML_DOCS_DIR, 'yaml-udf.md')) CombineTest = createTestSuite( - 'CombineTest', os.path.join(os.path.dirname(__file__), 'yaml_combine.md')) + 'CombineTest', os.path.join(YAML_DOCS_DIR, 'yaml-combine.md')) InlinePythonTest = createTestSuite( - 'InlinePythonTest', - os.path.join(os.path.dirname(__file__), 'inline_python.md')) + 'InlinePythonTest', os.path.join(YAML_DOCS_DIR, 'yaml-inline-python.md')) if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/sdks/python/apache_beam/yaml/yaml_mapping_test.py b/sdks/python/apache_beam/yaml/yaml_mapping_test.py index 0de2f7022550c..9dca107dca51f 100644 --- a/sdks/python/apache_beam/yaml/yaml_mapping_test.py +++ b/sdks/python/apache_beam/yaml/yaml_mapping_test.py @@ -16,13 +16,11 @@ # import logging -import os import unittest import apache_beam as beam from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to -from apache_beam.yaml.readme_test import createTestSuite from apache_beam.yaml.yaml_transform import YamlTransform DATA = [ @@ -154,10 +152,6 @@ def test_validate_explicit_types(self): self.assertEqual(result.element_type._fields[0][1], str) -YamlMappingDocTest = createTestSuite( - 'YamlMappingDocTest', - os.path.join(os.path.dirname(__file__), 'yaml_mapping.md')) - if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 5e78c491598fd..a5a14c035dc2f 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -17,7 +17,9 @@ """Apache Beam SDK for Python setup file.""" +import glob import os +import shutil import subprocess import sys import warnings @@ -148,11 +150,11 @@ def cythonize(*args, **kwargs): # not called even though S3 was initialized. This could lead to a # segmentation fault at exit. Keep pyarrow<13 until this is resolved. pyarrow_dependency = [ - 'pyarrow>=3.0.0,<12.0.0', - # NOTE: We can remove this once Beam increases the pyarrow lower bound - # to a version that fixes CVE. - 'pyarrow-hotfix<1' - ] + 'pyarrow>=3.0.0,<12.0.0', + # NOTE: We can remove this once Beam increases the pyarrow lower bound + # to a version that fixes CVE. + 'pyarrow-hotfix<1' + ] else: pyarrow_dependency = [ 'pyarrow>=3.0.0,<15.0.0', @@ -161,7 +163,6 @@ def cythonize(*args, **kwargs): 'pyarrow-hotfix<1' ] - # Exclude pandas<=1.4.2 since it doesn't work with numpy 1.24.x. # Exclude 1.5.0 and 1.5.1 because of # https://github.com/pandas-dev/pandas/issues/45725 @@ -169,12 +170,14 @@ def cythonize(*args, **kwargs): 'pandas>=1.4.3,!=1.5.0,!=1.5.1,<2.1;python_version>="3.8"', ] + def find_by_ext(root_dir, ext): for root, _, files in os.walk(root_dir): for file in files: if file.endswith(ext): yield os.path.realpath(os.path.join(root, file)) + # We must generate protos after setup_requires are installed. def generate_protos_first(): try: @@ -186,23 +189,42 @@ def generate_protos_first(): # skip proto generation in that case. if not os.path.exists(os.path.join(cwd, 'gen_protos.py')): # make sure we already generated protos - pb2_files = list(find_by_ext(os.path.join( - cwd, 'apache_beam', 'portability', 'api'), '_pb2.py')) + pb2_files = list( + find_by_ext( + os.path.join(cwd, 'apache_beam', 'portability', 'api'), + '_pb2.py')) if not pb2_files: - raise RuntimeError('protobuf files are not generated. ' - 'Please generate pb2 files') + raise RuntimeError( + 'protobuf files are not generated. ' + 'Please generate pb2 files') warnings.warn('Skipping proto generation as they are already generated.') return - out = subprocess.run([ - sys.executable, - os.path.join(cwd, 'gen_protos.py'), - '--no-force' - ], capture_output=True, check=True) + out = subprocess.run( + [sys.executable, os.path.join(cwd, 'gen_protos.py'), '--no-force'], + capture_output=True, + check=True) print(out.stdout) except subprocess.CalledProcessError as err: - raise RuntimeError('Could not generate protos due to error: %s', - err.stderr) + raise RuntimeError('Could not generate protos due to error: %s', err.stderr) + + +def copy_tests_from_docs(): + python_root = os.path.abspath(os.path.dirname(__file__)) + docs_src = os.path.normpath( + os.path.join( + python_root, '../../website/www/site/content/en/documentation/sdks')) + docs_dest = os.path.normpath( + os.path.join(python_root, 'apache_beam/yaml/docs')) + if os.path.exists(docs_src): + shutil.rmtree(docs_dest, ignore_errors=True) + os.mkdir(docs_dest) + for path in glob.glob(os.path.join(docs_src, 'yaml*.md')): + shutil.copy(path, docs_dest) + else: + if not os.path.exists(docs_dest): + raise RuntimeError( + f'Could not locate yaml docs in {docs_src} or {docs_dest}.') def get_portability_package_data(): @@ -231,24 +253,27 @@ def get_portability_package_data(): # executes below. generate_protos_first() + # These data files live elsewhere in the full Beam repository. + copy_tests_from_docs() + # generate cythonize extensions only if we are building a wheel or # building an extension or running in editable mode. cythonize_cmds = ('bdist_wheel', 'build_ext', 'editable_wheel') if any(cmd in sys.argv for cmd in cythonize_cmds): extensions = cythonize([ - 'apache_beam/**/*.pyx', - 'apache_beam/coders/coder_impl.py', - 'apache_beam/metrics/cells.py', - 'apache_beam/metrics/execution.py', - 'apache_beam/runners/common.py', - 'apache_beam/runners/worker/logger.py', - 'apache_beam/runners/worker/opcounters.py', - 'apache_beam/runners/worker/operations.py', - 'apache_beam/transforms/cy_combiners.py', - 'apache_beam/transforms/stats.py', - 'apache_beam/utils/counters.py', - 'apache_beam/utils/windowed_value.py', - ]) + 'apache_beam/**/*.pyx', + 'apache_beam/coders/coder_impl.py', + 'apache_beam/metrics/cells.py', + 'apache_beam/metrics/execution.py', + 'apache_beam/runners/common.py', + 'apache_beam/runners/worker/logger.py', + 'apache_beam/runners/worker/opcounters.py', + 'apache_beam/runners/worker/operations.py', + 'apache_beam/transforms/cy_combiners.py', + 'apache_beam/transforms/stats.py', + 'apache_beam/utils/counters.py', + 'apache_beam/utils/windowed_value.py', + ]) else: extensions = [] # Keep all dependencies inlined in the setup call, otherwise Dependabot won't @@ -273,6 +298,7 @@ def get_portability_package_data(): '*/*/*.h', 'testing/data/*.yaml', 'yaml/*.yaml', + 'yaml/docs/*.md', *get_portability_package_data() ] },