Skip to content

Commit

Permalink
Add the ability to test examples from yaml docs.
Browse files Browse the repository at this point in the history
  • Loading branch information
robertwb committed Feb 8, 2024
1 parent e9202ab commit fd886f4
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 42 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ sdks/python/LICENSE
sdks/python/NOTICE
sdks/python/README.md
sdks/python/apache_beam/portability/api/*
sdks/python/apache_beam/yaml/docs/*
sdks/python/nosetests*.xml
sdks/python/pytest*.xml
sdks/python/postcommit_requirements.txt
Expand Down
17 changes: 11 additions & 6 deletions sdks/python/apache_beam/yaml/readme_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,19 +287,24 @@ def expand(self, pcoll):
return pcoll


# Directory, next to this module, holding the YAML markdown docs under test.
# The docs are copied from
# $ROOT/website/www/site/content/en/documentation/sdks at build time.
YAML_DOCS_DIR = os.path.join(os.path.dirname(__file__), 'docs')

ReadMeTest = createTestSuite(
'ReadMeTest', os.path.join(os.path.dirname(__file__), 'README.md'))
'ReadMeTest', os.path.join(YAML_DOCS_DIR, 'yaml.md'))

ErrorHandlingTest = createTestSuite(
'ErrorHandlingTest',
os.path.join(os.path.dirname(__file__), 'yaml_errors.md'))
'ErrorHandlingTest', os.path.join(YAML_DOCS_DIR, 'yaml-errors.md'))

MappingTest = createTestSuite(
'MappingTest', os.path.join(YAML_DOCS_DIR, 'yaml-udf.md'))

CombineTest = createTestSuite(
'CombineTest', os.path.join(os.path.dirname(__file__), 'yaml_combine.md'))
'CombineTest', os.path.join(YAML_DOCS_DIR, 'yaml-combine.md'))

InlinePythonTest = createTestSuite(
'InlinePythonTest',
os.path.join(os.path.dirname(__file__), 'inline_python.md'))
'InlinePythonTest', os.path.join(YAML_DOCS_DIR, 'yaml-inline-python.md'))

if __name__ == '__main__':
parser = argparse.ArgumentParser()
Expand Down
6 changes: 0 additions & 6 deletions sdks/python/apache_beam/yaml/yaml_mapping_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
#

import logging
import os
import unittest

import apache_beam as beam
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
from apache_beam.yaml.readme_test import createTestSuite
from apache_beam.yaml.yaml_transform import YamlTransform

DATA = [
Expand Down Expand Up @@ -154,10 +152,6 @@ def test_validate_explicit_types(self):
self.assertEqual(result.element_type._fields[0][1], str)


YamlMappingDocTest = createTestSuite(
'YamlMappingDocTest',
os.path.join(os.path.dirname(__file__), 'yaml_mapping.md'))

if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
unittest.main()
86 changes: 56 additions & 30 deletions sdks/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

"""Apache Beam SDK for Python setup file."""

import glob
import os
import shutil
import subprocess
import sys
import warnings
Expand Down Expand Up @@ -148,11 +150,11 @@ def cythonize(*args, **kwargs):
# not called even though S3 was initialized. This could lead to a
# segmentation fault at exit. Keep pyarrow<13 until this is resolved.
pyarrow_dependency = [
'pyarrow>=3.0.0,<12.0.0',
# NOTE: We can remove this once Beam increases the pyarrow lower bound
# to a version that fixes CVE.
'pyarrow-hotfix<1'
]
'pyarrow>=3.0.0,<12.0.0',
# NOTE: We can remove this once Beam increases the pyarrow lower bound
# to a version that fixes CVE.
'pyarrow-hotfix<1'
]
else:
pyarrow_dependency = [
'pyarrow>=3.0.0,<15.0.0',
Expand All @@ -161,20 +163,21 @@ def cythonize(*args, **kwargs):
'pyarrow-hotfix<1'
]


# Requirement specifier list for pandas.
# Exclude pandas<=1.4.2 since it doesn't work with numpy 1.24.x.
# Exclude 1.5.0 and 1.5.1 because of
# https://github.com/pandas-dev/pandas/issues/45725
# The specifier also caps pandas below 2.1, and its environment marker
# restricts the requirement to Python 3.8 and newer.
dataframe_dependency = [
'pandas>=1.4.3,!=1.5.0,!=1.5.1,<2.1;python_version>="3.8"',
]


def find_by_ext(root_dir, ext):
  """Lazily yield every file under ``root_dir`` whose name ends with ``ext``.

  The tree is traversed with ``os.walk``; each matching file is yielded as
  the ``os.path.realpath`` of its location. A ``root_dir`` that does not
  exist simply yields nothing.
  """
  for dirpath, _subdirs, filenames in os.walk(root_dir):
    yield from (
        os.path.realpath(os.path.join(dirpath, name))
        for name in filenames
        if name.endswith(ext))


# We must generate protos after setup_requires are installed.
def generate_protos_first():
try:
Expand All @@ -186,23 +189,42 @@ def generate_protos_first():
# skip proto generation in that case.
if not os.path.exists(os.path.join(cwd, 'gen_protos.py')):
# make sure we already generated protos
pb2_files = list(find_by_ext(os.path.join(
cwd, 'apache_beam', 'portability', 'api'), '_pb2.py'))
pb2_files = list(
find_by_ext(
os.path.join(cwd, 'apache_beam', 'portability', 'api'),
'_pb2.py'))
if not pb2_files:
raise RuntimeError('protobuf files are not generated. '
'Please generate pb2 files')
raise RuntimeError(
'protobuf files are not generated. '
'Please generate pb2 files')

warnings.warn('Skipping proto generation as they are already generated.')
return
out = subprocess.run([
sys.executable,
os.path.join(cwd, 'gen_protos.py'),
'--no-force'
], capture_output=True, check=True)
out = subprocess.run(
[sys.executable, os.path.join(cwd, 'gen_protos.py'), '--no-force'],
capture_output=True,
check=True)
print(out.stdout)
except subprocess.CalledProcessError as err:
raise RuntimeError('Could not generate protos due to error: %s',
err.stderr)
raise RuntimeError('Could not generate protos due to error: %s', err.stderr)


def copy_tests_from_docs():
  """Stage the website's YAML markdown docs inside the Python package.

  Every ``yaml*.md`` file found in the website documentation directory
  (resolved relative to this file) is copied into ``apache_beam/yaml/docs``,
  replacing the previous contents of that directory. When the website
  directory is absent, an already existing ``apache_beam/yaml/docs``
  directory is left untouched.

  Raises:
    RuntimeError: If neither the website docs directory nor the destination
      directory exists.
  """
  python_root = os.path.abspath(os.path.dirname(__file__))
  docs_src = os.path.normpath(
      os.path.join(
          python_root, '../../website/www/site/content/en/documentation/sdks'))
  docs_dest = os.path.normpath(
      os.path.join(python_root, 'apache_beam/yaml/docs'))
  if os.path.exists(docs_src):
    shutil.rmtree(docs_dest, ignore_errors=True)
    # rmtree errors are ignored above, so the directory may still exist;
    # makedirs(exist_ok=True) tolerates that and also creates missing parents,
    # where a plain os.mkdir would raise.
    os.makedirs(docs_dest, exist_ok=True)
    for path in glob.glob(os.path.join(docs_src, 'yaml*.md')):
      shutil.copy(path, docs_dest)
  elif not os.path.exists(docs_dest):
    raise RuntimeError(
        f'Could not locate yaml docs in {docs_src} or {docs_dest}.')


def get_portability_package_data():
Expand Down Expand Up @@ -231,24 +253,27 @@ def get_portability_package_data():
# executes below.
generate_protos_first()

# These data files live elsewhere in the full Beam repository.
copy_tests_from_docs()

# generate cythonize extensions only if we are building a wheel or
# building an extension or running in editable mode.
cythonize_cmds = ('bdist_wheel', 'build_ext', 'editable_wheel')
if any(cmd in sys.argv for cmd in cythonize_cmds):
extensions = cythonize([
'apache_beam/**/*.pyx',
'apache_beam/coders/coder_impl.py',
'apache_beam/metrics/cells.py',
'apache_beam/metrics/execution.py',
'apache_beam/runners/common.py',
'apache_beam/runners/worker/logger.py',
'apache_beam/runners/worker/opcounters.py',
'apache_beam/runners/worker/operations.py',
'apache_beam/transforms/cy_combiners.py',
'apache_beam/transforms/stats.py',
'apache_beam/utils/counters.py',
'apache_beam/utils/windowed_value.py',
])
'apache_beam/**/*.pyx',
'apache_beam/coders/coder_impl.py',
'apache_beam/metrics/cells.py',
'apache_beam/metrics/execution.py',
'apache_beam/runners/common.py',
'apache_beam/runners/worker/logger.py',
'apache_beam/runners/worker/opcounters.py',
'apache_beam/runners/worker/operations.py',
'apache_beam/transforms/cy_combiners.py',
'apache_beam/transforms/stats.py',
'apache_beam/utils/counters.py',
'apache_beam/utils/windowed_value.py',
])
else:
extensions = []
# Keep all dependencies inlined in the setup call, otherwise Dependabot won't
Expand All @@ -273,6 +298,7 @@ def get_portability_package_data():
'*/*/*.h',
'testing/data/*.yaml',
'yaml/*.yaml',
'yaml/docs/*.md',
*get_portability_package_data()
]
},
Expand Down

0 comments on commit fd886f4

Please sign in to comment.