Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability to test examples from yaml docs. #30198

Merged
merged 1 commit into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ sdks/python/LICENSE
sdks/python/NOTICE
sdks/python/README.md
sdks/python/apache_beam/portability/api/*
sdks/python/apache_beam/yaml/docs/*
sdks/python/nosetests*.xml
sdks/python/pytest*.xml
sdks/python/postcommit_requirements.txt
Expand Down
17 changes: 11 additions & 6 deletions sdks/python/apache_beam/yaml/readme_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,19 +287,24 @@ def expand(self, pcoll):
return pcoll


# These are copied from $ROOT/website/www/site/content/en/documentation/sdks
# at build time.
YAML_DOCS_DIR = os.path.join(os.path.join(os.path.dirname(__file__), 'docs'))

ReadMeTest = createTestSuite(
'ReadMeTest', os.path.join(os.path.dirname(__file__), 'README.md'))
'ReadMeTest', os.path.join(YAML_DOCS_DIR, 'yaml.md'))

ErrorHandlingTest = createTestSuite(
'ErrorHandlingTest',
os.path.join(os.path.dirname(__file__), 'yaml_errors.md'))
'ErrorHandlingTest', os.path.join(YAML_DOCS_DIR, 'yaml-errors.md'))

MappingTest = createTestSuite(
'MappingTest', os.path.join(YAML_DOCS_DIR, 'yaml-udf.md'))

CombineTest = createTestSuite(
'CombineTest', os.path.join(os.path.dirname(__file__), 'yaml_combine.md'))
'CombineTest', os.path.join(YAML_DOCS_DIR, 'yaml-combine.md'))

InlinePythonTest = createTestSuite(
'InlinePythonTest',
os.path.join(os.path.dirname(__file__), 'inline_python.md'))
'InlinePythonTest', os.path.join(YAML_DOCS_DIR, 'yaml-inline-python.md'))

if __name__ == '__main__':
parser = argparse.ArgumentParser()
Expand Down
6 changes: 0 additions & 6 deletions sdks/python/apache_beam/yaml/yaml_mapping_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
#

import logging
import os
import unittest

import apache_beam as beam
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
from apache_beam.yaml.readme_test import createTestSuite
from apache_beam.yaml.yaml_transform import YamlTransform

DATA = [
Expand Down Expand Up @@ -154,10 +152,6 @@ def test_validate_explicit_types(self):
self.assertEqual(result.element_type._fields[0][1], str)


YamlMappingDocTest = createTestSuite(
'YamlMappingDocTest',
os.path.join(os.path.dirname(__file__), 'yaml_mapping.md'))

if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
unittest.main()
86 changes: 56 additions & 30 deletions sdks/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

"""Apache Beam SDK for Python setup file."""

import glob
import os
import shutil
import subprocess
import sys
import warnings
Expand Down Expand Up @@ -148,11 +150,11 @@ def cythonize(*args, **kwargs):
# not called even though S3 was initialized. This could lead to a
# segmentation fault at exit. Keep pyarrow<13 until this is resolved.
pyarrow_dependency = [
'pyarrow>=3.0.0,<12.0.0',
# NOTE: We can remove this once Beam increases the pyarrow lower bound
# to a version that fixes CVE.
'pyarrow-hotfix<1'
]
'pyarrow>=3.0.0,<12.0.0',
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: these changes are due to a long-overdue yapf run on this file.

# NOTE: We can remove this once Beam increases the pyarrow lower bound
# to a version that fixes CVE.
'pyarrow-hotfix<1'
]
else:
pyarrow_dependency = [
'pyarrow>=3.0.0,<15.0.0',
Expand All @@ -161,20 +163,21 @@ def cythonize(*args, **kwargs):
'pyarrow-hotfix<1'
]


# Exclude pandas<=1.4.2 since it doesn't work with numpy 1.24.x.
# Exclude 1.5.0 and 1.5.1 because of
# https://github.com/pandas-dev/pandas/issues/45725
dataframe_dependency = [
'pandas>=1.4.3,!=1.5.0,!=1.5.1,<2.1;python_version>="3.8"',
]


def find_by_ext(root_dir, ext):
  """Yield the resolved path of every file under root_dir ending in ext.

  Args:
    root_dir: Directory whose tree is walked with os.walk.
    ext: Suffix a file name must end with to be yielded (e.g. '_pb2.py').

  Yields:
    os.path.realpath of each matching file, in os.walk order.
  """
  for dirpath, _subdirs, filenames in os.walk(root_dir):
    matching = (name for name in filenames if name.endswith(ext))
    for name in matching:
      yield os.path.realpath(os.path.join(dirpath, name))


# We must generate protos after setup_requires are installed.
def generate_protos_first():
try:
Expand All @@ -186,23 +189,42 @@ def generate_protos_first():
# skip proto generation in that case.
if not os.path.exists(os.path.join(cwd, 'gen_protos.py')):
# make sure we already generated protos
pb2_files = list(find_by_ext(os.path.join(
cwd, 'apache_beam', 'portability', 'api'), '_pb2.py'))
pb2_files = list(
find_by_ext(
os.path.join(cwd, 'apache_beam', 'portability', 'api'),
'_pb2.py'))
if not pb2_files:
raise RuntimeError('protobuf files are not generated. '
'Please generate pb2 files')
raise RuntimeError(
'protobuf files are not generated. '
'Please generate pb2 files')

warnings.warn('Skipping proto generation as they are already generated.')
return
out = subprocess.run([
sys.executable,
os.path.join(cwd, 'gen_protos.py'),
'--no-force'
], capture_output=True, check=True)
out = subprocess.run(
[sys.executable, os.path.join(cwd, 'gen_protos.py'), '--no-force'],
capture_output=True,
check=True)
print(out.stdout)
except subprocess.CalledProcessError as err:
raise RuntimeError('Could not generate protos due to error: %s',
err.stderr)
raise RuntimeError('Could not generate protos due to error: %s', err.stderr)


def copy_tests_from_docs():
  """Refresh apache_beam/yaml/docs from the website's YAML documentation.

  Paths are resolved relative to the directory containing this file. When the
  website docs directory exists, the destination directory is recreated and
  every 'yaml*.md' file from the docs directory is copied into it. When the
  website docs directory is absent, a previously populated destination
  directory is left untouched.

  Raises:
    RuntimeError: If neither the website docs directory nor the destination
      directory exists.
  """
  python_root = os.path.abspath(os.path.dirname(__file__))
  docs_src = os.path.normpath(
      os.path.join(
          python_root, '../../website/www/site/content/en/documentation/sdks'))
  docs_dest = os.path.normpath(
      os.path.join(python_root, 'apache_beam/yaml/docs'))
  if os.path.exists(docs_src):
    # Removal is best-effort (ignore_errors=True), so the directory may still
    # exist afterwards; exist_ok avoids a FileExistsError in that case.
    shutil.rmtree(docs_dest, ignore_errors=True)
    os.makedirs(docs_dest, exist_ok=True)
    for path in glob.glob(os.path.join(docs_src, 'yaml*.md')):
      shutil.copy(path, docs_dest)
  elif not os.path.exists(docs_dest):
    raise RuntimeError(
        f'Could not locate yaml docs in {docs_src} or {docs_dest}.')


def get_portability_package_data():
Expand Down Expand Up @@ -231,24 +253,27 @@ def get_portability_package_data():
# executes below.
generate_protos_first()

# These data files live elsewhere in the full Beam repository.
copy_tests_from_docs()

# generate cythonize extensions only if we are building a wheel or
# building an extension or running in editable mode.
cythonize_cmds = ('bdist_wheel', 'build_ext', 'editable_wheel')
if any(cmd in sys.argv for cmd in cythonize_cmds):
extensions = cythonize([
'apache_beam/**/*.pyx',
'apache_beam/coders/coder_impl.py',
'apache_beam/metrics/cells.py',
'apache_beam/metrics/execution.py',
'apache_beam/runners/common.py',
'apache_beam/runners/worker/logger.py',
'apache_beam/runners/worker/opcounters.py',
'apache_beam/runners/worker/operations.py',
'apache_beam/transforms/cy_combiners.py',
'apache_beam/transforms/stats.py',
'apache_beam/utils/counters.py',
'apache_beam/utils/windowed_value.py',
])
'apache_beam/**/*.pyx',
'apache_beam/coders/coder_impl.py',
'apache_beam/metrics/cells.py',
'apache_beam/metrics/execution.py',
'apache_beam/runners/common.py',
'apache_beam/runners/worker/logger.py',
'apache_beam/runners/worker/opcounters.py',
'apache_beam/runners/worker/operations.py',
'apache_beam/transforms/cy_combiners.py',
'apache_beam/transforms/stats.py',
'apache_beam/utils/counters.py',
'apache_beam/utils/windowed_value.py',
])
else:
extensions = []
# Keep all dependencies inlined in the setup call, otherwise Dependabot won't
Expand All @@ -273,6 +298,7 @@ def get_portability_package_data():
'*/*/*.h',
'testing/data/*.yaml',
'yaml/*.yaml',
'yaml/docs/*.md',
*get_portability_package_data()
]
},
Expand Down
Loading