Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure flatten windows match #30410

Merged
merged 3 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sdks/python/apache_beam/transforms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3683,8 +3683,16 @@ def _extract_input_pvalues(self, pvalueish):
return pvalueish, pvalueish

def expand(self, pcolls):
windowing = self.get_windowing(pcolls)
for pcoll in pcolls:
self._check_pcollection(pcoll)
if pcoll.windowing != windowing:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we just log the error since raising the error could break some users' existing jobs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think that's a good idea. It turns out our test asserts actually do this already for some weird reasons, so I'm sure others do as well. That's what is causing CI to fail - https://github.com/apache/beam/blob/20675c860f46f3f4abce061a6b490166ca68df0f/sdks/python/apache_beam/testing/util.py#L286C7-L286C22

_LOGGER.warning(
'All input pcollections must have the same window. Windowing for '
'flatten set to %s, windowing of pcoll %s set to %s',
windowing,
pcoll,
pcoll.windowing)
is_bounded = all(pcoll.is_bounded for pcoll in pcolls)
return pvalue.PCollection(self.pipeline, is_bounded=is_bounded)

Expand Down
34 changes: 34 additions & 0 deletions sdks/python/apache_beam/transforms/core_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
import pytest

import apache_beam as beam
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
from apache_beam.transforms.window import FixedWindows


class TestDoFn1(beam.DoFn):
Expand Down Expand Up @@ -136,6 +139,37 @@ def process(self, element):
self.assertEqual(p6.is_bounded, False)


class FlattenTest(unittest.TestCase):
def test_flatten_identical_windows(self):
with beam.testing.test_pipeline.TestPipeline() as p:
source1 = p | "c1" >> beam.Create(
[1, 2, 3, 4, 5]) | "w1" >> beam.WindowInto(FixedWindows(100))
source2 = p | "c2" >> beam.Create([6, 7, 8]) | "w2" >> beam.WindowInto(
FixedWindows(100))
source3 = p | "c3" >> beam.Create([9, 10]) | "w3" >> beam.WindowInto(
FixedWindows(100))
out = (source1, source2, source3) | "flatten" >> beam.Flatten()
assert_that(out, equal_to([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))

def test_flatten_no_windows(self):
with beam.testing.test_pipeline.TestPipeline() as p:
source1 = p | "c1" >> beam.Create([1, 2, 3, 4, 5])
source2 = p | "c2" >> beam.Create([6, 7, 8])
source3 = p | "c3" >> beam.Create([9, 10])
out = (source1, source2, source3) | "flatten" >> beam.Flatten()
assert_that(out, equal_to([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))

def test_flatten_mismatched_windows(self):
with beam.testing.test_pipeline.TestPipeline() as p:
source1 = p | "c1" >> beam.Create(
[1, 2, 3, 4, 5]) | "w1" >> beam.WindowInto(FixedWindows(25))
source2 = p | "c2" >> beam.Create([6, 7, 8]) | "w2" >> beam.WindowInto(
FixedWindows(100))
source3 = p | "c3" >> beam.Create([9, 10]) | "w3" >> beam.WindowInto(
FixedWindows(100))
_ = (source1, source2, source3) | "flatten" >> beam.Flatten()


if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
unittest.main()
Loading