Skip to content

Commit

Permalink
Make "changed" tasks work with deleted files (#4546)
Browse files Browse the repository at this point in the history
### Problem

#4529 

There are 2 kinds of "changed" tasks in pants currently. Neither of them output correct result if a file is deleted.
1. "./pants changed" tasks. They used to have the correct output, but after #4350 , the logic of file matching in SpecSourceMapper was changed. We used to do a regular expression matching to check if the modified file is owned by a target. Now we check if a modified file is in a target's source file set. For a deleted file, it will not be in any target's source file set, thus no target will be considered as the owner of the deleted file, giving incorrect result.
2. "./pants --changed-XXX list". This uses EngineSourceMapper which implements similar logic as above. It never works on deleted files.

### Solution

I restored the removed file matching logic (using regular expression). In both SpecSourceMapper and EngineSourceMapper, first check if a file exists. It it exists, then check if it's in target's file set. If not, then it is a deleted file, and use re matching logic on it. Fixes #4529
  • Loading branch information
JieGhost committed May 4, 2017
1 parent b04ce10 commit 0510f02
Show file tree
Hide file tree
Showing 8 changed files with 246 additions and 37 deletions.
3 changes: 2 additions & 1 deletion src/python/pants/engine/legacy/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ python_library(
dependencies=[
':address_mapper',
':graph',
'src/python/pants/base:build_file',
'src/python/pants/base:specs',
'src/python/pants/build_graph',
'src/python/pants/source',
]
)

Expand Down
33 changes: 18 additions & 15 deletions src/python/pants/engine/legacy/source_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pants.build_graph.source_mapper import SourceMapper
from pants.engine.legacy.address_mapper import LegacyAddressMapper
from pants.engine.legacy.graph import HydratedTargets
from pants.source.filespec import matches_filespec
from pants.source.wrapped_globs import EagerFilesetWithSpec


Expand Down Expand Up @@ -48,20 +49,24 @@ def _unique_dirs_for_sources(self, sources):
def target_addresses_for_source(self, source):
return list(self.iter_target_addresses_for_sources([source]))

def _iter_owned_files_from_hydrated_target(self, legacy_target):
"""Given a `HydratedTarget` instance, yield all files owned by the target."""
def _match_source(self, source, fileset):
return fileset.matches(source) or matches_filespec(source, fileset.filespec)

def _owns_source(self, source, legacy_target):
"""Given a `HydratedTarget` instance, check if it owns the given source file."""
target_kwargs = legacy_target.adaptor.kwargs()

# Handle targets like `python_binary` which have a singular `source='main.py'` declaration.
target_source = target_kwargs.get('source')
if target_source:
yield os.path.join(legacy_target.adaptor.address.spec_path, target_source)
path_from_build_root = os.path.join(legacy_target.adaptor.address.spec_path, target_source)
if path_from_build_root == source:
return True

# Handle `sources`-declaring targets.
target_sources = target_kwargs.get('sources', [])
if target_sources:
for f in target_sources.paths_from_buildroot_iter():
yield f
if target_sources and self._match_source(source, target_sources):
return True

# Handle `resources`-declaring targets.
# TODO: Remember to get rid of this in 1.5.0.dev0, when the deprecation of `resources` is complete.
Expand All @@ -74,8 +79,8 @@ def _iter_owned_files_from_hydrated_target(self, legacy_target):
# python_library(..., resources=['file.txt', 'file2.txt'])
#
if isinstance(target_resources, EagerFilesetWithSpec):
for f in target_resources.paths_from_buildroot_iter():
yield f
if self._match_source(source, target_resources):
return True
# 2) Strings of addresses, which are represented in kwargs by a list of strings:
#
# java_library(..., resources=['testprojects/src/resources/...:resource'])
Expand All @@ -87,12 +92,13 @@ def _iter_owned_files_from_hydrated_target(self, legacy_target):
for hydrated_targets in self._engine.product_request(HydratedTargets, resource_dep_subjects):
for hydrated_target in hydrated_targets.dependencies:
resource_sources = hydrated_target.adaptor.kwargs().get('sources')
if resource_sources:
for f in resource_sources.paths_from_buildroot_iter():
yield f
if resource_sources and self._match_source(source, resource_sources):
return True
else:
raise AssertionError('Could not process target_resources with type {}'.format(type(target_resources)))

return False

def iter_target_addresses_for_sources(self, sources):
"""Bulk, iterable form of `target_addresses_for_source`."""
# Walk up the buildroot looking for targets that would conceivably claim changed sources.
Expand All @@ -107,8 +113,5 @@ def iter_target_addresses_for_sources(self, sources):
if any(LegacyAddressMapper.is_declaring_file(legacy_address, f) for f in sources_set):
yield legacy_address
else:
# Handle claimed files.
target_files_iter = self._iter_owned_files_from_hydrated_target(hydrated_target)
if any(source_file in sources_set for source_file in target_files_iter):
# At least one file in this targets sources match our changed sources - emit its address.
if any(self._owns_source(source, hydrated_target) for source in sources_set):
yield legacy_address
59 changes: 59 additions & 0 deletions src/python/pants/source/filespec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# coding=utf-8
# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import re


def glob_to_regex(pattern):
"""Given a glob pattern, return an equivalent regex expression.
:param string glob: The glob pattern. "**" matches 0 or more dirs recursively.
"*" only matches patterns in a single dir.
:returns: A regex string that matches same paths as the input glob does.
"""
out = ['^']
components = pattern.strip('/').replace('.', '[.]').replace('$','[$]').split('/')
doublestar = False
for component in components:
if len(out) == 1:
if pattern.startswith('/'):
out.append('/')
else:
if not doublestar:
out.append('/')

if '**' in component:
if component != '**':
raise ValueError('Invalid usage of "**", use "*" instead.')

if not doublestar:
out.append('(([^/]+/)*)')
doublestar = True
else:
out.append(component.replace('*', '[^/]*'))
doublestar = False

if doublestar:
out.append('[^/]*')

out.append('$')

return ''.join(out)


def globs_matches(path, patterns):
return any(re.match(glob_to_regex(pattern), path) for pattern in patterns)


def matches_filespec(path, spec):
if spec is None:
return False
if not globs_matches(path, spec.get('globs', [])):
return False
for spec in spec.get('exclude', []):
if matches_filespec(path, spec):
return False
return True
4 changes: 3 additions & 1 deletion src/python/pants/source/payload_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import os
from hashlib import sha1

from pants.base.payload_field import PayloadField
from pants.source.filespec import matches_filespec
from pants.source.source_root import SourceRootConfig
from pants.source.wrapped_globs import FilesetWithSpec
from pants.util.memo import memoized_property
Expand All @@ -33,7 +35,7 @@ def source_root(self):
return SourceRootConfig.global_instance().get_source_roots().find_by_path(self.rel_path)

def matches(self, path):
return self.sources.matches(path)
return self.sources.matches(path) or matches_filespec(path, self.filespec)

@property
def filespec(self):
Expand Down
40 changes: 21 additions & 19 deletions tests/python/pants_test/core_tasks/test_what_changed.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,20 +111,24 @@ def setUp(self):
]
)
"""))
self.create_file('root/src/py/a/b/c', contents='', mode='w')
self.create_file('root/src/py/a/d', contents='', mode='w')

self.add_to_build_file('root/src/py/1', dedent("""
python_library(
name='numeric',
sources=['2']
)
"""))
self.create_file('root/src/py/1/2', contents='', mode='w')

self.add_to_build_file('root/src/py/dependency_tree/a', dedent("""
python_library(
name='a',
sources=['a.py'],
)
"""))
self.create_file('root/src/py/dependency_tree/a/a.py', contents='', mode='w')

self.add_to_build_file('root/src/py/dependency_tree/b', dedent("""
python_library(
Expand All @@ -133,6 +137,7 @@ def setUp(self):
dependencies=['root/src/py/dependency_tree/a']
)
"""))
self.create_file('root/src/py/dependency_tree/b/b.py', contents='', mode='w')

self.add_to_build_file('root/src/py/dependency_tree/c', dedent("""
python_library(
Expand All @@ -141,6 +146,7 @@ def setUp(self):
dependencies=['root/src/py/dependency_tree/b']
)
"""))
self.create_file('root/src/py/dependency_tree/c/c.py', contents='', mode='w')

self.add_to_build_file('root/src/thrift', dedent("""
java_thrift_library(
Expand All @@ -153,27 +159,33 @@ def setUp(self):
sources=['a.thrift']
)
"""))
self.create_file('root/src/thrift/a.thrift', contents='', mode='w')

self.add_to_build_file('root/src/resources/a', dedent("""
resources(
name='a_resources',
sources=['a.resources']
)
"""))
self.create_file('root/src/resources/a/a.resources', contents='', mode='w')

self.add_to_build_file('root/src/java/a', dedent("""
java_library(
name='a_java',
sources=rglobs("*.java"),
)
"""))
self.create_file('root/src/java/a/foo.java', contents='', mode='w')
self.create_file('root/src/java/a/b/foo.java', contents='', mode='w')

self.add_to_build_file('root/src/java/b', dedent("""
java_library(
name='b_java',
sources=globs("*.java"),
)
"""))
self.create_file('root/src/java/b/foo.java', contents='', mode='w')
self.create_file('root/src/java/b/b/foo.java', contents='', mode='w')

self.add_to_build_file('root/3rdparty/BUILD.twitter', dedent("""
jar_library(
Expand Down Expand Up @@ -201,13 +213,16 @@ def setUp(self):
sources=['a/build/scripts.java'],
)
"""))
self.create_file('root/scripts/a/build/scripts.java', contents='', mode='w')

self.add_to_build_file('BUILD.config', dedent("""
resources(
name='pants-config',
sources = globs('pants.ini*')
)
"""))
self.create_file('pants.ini', contents='', mode='w')
self.create_file('pants.ini.backup', contents='', mode='w')


class WhatChangedTest(WhatChangedTestBasic):
Expand Down Expand Up @@ -299,8 +314,9 @@ def test_diffspec(self):
)

def test_diffspec_removed_files(self):
# This file was not created in setup stage.
file_in_target = 'root/src/java/a/b/c/Foo.java'
self.create_file(relpath=file_in_target, contents='', mode='a')

self.assert_console_output(
'root/src/java/a:a_java',
options={'diffspec': '42'},
Expand Down Expand Up @@ -376,32 +392,20 @@ def test_deferred_sources_new(self):
)

def test_rglobs_in_sources(self):
file_in_target_a = 'root/src/java/a/foo.java'
file_in_target_b = 'root/src/java/a/b/foo.java'

self.create_file(file_in_target_a, contents='', mode='w')
self.create_file(file_in_target_b, contents='', mode='w')

self.assert_console_output(
'root/src/java/a:a_java',
workspace=self.workspace(files=[file_in_target_a])
workspace=self.workspace(files=['root/src/java/a/foo.java'])
)

self.assert_console_output(
'root/src/java/a:a_java',
workspace=self.workspace(files=[file_in_target_b])
workspace=self.workspace(files=['root/src/java/a/b/foo.java'])
)

def test_globs_in_sources(self):
file_in_target_a = 'root/src/java/b/foo.java'
file_in_target_b = 'root/src/java/b/b/foo.java'

self.create_file(file_in_target_a, contents='', mode='w')
self.create_file(file_in_target_b, contents='', mode='w')

self.assert_console_output(
'root/src/java/b:b_java',
workspace=self.workspace(files=[file_in_target_a])
workspace=self.workspace(files=['root/src/java/b/foo.java'])
)

self.assert_console_output(
Expand Down Expand Up @@ -439,11 +443,9 @@ def test_globs_in_resources_2(self):
)

def test_root_config(self):
file_in_target = 'pants.ini'
self.create_file(relpath=file_in_target, contents='', mode='w')
self.assert_console_output(
'//:pants-config',
workspace=self.workspace(files=[file_in_target])
workspace=self.workspace(files=['pants.ini'])
)

def test_exclude_sources(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from pants.base.build_environment import get_buildroot
from pants.util.contextutil import environment_as, temporary_dir
from pants.util.dirutil import safe_mkdir, safe_open, touch
from pants.util.dirutil import safe_delete, safe_mkdir, safe_open, touch
from pants_test.base_test import TestGenerator
from pants_test.pants_run_integration_test import PantsRunIntegrationTest, ensure_engine
from pants_test.testutils.git_util import initialize_repo
Expand Down Expand Up @@ -409,6 +409,25 @@ def test_changed_with_multiple_build_files(self):
self.assert_success(pants_run)
self.assertEqual(pants_run.stdout_data.strip(), '')

@ensure_engine
def test_changed_with_deleted_file(self):
deleted_file = 'src/python/sources/sources.py'

with create_isolated_git_repo() as worktree:
safe_delete(os.path.join(worktree, deleted_file))
pants_run = self.run_pants(['changed'])
self.assert_success(pants_run)
self.assertEqual(pants_run.stdout_data.strip(), 'src/python/sources:sources')

def test_list_changed(self):
deleted_file = 'src/python/sources/sources.py'

with create_isolated_git_repo() as worktree:
safe_delete(os.path.join(worktree, deleted_file))
pants_run = self.run_pants(['--enable-v2-engine', '--changed-parent=HEAD', 'list'])
self.assert_success(pants_run)
self.assertEqual(pants_run.stdout_data.strip(), 'src/python/sources:sources')

# Following 4 tests do not run in isolated repo because they don't mutate working copy.
def test_changed(self):
self.assert_changed_new_equals_old([])
Expand Down
8 changes: 8 additions & 0 deletions tests/python/pants_test/source/BUILD
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_tests(
name = 'filespec',
sources = ['test_filespec.py'],
dependencies = [
'src/python/pants/source',
]
)

python_tests(
name = 'payload_fields',
sources = ['test_payload_fields.py'],
Expand Down
Loading

0 comments on commit 0510f02

Please sign in to comment.