Skip to content

Commit

Permalink
Filter out default configs when overrides exist. (#21539)
Browse files Browse the repository at this point in the history
* Filter out default configs when overrides exist.

When sending configs to Airflow workers we materialize a temp config file. In #18772 a feature was added so that `_cmd` generated secrets are not written to the files in some cases instead favoring maintaining the raw `_cmd` settings. Unfortunately during materializing of the configs via `as_dict()` Airflow defaults are generated and materialized as well including defaults for the non `_cmd` versions of some settings. And due to Airflow setting precedence stating bare versions of settings winning over `_cmd` versions it results in `_cmd` settings being discarded:
https://airflow.apache.org/docs/apache-airflow/stable/howto/set-config.html

This change checks `_cmd`, env, and secrets when materializing configs via `as_dict()` so that if the bare versions of the values is exactly the same as Airflow defaults and we have "hidden" / special versions of these configs that are trying to be set we remove the bare versions so that the correct version can be used.

Fixes: #20092
Related to: #18772 #4050

(cherry picked from commit e07bc63)
  • Loading branch information
xyu authored and ephraimbuddy committed Mar 22, 2022
1 parent c5b6e16 commit fed7cd5
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 0 deletions.
58 changes: 58 additions & 0 deletions airflow/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,15 @@ def as_dict(
"""
Returns the current configuration as an OrderedDict of OrderedDicts.
When materializing current configuration Airflow defaults are
materialized along with user set configs. If any of the `include_*`
options are False then the result of calling command or secret key
configs do not override Airflow defaults and instead are passed through.
In order to then avoid Airflow defaults from overwriting user set
command or secret key configs we filter out bare sensitive_config_values
that are set to Airflow defaults when command or secret key configs
produce different values.
:param display_source: If False, the option value is returned. If True,
a tuple of (option_value, source) is returned. Source is either
'airflow.cfg', 'default', 'env var', or 'cmd'.
Expand Down Expand Up @@ -618,14 +627,21 @@ def as_dict(
# add env vars and overwrite because they have priority
if include_env:
self._include_envs(config_sources, display_sensitive, display_source, raw)
else:
self._filter_by_source(config_sources, display_source, self._get_env_var_option)

# add bash commands
if include_cmds:
self._include_commands(config_sources, display_sensitive, display_source, raw)
else:
self._filter_by_source(config_sources, display_source, self._get_cmd_option)

# add config from secret backends
if include_secret:
self._include_secrets(config_sources, display_sensitive, display_source, raw)
else:
self._filter_by_source(config_sources, display_source, self._get_secret_option)

return config_sources

def _include_secrets(self, config_sources, display_sensitive, display_source, raw):
Expand Down Expand Up @@ -683,6 +699,48 @@ def _include_envs(self, config_sources, display_sensitive, display_source, raw):
key = key.lower()
config_sources.setdefault(section, OrderedDict()).update({key: opt})

def _filter_by_source(self, config_sources, display_source, getter_func):
"""
Deletes default configs from current configuration (an OrderedDict of
OrderedDicts) if it would conflict with special sensitive_config_values.
This is necessary because bare configs take precedence over the command
or secret key equivalents so if the current running config is
materialized with Airflow defaults they in turn override user set
command or secret key configs.
:param config_sources: The current configuration to operate on
:param display_source: If False, configuration options contain raw
values. If True, options are a tuple of (option_value, source).
Source is either 'airflow.cfg', 'default', 'env var', or 'cmd'.
:param getter_func: A callback function that gets the user configured
override value for a particular sensitive_config_values config.
:rtype: None
:return: None, the given config_sources is filtered if necessary,
otherwise untouched.
"""
for (section, key) in self.sensitive_config_values:
# Don't bother if we don't have section / key
if section not in config_sources or key not in config_sources[section]:
continue
# Check that there is something to override defaults
try:
getter_opt = getter_func(section, key)
except ValueError:
continue
if not getter_opt:
continue
# Check to see that there is a default value
if not self.airflow_defaults.has_option(section, key):
continue
# Check to see if bare setting is the same as defaults
if display_source:
opt, source = config_sources[section][key]
else:
opt = config_sources[section][key]
if opt == self.airflow_defaults.get(section, key):
del config_sources[section][key]

@staticmethod
def _replace_config_with_display_sources(config_sources, configs, display_source, raw):
for (source_name, config) in configs:
Expand Down
4 changes: 4 additions & 0 deletions docs/apache-airflow/howto/set-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ The universal order of precedence for all configuration options is as follows:
#. secret key in ``airflow.cfg``
#. Airflow's built in defaults

.. note::
For Airflow versions >= 2.2.1, < 2.3.0 Airflow's built in defaults took precedence
over command and secret key in ``airflow.cfg`` in some circumstances.

You can check the current configuration with the ``airflow config list`` command.

If you only want to see the value for one option, you can use ``airflow config get-value`` command as in
Expand Down
46 changes: 46 additions & 0 deletions tests/core/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import copy
import io
import os
import re
Expand Down Expand Up @@ -720,3 +721,48 @@ def test_enum_logging_levels(self):
"CRITICAL, FATAL, ERROR, WARN, WARNING, INFO, DEBUG."
)
assert message == exception

def test_as_dict_works_without_sensitive_cmds(self):
conf_materialize_cmds = conf.as_dict(display_sensitive=True, raw=True, include_cmds=True)
conf_maintain_cmds = conf.as_dict(display_sensitive=True, raw=True, include_cmds=False)

assert 'sql_alchemy_conn' in conf_materialize_cmds['core']
assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core']

assert 'sql_alchemy_conn' in conf_maintain_cmds['core']
assert 'sql_alchemy_conn_cmd' not in conf_maintain_cmds['core']

assert (
conf_materialize_cmds['core']['sql_alchemy_conn']
== conf_maintain_cmds['core']['sql_alchemy_conn']
)

def test_as_dict_respects_sensitive_cmds(self):
conf_conn = conf['core']['sql_alchemy_conn']
test_conf = copy.deepcopy(conf)
test_conf.read_string(
textwrap.dedent(
"""
[core]
sql_alchemy_conn_cmd = echo -n my-super-secret-conn
"""
)
)

conf_materialize_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=True)
conf_maintain_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=False)

assert 'sql_alchemy_conn' in conf_materialize_cmds['core']
assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core']

if conf_conn == test_conf.airflow_defaults['core']['sql_alchemy_conn']:
assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'my-super-secret-conn'

assert 'sql_alchemy_conn_cmd' in conf_maintain_cmds['core']
assert conf_maintain_cmds['core']['sql_alchemy_conn_cmd'] == 'echo -n my-super-secret-conn'

if conf_conn == test_conf.airflow_defaults['core']['sql_alchemy_conn']:
assert 'sql_alchemy_conn' not in conf_maintain_cmds['core']
else:
assert 'sql_alchemy_conn' in conf_maintain_cmds['core']
assert conf_maintain_cmds['core']['sql_alchemy_conn'] == conf_conn

0 comments on commit fed7cd5

Please sign in to comment.