Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster docs extraction #197

Merged
merged 11 commits into from
Nov 3, 2020
1 change: 1 addition & 0 deletions antsibull.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ process_max = none
pypi_url = https://pypi.org/
thread_max = 80
max_retries = 10
doc_parsing_backend = ansible-doc
logging_cfg = {
version = 1.0
outputs = {
Expand Down
4 changes: 3 additions & 1 deletion antsibull/app_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def run(args):
_FIELDS_IN_APP_CTX = frozenset(('galaxy_url', 'logging_cfg', 'pypi_url'))

#: Field names in the args and config whose value will be added to the lib_ctx
_FIELDS_IN_LIB_CTX = frozenset(('chunksize', 'process_max', 'thread_max', 'max_retries'))
_FIELDS_IN_LIB_CTX = frozenset(
('chunksize', 'process_max', 'thread_max', 'max_retries', 'doc_parsing_backend'))

#: lib_ctx should be restricted to things which do not belong in the API but an application or
#: user might want to tweak. Global, internal, incidental values are good to store here. Things
Expand Down Expand Up @@ -242,6 +243,7 @@ class LibContext(BaseModel):
process_max: t.Optional[int] = None
thread_max: int = 64
max_retries: int = 10
doc_parsing_backend: str = 'ansible-internal'

@p.validator('process_max', pre=True)
def convert_to_none(cls, value):
Expand Down
2 changes: 1 addition & 1 deletion antsibull/cli/doc_commands/stable.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ...collections import install_together
from ...compat import asyncio_run, best_get_loop
from ...dependency_files import DepsFile
from ...docs_parsing.ansible_doc import get_ansible_plugin_info
from ...docs_parsing.parsing import get_ansible_plugin_info
from ...docs_parsing.fqcn import get_fqcn_parts
from ...galaxy import CollectionDownloader
from ...logging import log
Expand Down
5 changes: 5 additions & 0 deletions antsibull/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
#: Valid choices for a logging level field
LEVEL_CHOICES_F = p.Field(..., regex='^(CRITICAL|ERROR|WARNING|NOTICE|INFO|DEBUG|DISABLED)$')

#: Valid choices for the doc_parsing_backend field
DOC_PARSING_BACKEND_CHOICES_F = p.Field(
'ansible-internal', regex='^(ansible-doc|ansible-internal)$')

#: Valid choice of the logging version field
VERSION_CHOICES_F = p.Field(..., regex=r'1\.0')

Expand Down Expand Up @@ -131,6 +135,7 @@ class ConfigModel(BaseModel):
pypi_url: p.HttpUrl = 'https://pypi.org/'
thread_max: int = 80
max_retries: int = 10
doc_parsing_backend: str = DOC_PARSING_BACKEND_CHOICES_F

@p.validator('process_max', pre=True)
def convert_to_none(cls, value):
Expand Down
173 changes: 173 additions & 0 deletions antsibull/data/collection-enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Copyright: (c) 2014, James Tanner <tanner.jc@gmail.com>
# Copyright: (c) 2018, Ansible Project
# Copyright: (c) 2020, Felix Fontein
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)

# Parts taken from Ansible's ansible-doc sources

import argparse
import json
import sys

import ansible.plugins.loader as plugin_loader

from ansible import constants as C
from ansible import release as ansible_release
from ansible.cli import doc
from ansible.cli.arguments import option_helpers as opt_help
from ansible.collections.list import list_collection_dirs
from ansible.galaxy.collection import CollectionRequirement
from ansible.module_utils._text import to_native
from ansible.module_utils.common.json import AnsibleJSONEncoder
from ansible.plugins.loader import action_loader, fragment_loader
from ansible.utils.collection_loader import AnsibleCollectionConfig
from ansible.utils.plugin_docs import get_docstring


def load_plugin(loader, plugin_type, plugin):
    """Extract the documentation for a single plugin.

    :arg loader: The Ansible plugin loader for the given plugin type.
    :arg plugin_type: The plugin type, e.g. 'module' or 'lookup'.
    :arg plugin: Name of the plugin to look up.
    :returns: A dict. On success it contains 'plugin_name', 'filename' and
        'collection_name' keys plus the extracted documentation under the
        'ansible-doc' key. On failure it contains an 'error' key with a
        human-readable message (possibly in addition to the name/path keys).
    """
    result = {}
    try:
        # Resolve the plugin name to an actual file on disk.
        plugin_context = loader.find_plugin_with_context(
            plugin, mod_type='.py', ignore_deprecated=True, check_aliases=True)
        if not plugin_context.resolved:
            result['error'] = 'Cannot find plugin'
            return result
        plugin_name = plugin_context.plugin_resolved_name
        filename = plugin_context.plugin_resolved_path
        collection_name = plugin_context.plugin_resolved_collection

        result.update({
            'plugin_name': plugin_name,
            'filename': filename,
            'collection_name': collection_name,
        })

        # Parse DOCUMENTATION/EXAMPLES/RETURN from the plugin source,
        # resolving doc fragments via the fragment loader.
        documentation, plainexamples, returndocs, metadata = get_docstring(
            filename, fragment_loader, verbose=False,
            collection_name=collection_name, is_module=(plugin_type == 'module'))

        if documentation is None:
            result['error'] = 'No valid documentation found'
            return result

        documentation['filename'] = filename
        documentation['collection'] = collection_name

        if plugin_type == 'module':
            # is there corresponding action plugin?
            if plugin in action_loader:
                documentation['has_action'] = True
            else:
                documentation['has_action'] = False

        # Mirror the structure that `ansible-doc --json` emits.
        ansible_doc = {
            'doc': documentation,
            'examples': plainexamples,
            'return': returndocs,
            'metadata': metadata,
        }

        try:
            # If this fails, the documentation cannot be serialized as JSON
            json.dumps(ansible_doc, cls=AnsibleJSONEncoder)
            # Store result. This is guaranteed to be serializable
            result['ansible-doc'] = ansible_doc
        except Exception as e:
            result['error'] = (
                'Cannot serialize documentation as JSON: %s' % to_native(e)
            )
    except Exception as e:
        # Catch-all so one broken plugin does not abort the whole extraction;
        # the error is reported per-plugin in the result instead.
        result['error'] = (
            'Missing documentation or could not parse documentation: %s' % to_native(e)
        )

    return result


def ansible_doc_coll_filter(coll_filter):
    """Return the single collection name if the filter has exactly one entry, else None."""
    if coll_filter and len(coll_filter) == 1:
        return coll_filter[0]
    return None


def match_filter(name, coll_filter):
    """Return whether ``name`` matches the collection filter.

    :arg name: A collection name ('ns.coll') or fully qualified plugin name
        ('ns.coll.plugin').
    :arg coll_filter: A list of collection names, or None to match everything.
    :returns: True if ``name`` is an exact filter entry or lives inside one of
        the filtered collections; True for any name when no filter is given.
    """
    if coll_filter is None or name in coll_filter:
        return True
    # A plugin 'ns.coll.plugin' matches the filter entry 'ns.coll'.
    # (Original used the loop variable name 'filter', shadowing the builtin.)
    return any(name.startswith(entry + '.') for entry in coll_filter)


def load_all_plugins(plugin_type, basedir, coll_filter):
    """Load documentation for all plugins of one type matching the filter.

    :arg plugin_type: The plugin type, e.g. 'module' or 'lookup'.
    :arg basedir: Optional base directory added to the plugin search path.
    :arg coll_filter: Optional list of collection names to restrict to;
        None means all collections plus ansible.builtin.
    :returns: Mapping of plugin name to the result dict of load_plugin().
    """
    loader = getattr(plugin_loader, '%s_loader' % plugin_type)

    if basedir:
        loader.add_directory(basedir, with_subdir=True)

    loader._paths = None  # reset so we can use subdirs below

    plugin_list = set()

    if match_filter('ansible.builtin', coll_filter):
        # Collect the names of the plugins shipped with ansible-base itself.
        paths = loader._get_paths_with_context()
        for path_context in paths:
            plugin_list.update(
                doc.DocCLI.find_plugins(path_context.path, path_context.internal, plugin_type))

    # Add plugin names from installed collections (filtered if possible).
    doc.add_collection_plugins(
        plugin_list, plugin_type, coll_filter=ansible_doc_coll_filter(coll_filter))

    result = {}
    for plugin in plugin_list:
        if match_filter(plugin, coll_filter):
            result[plugin] = load_plugin(loader, plugin_type, plugin)

    return result


def main(args):
    """Command-line entry point: print plugin docs and collection data as JSON.

    :arg args: Full argument vector including the program name at args[0].
        Positional arguments are collection names used as a filter;
        --pretty enables indented JSON output.
    """
    parser = argparse.ArgumentParser(
        prog=args[0], description='Bulk extraction of Ansible plugin docs.')
    parser.add_argument('args', nargs='*', help='Collection filter', metavar='collection_filter')
    parser.add_argument('--pretty', action='store_true', help='Pretty-print JSON')
    opt_help.add_basedir_options(parser)

    arguments = parser.parse_args(args[1:])

    basedir = arguments.basedir
    # An empty filter list means "no filter" (match everything).
    coll_filter = arguments.args or None

    if basedir:
        AnsibleCollectionConfig.playbook_paths = basedir

    result = {
        'plugins': {},
        'collections': {},
    }

    # Export plugin docs
    for plugin_type in C.DOCUMENTABLE_PLUGINS:
        result['plugins'][plugin_type] = load_all_plugins(plugin_type, basedir, coll_filter)

    # Export collection data
    b_colldirs = list_collection_dirs(coll_filter=ansible_doc_coll_filter(coll_filter))
    for b_path in b_colldirs:
        collection = CollectionRequirement.from_path(b_path, False, fallback_metadata=True)

        collection_name = '{0}.{1}'.format(collection.namespace, collection.name)
        if match_filter(collection_name, coll_filter):
            version = collection.metadata.version
            result['collections'][collection_name] = {
                'path': to_native(b_path),
                # '*' marks an unknown version; report it as None instead.
                'version': version if version != '*' else None,
            }
    if match_filter('ansible.builtin', coll_filter):
        # ansible.builtin is not on disk as a collection; report it separately.
        result['collections']['ansible.builtin'] = {
            'version': ansible_release.__version__,
        }

    print(json.dumps(
        result, cls=AnsibleJSONEncoder, sort_keys=True, indent=4 if arguments.pretty else None))


# Pass the full argv (including the program name) so argparse can use args[0].
if __name__ == '__main__':
    main(sys.argv)
64 changes: 64 additions & 0 deletions antsibull/docs_parsing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Author: Toshio Kuratomi <tkuratom@redhat.com>
# License: GPLv3+
# Copyright: Ansible Project, 2020
"""Parse documentation from ansible plugins using anible-doc."""

import os
import typing as t


#: Copy of the process environment in which every Ansible environment variable
#: that adds plugin/role search paths is overridden to /dev/null, so that only
#: explicitly configured collection paths are searched.
ANSIBLE_PATH_ENVIRON: t.Dict[str, str] = os.environ.copy()
ANSIBLE_PATH_ENVIRON.update({'ANSIBLE_COLLECTIONS_PATH': '/dev/null',
                             'ANSIBLE_ACTION_PLUGINS': '/dev/null',
                             'ANSIBLE_CACHE_PLUGINS': '/dev/null',
                             'ANSIBLE_CALLBACK_PLUGINS': '/dev/null',
                             'ANSIBLE_CLICONF_PLUGINS': '/dev/null',
                             'ANSIBLE_CONNECTION_PLUGINS': '/dev/null',
                             'ANSIBLE_FILTER_PLUGINS': '/dev/null',
                             'ANSIBLE_HTTPAPI_PLUGINS': '/dev/null',
                             'ANSIBLE_INVENTORY_PLUGINS': '/dev/null',
                             'ANSIBLE_LOOKUP_PLUGINS': '/dev/null',
                             'ANSIBLE_LIBRARY': '/dev/null',
                             'ANSIBLE_MODULE_UTILS': '/dev/null',
                             'ANSIBLE_NETCONF_PLUGINS': '/dev/null',
                             'ANSIBLE_ROLES_PATH': '/dev/null',
                             'ANSIBLE_STRATEGY_PLUGINS': '/dev/null',
                             'ANSIBLE_TERMINAL_PLUGINS': '/dev/null',
                             'ANSIBLE_TEST_PLUGINS': '/dev/null',
                             'ANSIBLE_VARS_PLUGINS': '/dev/null',
                             'ANSIBLE_DOC_FRAGMENT_PLUGINS': '/dev/null',
                             })
try:
    del ANSIBLE_PATH_ENVIRON['PYTHONPATH']
except KeyError:
    # We just wanted to make sure there was no PYTHONPATH set...
    # all python libs will come from the venv
    pass
try:
    del ANSIBLE_PATH_ENVIRON['ANSIBLE_COLLECTIONS_PATHS']
except KeyError:
    # ANSIBLE_COLLECTIONS_PATHS is the deprecated name replaced by
    # ANSIBLE_COLLECTIONS_PATH
    pass


class ParsingError(Exception):
    """Error raised while parsing plugins for documentation.

    Raised by the docs parsing backends when plugin documentation could not
    be extracted or parsed.
    """


def _get_environment(collection_dir: t.Optional[str]) -> t.Dict[str, str]:
    """Build the environment used to run ansible-doc / doc extraction scripts.

    :arg collection_dir: If given, Ansible's collection search is restricted
        to this directory. If None, the caller's own collection path settings
        (if any) are passed through unchanged.
    :returns: A copy of :data:`ANSIBLE_PATH_ENVIRON` with the collection
        path variables adjusted accordingly.
    """
    env = ANSIBLE_PATH_ENVIRON.copy()
    if collection_dir is not None:
        env['ANSIBLE_COLLECTIONS_PATH'] = collection_dir
    else:
        # Copy ANSIBLE_COLLECTIONS_PATH and ANSIBLE_COLLECTIONS_PATHS (the
        # deprecated alias) from the original environment, replacing the
        # /dev/null placeholders set up in ANSIBLE_PATH_ENVIRON.
        for env_var in ('ANSIBLE_COLLECTIONS_PATH', 'ANSIBLE_COLLECTIONS_PATHS'):
            # pop() with a default replaces the try/del/except KeyError idiom.
            env.pop(env_var, None)
            if env_var in os.environ:
                env[env_var] = os.environ[env_var]
    return env
62 changes: 5 additions & 57 deletions antsibull/docs_parsing/ansible_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import asyncio
import json
import os
import sys
import traceback
import typing as t
Expand All @@ -19,52 +18,14 @@
from ..logging import log
from ..vendored.json_utils import _filter_non_json_lines
from .fqcn import get_fqcn_parts
from . import _get_environment, ParsingError

if t.TYPE_CHECKING:
from ..venv import VenvRunner, FakeVenvRunner


mlog = log.fields(mod=__name__)

#: Clear Ansible environment variables that set paths where plugins could be found.
ANSIBLE_PATH_ENVIRON: t.Dict[str, str] = os.environ.copy()
ANSIBLE_PATH_ENVIRON.update({'ANSIBLE_COLLECTIONS_PATH': '/dev/null',
'ANSIBLE_ACTION_PLUGINS': '/dev/null',
'ANSIBLE_CACHE_PLUGINS': '/dev/null',
'ANSIBLE_CALLBACK_PLUGINS': '/dev/null',
'ANSIBLE_CLICONF_PLUGINS': '/dev/null',
'ANSIBLE_CONNECTION_PLUGINS': '/dev/null',
'ANSIBLE_FILTER_PLUGINS': '/dev/null',
'ANSIBLE_HTTPAPI_PLUGINS': '/dev/null',
'ANSIBLE_INVENTORY_PLUGINS': '/dev/null',
'ANSIBLE_LOOKUP_PLUGINS': '/dev/null',
'ANSIBLE_LIBRARY': '/dev/null',
'ANSIBLE_MODULE_UTILS': '/dev/null',
'ANSIBLE_NETCONF_PLUGINS': '/dev/null',
'ANSIBLE_ROLES_PATH': '/dev/null',
'ANSIBLE_STRATEGY_PLUGINS': '/dev/null',
'ANSIBLE_TERMINAL_PLUGINS': '/dev/null',
'ANSIBLE_TEST_PLUGINS': '/dev/null',
'ANSIBLE_VARS_PLUGINS': '/dev/null',
'ANSIBLE_DOC_FRAGMENT_PLUGINS': '/dev/null',
})
try:
del ANSIBLE_PATH_ENVIRON['PYTHONPATH']
except KeyError:
# We just wanted to make sure there was no PYTHONPATH set...
# all python libs will come from the venv
pass
try:
del ANSIBLE_PATH_ENVIRON['ANSIBLE_COLLECTIONS_PATHS']
except KeyError:
# ANSIBLE_COLLECTIONS_PATHS is the deprecated name replaced by
# ANSIBLE_COLLECTIONS_PATH
pass


class ParsingError(Exception):
"""Error raised while parsing plugins for documentation."""


def _process_plugin_results(plugin_type: str,
plugin_names: t.Iterable[str],
Expand Down Expand Up @@ -197,23 +158,6 @@ async def _get_plugin_info(plugin_type: str, ansible_doc: 'sh.Command',
return results


def _get_environment(collection_dir: t.Optional[str]) -> t.Dict[str, str]:
env = ANSIBLE_PATH_ENVIRON.copy()
if collection_dir is not None:
env['ANSIBLE_COLLECTIONS_PATH'] = collection_dir
else:
# Copy ANSIBLE_COLLECTIONS_PATH and ANSIBLE_COLLECTIONS_PATHS from the
# original environment.
for env_var in ('ANSIBLE_COLLECTIONS_PATH', 'ANSIBLE_COLLECTIONS_PATHS'):
try:
del env[env_var]
except KeyError:
pass
if env_var in os.environ:
env[env_var] = os.environ[env_var]
return env


async def get_ansible_plugin_info(venv: t.Union['VenvRunner', 'FakeVenvRunner'],
collection_dir: t.Optional[str],
collection_names: t.Optional[t.List[str]] = None
Expand All @@ -234,6 +178,9 @@ async def get_ansible_plugin_info(venv: t.Union['VenvRunner', 'FakeVenvRunner'],
{information from ansible-doc --json. See the ansible-doc documentation for more
info.}
"""
flog = mlog.fields(func='get_ansible_plugin_info')
flog.debug('Enter')

env = _get_environment(collection_dir)

# Setup an sh.Command to run ansible-doc from the venv with only the collections we
Expand Down Expand Up @@ -299,4 +246,5 @@ async def get_ansible_plugin_info(venv: t.Union['VenvRunner', 'FakeVenvRunner'],
# done so, we want to then fail by raising one of the exceptions.
raise ParsingError('Parsing of plugins failed')

flog.debug('Leave')
return plugin_map
Loading