Skip to content

Latest commit

 

History

History
341 lines (304 loc) · 15.9 KB

README.md

File metadata and controls

341 lines (304 loc) · 15.9 KB

dotifi

Tests PyPI version Downloads Downloads Downloads

A utility to generate DOT files and images based on graphviz from the canvas of an Apache NiFi instance.

dotifi supports

  • connecting to Apache NiFi using nipyapi in the background and generating a graph from the canvas items
    • Process Groups
    • Remote Process Groups
    • Input and Output Ports
    • Processors
    • Connections
  • creating DOT files using Graphviz and pygraphviz
  • creating a graphic file in Graphviz supported formats
  • starting with an existing dot file as opposed to building from a live NiFi instance
  • starting from a specific process group
  • controlling the depth of recursion
  • supplying DOT files as templates for configuring
    • the root graph (and all children)
    • Any process group by ID
  • supplying name-value pairs (in a YAML configuration file) to override and set attributes
    • Processors by id
    • Remote Process Groups by id
  • ssl, certificates, and auth-tokens

Install

>pip install dotifi

with python 3 as your default

Sample output

DOT

strict digraph "nifi flow" {
	graph [compound=true,
		label="nifi flow",
		outputorder=edgesfirst,
		rankdir=LR,
		ratio=1.0
	];
	node [fixedsize=false,
		fontsize=8,
		label="\N",
		shape=rectangle,
		style=filled
	];
	edge [color="#1100FF",
		style="setlinewidth(2)"
	];
	subgraph cluster_root_1 {
		graph [id="351b1dbc-0172-1000-056d-ec78a003b493",
			label=root_1
		];
		subgraph cluster_remote_target {
			graph [id="65f57b35-0172-1000-d805-dec5391bee7e",
				label=remote_target
			];
			"65f5b8db-0172-1000-407f-93e85d1d9f94"	[id="65f5b8db-0172-1000-407f-93e85d1d9f94",
				label="Input 1 : INPUT_PORT",
				pos="-768.000000,-768.000000"];
			"65f629b8-0172-1000-4ced-efa192c35536"	[id="65f629b8-0172-1000-4ced-efa192c35536",
				label="log_remote_attributes : LogAttribute",
				pos="-344.000000,-344.000000"];
			"65f5b8db-0172-1000-407f-93e85d1d9f94" -> "65f629b8-0172-1000-4ced-efa192c35536"	[label="Input to Processor"];
		}
		subgraph cluster_root_1_1 {
			graph [id="351dbb56-0172-1000-21fa-a4af9fc0dbb1",
				label=root_1_1
			];
			subgraph "cluster_NiFi Flow" {
				graph [color=blue,
					fontcolor=white,
					id="65f8c7d5-0172-1000-a916-0e5562295e08",
					label="http://localhost:8080/nifi : Remote Process Group",
					style=filled
				];
				"37b065f7-232b-30b9-a282-3ea28f75a2a2"	[id="37b065f7-232b-30b9-a282-3ea28f75a2a2",
					label="Input 1 : INPUT_PORT"];
			}
			"b268f335-2a91-44e8-b567-c676c292fd55"	[id="b268f335-2a91-44e8-b567-c676c292fd55",
				label="Reformat Date Column : ReplaceText",
				pos="1259.009165,1259.009165"];
			"25b139c3-db89-4272-59c3-774ce58f0973"	[id="25b139c3-db89-4272-59c3-774ce58f0973",
				label="No-Op Termination : UpdateAttribute",
				pos="1265.009165,1265.009165"];
			"b268f335-2a91-44e8-b567-c676c292fd55" -> "25b139c3-db89-4272-59c3-774ce58f0973"	[label=success];
			"b268f335-2a91-44e8-b567-c676c292fd55" -> "37b065f7-232b-30b9-a282-3ea28f75a2a2"	[label=success];
			"67626c71-910a-4342-627d-e38cfcbcb924"	[id="67626c71-910a-4342-627d-e38cfcbcb924",
				label="Set CSV Content : ReplaceText",
				pos="408.000000,408.000000"];
			"67626c71-910a-4342-627d-e38cfcbcb924" -> "b268f335-2a91-44e8-b567-c676c292fd55"	[label=success];
			"bc3ad300-c93c-430b-fc94-3a32c3020f53"	[id="bc3ad300-c93c-430b-fc94-3a32c3020f53",
				label="Generate Empty File : GenerateFlowFile",
				pos="356.009165,356.009165"];
			"bc3ad300-c93c-430b-fc94-3a32c3020f53" -> "67626c71-910a-4342-627d-e38cfcbcb924"	[label=success];
		}
		"719d5fb9-cd96-4491-fe7c-e81b3f3bb555"	[id="719d5fb9-cd96-4491-fe7c-e81b3f3bb555",
			label="q=nifi : UpdateAttribute",
			pos="1242.304169,1242.304169"];
		"e829b880-ef17-432a-7c5c-1e3ecdcd690d"	[id="e829b880-ef17-432a-7c5c-1e3ecdcd690d",
			label="Search Google : InvokeHTTP",
			pos="1245.295837,1245.295837"];
		"719d5fb9-cd96-4491-fe7c-e81b3f3bb555" -> "e829b880-ef17-432a-7c5c-1e3ecdcd690d"	[label=success];
		"f96be8b1-78b2-42f2-6ba5-2579f4f6c411"	[id="f96be8b1-78b2-42f2-6ba5-2579f4f6c411",
			label="Source : GenerateFlowFile",
			pos="425.000000,425.000000"];
		"f96be8b1-78b2-42f2-6ba5-2579f4f6c411" -> "719d5fb9-cd96-4491-fe7c-e81b3f3bb555"	[label=success];
		"83ca901a-efe0-4d97-dbca-5c557cefbe6f"	[id="83ca901a-efe0-4d97-dbca-5c557cefbe6f",
			label="HashContent : HashContent",
			pos="1264.422387,1264.422387"];
		"ffcc8819-75f5-4811-e906-434e71a31f26"	[id="ffcc8819-75f5-4811-e906-434e71a31f26",
			label="Route On Status Code : RouteOnAttribute",
			pos="1242.078949,1242.078949"];
		"ffcc8819-75f5-4811-e906-434e71a31f26" -> "83ca901a-efe0-4d97-dbca-5c557cefbe6f"	[label=200];
		"e829b880-ef17-432a-7c5c-1e3ecdcd690d" -> "ffcc8819-75f5-4811-e906-434e71a31f26"	[label=Original];
		"e829b880-ef17-432a-7c5c-1e3ecdcd690d" -> "e829b880-ef17-432a-7c5c-1e3ecdcd690d"	[label=Retry];
	}
}

PNG nifi-canvas.png

poetry

dotifi is maintained using poetry for dependency management and packaging.

Getting started for development

  • Install Graphviz
  • Clone, fork, or download the source
  • Install poetry
  • If required, set up pyenv (or your preferred tool) to get a Python 3.8 environment, as poetry will use whatever the current Python is.
    • for example setup pyenv local to the project directory
  • In the source root directory run poetry install; this will install all the dependencies
  • Run poetry run pytest -v to run the tests and ensure things are working
  • Run poetry build then poetry install then poetry run dotifi with any options to run

Setting up Jetbrains PyCharm with your virtual python environment

Configuration

dotifi uses confuse for its configuration management. dotifi will take most of its configuration:

  • from commandline parameters
  • from a yaml configuration file specified with the --with-conf-file parameter
  • some combination of the two

If a configuration file is present, and some configuration value is set both from the commandline and in the configuration, then the commandline is treated as an override for the configuration file.

String values in the configuration file may be entered as shell environment variables, such as $PATH. Variables are expanded using os.path.expandvars.

usage: dotifi [-h] [--with-existing-dot-file WITH_EXISTING_DOT_FILE]
              [--with-conf-file WITH_CONF_FILE]
              [--output-dot-file OUTPUT_DOT_FILE]
              [--output-graphviz-format {canon,cmap,cmapx,cmapx_np,dia,dot,fig,gd,gd2,gif,hpgl,imap,imap_np,ismap,jpe,jpeg,jpg,mif,mp,pcl,pdf,pic,plain,plain-ext,png,ps,ps2,svg,svgz,vml,vmlz,vrml,vtx,wbmp,xdot,xlib}]
              [--output-graphviz-program {neato,dot,twopi,circo,fdp,nop}]
              [--output-graphviz-file OUTPUT_GRAPHVIZ_FILE]
              [--start-at-pg START_AT_PG] [--depth DEPTH]
              [--nifi-url NIFI_URL] [--using-ssl] [--using-user-pw]
              [--ca-file CA_FILE] [--client-cert-file CLIENT_CERT_FILE]
              [--client-key-file CLIENT_KEY_FILE]
              [--client-key-password CLIENT_KEY_PASSWORD]
              [--auth-token AUTH_TOKEN]
              [--auth-token-api-field AUTH_TOKEN_API_FIELD]
              [--nifi-username NIFI_USERNAME]
              [--nifi-user-password NIFI_USER_PASSWORD] [--verbose]
              [--generate-mock-data] [--mock-data-file MOCK_DATA_FILE]

optional arguments:
  -h, --help            show this help message and exit
  --with-existing-dot-file WITH_EXISTING_DOT_FILE, -w WITH_EXISTING_DOT_FILE
                        Output will be based on an existing DOT graph
                        definition as opposed to being built from NiFi
  --with-conf-file WITH_CONF_FILE, -c WITH_CONF_FILE
                        Path to the .yaml file with the configuration. All
                        options can be set in the configuration, with
  --output-dot-file OUTPUT_DOT_FILE, -o OUTPUT_DOT_FILE
                        Path to the gv file to store the dot results to.
  --output-graphviz-format {canon,cmap,cmapx,cmapx_np,dia,dot,fig,gd,gd2,gif,hpgl,imap,imap_np,ismap,jpe,jpeg,jpg,mif,mp,pcl,pdf,pic,plain,plain-ext,png,ps,ps2,svg,svgz,vml,vmlz,vrml,vtx,wbmp,xdot,xlib}, -f {canon,cmap,cmapx,cmapx_np,dia,dot,fig,gd,gd2,gif,hpgl,imap,imap_np,ismap,jpe,jpeg,jpg,mif,mp,pcl,pdf,pic,plain,plain-ext,png,ps,ps2,svg,svgz,vml,vmlz,vrml,vtx,wbmp,xdot,xlib}
                        The format of the graphviz generated file. Formats
                        (not all may be available on every system depending on
                        how Graphviz was built)
  --output-graphviz-program {neato,dot,twopi,circo,fdp,nop}
                        Graphviz layout method to use.
  --output-graphviz-file OUTPUT_GRAPHVIZ_FILE, -g OUTPUT_GRAPHVIZ_FILE
                        Path to the graphical file to store the graphviz
                        results to. Results will be saved with the extension
                        of the --output-graphviz-format option
  --start-at-pg START_AT_PG, -s START_AT_PG
                        The id of the process group to start at. This will be
                        a uuid. When set the output will start with this
                        process and it's descendents based on the depth
                        setting
  --depth DEPTH, -d DEPTH
                        The depth to descend to within nested process groups.
                        Note that the top level canvas is the root process
                        group. As such a depth of 0 will only output items in
                        the root canvas and not any process groups it
                        contains. A value of -1 means unlimited.
  --nifi-url NIFI_URL, -n NIFI_URL
                        The url of the NiFi instance to connect to. This is
                        used if --with-existing is not set.
  --using-ssl           Flag, when specified it signals that the NiFi
                        connection requires SSL
  --using-user-pw       Flag, when specified it signals that the NiFi
                        connection requires a username and password
  --ca-file CA_FILE     A PEM file containing certs for the root CA(s) for the
                        NiFi server
  --client-cert-file CLIENT_CERT_FILE
                        A PEM file containing the public certificates for the
                        user / client identity
  --client-key-file CLIENT_KEY_FILE
                        An encrypted (password -protected PEM file containing
                        the client's secret key
  --client-key-password CLIENT_KEY_PASSWORD
                        The password to decrypt the client_key_file
  --auth-token AUTH_TOKEN
                        Authorization token string
  --auth-token-api-field AUTH_TOKEN_API_FIELD
                        the api_key field name to set the token to. Defaults
                        to 'tokenAuth'
  --nifi-username NIFI_USERNAME
                        The NiFi user name
  --nifi-user-password NIFI_USER_PASSWORD
                        The NiFi user password
  --verbose, -v         Sets the logging level to verbose
  --generate-mock-data  Generates mock data
  --mock-data-file MOCK_DATA_FILE
                        When --generate-mock-data is specified, it will be
                        written to this file

A sample of the yaml configuration is here

# All string values in this configuration may be replaced using
# environmental variables in the form of $VARIABLENAME
#
# the version of this configuration
configuration_version : 1


#  Flag to set logging to verbose ( debugging level )
verbose:
#  Path to an existing DOT graph definition as opposed to being built from NiFi
with_existing_dot_file:
# Path to the dot file to store the dot results to
output_dot_file:

#  The format of the graphviz generated file. Formats (not all may be available on every system
#  depending on how Graphviz was built)
#  "canon", "cmap", "cmapx", "cmapx_np", "dia", "dot", "fig", "gd", "gd2", "gif",
#  "hpgl", "imap", "imap_np", "ismap", "jpe", "jpeg", "jpg", "mif", "mp", "pcl", "pdf",
#  "pic", "plain", "plain-ext", "png", "ps", "ps2", "svg", "svgz", "vml", "vmlz", "vrml",
#  "vtx", "wbmp", "xdot", "xlib"
output_graphviz_format:

#  The Graphviz layout to use
# 'neato'|'dot'|'twopi'|'circo'|'fdp'|'nop'
output_graphviz_program:

#  Path to the graphical file to store the graphviz results to. Results will be saved with the extension
#  of the output_graphviz_format option
output_graphviz_file:

#  The id of the process group to start at.  This will be a uuid.  When set the output will start
#  with this process group and its descendants based on the depth setting
start_at_pg:

#  The depth to descend to within nested process groups.  Note that the top level canvas
#  is the root process group.  As such a depth of 0 will only output items in the root canvas and
#  not any process groups it contains.  A value of -1 means unlimited
depth:

# The url of the NiFi instance to connect to.  This is used if with_existing_dot_file is not set
nifi_url:

# Flag, when specified it signals that the NiFi connection requires SSL
using_ssl:

# Flag, when specified it signals that the NiFi connection requires a username and password
using_user_pw:

# A PEM file containing certs for the root CA(s) for the NiFi server
ca_file:

# A PEM file containing the public certificates for the user / client identity
client_cert_file:

# An encrypted (password-protected) PEM file containing the client's secret key
client_key_file:

# The password to decrypt the client_key_file
client_key_password:

# The NiFi user name
nifi_user_name:

# The NiFi user password
nifi_user_password:

# Options for the entire graph
graph:
    # Path to a dot file that contains top level graph definition that sets the properties
    # and attributes at a graph level
    # see https://graphviz.org/documentation/ for information on the dot language
    template: bar.dot
# Options for specific process groups, by id
process_groups:
    # the uuid id of the process group
    351b1dbc-0172-1000-056d-ec78a003b493:
        # Path to a dot file that contains the graph definition that sets the properties
        # and attributes at a graph level for this process group and its descendants
        template: foo.dot
# Options for specific remote process groups, by id
# these values will override or mix in with the defaults
remote_process_groups:
    # the uuid of the remote process group
    35199793-0172-1000-02ea-52da1888a03d:
        # NODE attributes
        # see https://graphviz.gitlab.io/_pages/doc/info/attrs.html
        color: grey
# Options for specific processors, by id
processors:
    # the uuid of the processor
    351b1dbc-0172-1000-056d-ec78a003b49:
        # NODE attributes
        # see https://graphviz.gitlab.io/_pages/doc/info/attrs.html
        color: blue

Command line parameters that are described as flags need only be specified, without a value, to be set to True. YAML configuration values, however, need to be explicitly set to True or False.