pkg/sql/distsqlrun/column_exec_setup.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package distsqlrun

import (
	"context"
	"fmt"
	"math"
	"reflect"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/cockroachdb/cockroach/pkg/col/coltypes"
	"github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
	"github.com/cockroachdb/cockroach/pkg/sql/distsqlpb"
	"github.com/cockroachdb/cockroach/pkg/sql/exec"
	"github.com/cockroachdb/cockroach/pkg/sql/exec/colrpc"
	"github.com/cockroachdb/cockroach/pkg/sql/exec/typeconv"
	"github.com/cockroachdb/cockroach/pkg/sql/exec/vecbuiltins"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/errors"
	"github.com/opentracing/opentracing-go"
)

// checkNumIn verifies that exactly numIn input operators were supplied and
// returns a descriptive error otherwise.
func checkNumIn(inputs []exec.Operator, numIn int) error {
	if got := len(inputs); got != numIn {
		return errors.Errorf("expected %d input(s), got %d", numIn, got)
	}
	return nil
}

// wrapRowSource plugs a row-based processor into a columnar flow. The
// processor is built by newToWrap on top of a RowSource view of input, and its
// output is converted back into columnar form by the returned columnarizer.
func wrapRowSource(
	ctx context.Context,
	flowCtx *FlowCtx,
	input exec.Operator,
	inputTypes []types.T,
	newToWrap func(RowSource) (RowSource, error),
) (*columnarizer, error) {
	// TODO(asubiotto): Plumb proper processorIDs once we have stats.
	var processorID int32

	var toWrapInput RowSource
	switch in := input.(type) {
	case *columnarizer:
		// Optimization: a columnarizer's own input is necessarily a RowSource,
		// so feed that to the wrapped processor directly instead of converting
		// rows to columns and back again.
		// TODO(asubiotto): We might need to do some extra work to remove
		// references to this operator (e.g. streamIDToOp).
		toWrapInput = in.input
	default:
		// Any other operator has to be materialized into rows first.
		m, err := newMaterializer(
			flowCtx,
			processorID,
			input,
			inputTypes,
			&distsqlpb.PostProcessSpec{},
			nil, /* output */
			nil, /* metadataSourcesQueue */
			nil, /* outputStatsToTrace */
			nil, /* cancelFlow */
		)
		if err != nil {
			return nil, err
		}
		toWrapInput = m
	}

	wrapped, err := newToWrap(toWrapInput)
	if err != nil {
		return nil, err
	}
	return newColumnarizer(ctx, flowCtx, processorID, wrapped)
}

// newColOperatorResult bundles everything that newColOperator produces while
// planning a single processor spec.
type newColOperatorResult struct {
	// op is the root of the operator chain planned so far.
	op              exec.Operator
	// columnTypes holds the types of the columns output by op. It is updated
	// as projections and renders are planned on top of the core operator.
	columnTypes     []types.T
	// memUsage accumulates the memory usage estimates gathered during planning
	// (static operator estimates plus the amounts reported when planning
	// filters and renders).
	memUsage        int
	// metadataSources collects the planned components that were registered as
	// metadata sources (the table scan and wrapped row-based processors).
	metadataSources []distsqlpb.MetadataSource
	// isStreaming records whether the planned "core" operator is considered
	// streaming.
	isStreaming     bool
}

// newColOperator creates a new columnar operator according to the given spec.
//
// The "core" of the spec selects which operator is planned on top of inputs;
// the number of inputs must match what the core expects. Afterwards the
// post-processing spec is applied in this order: filter, then either a simple
// projection or render expressions, then offset, then limit.
//
// The returned result carries the root operator, the types of its output
// columns, the accumulated memory usage estimate, any metadata sources that
// were created, and whether the core operator is streaming. An error is
// returned when the spec (or a part of it) is not supported by the vectorized
// engine or when planning of a sub-component fails; the result must not be
// used in that case.
func newColOperator(
	ctx context.Context, flowCtx *FlowCtx, spec *distsqlpb.ProcessorSpec, inputs []exec.Operator,
) (result newColOperatorResult, err error) {
	log.VEventf(ctx, 2, "planning col operator for spec %q", spec)

	core := &spec.Core
	post := &spec.Post

	// By default, we safely assume that an operator is not streaming. Note that
	// projections, renders, filters, limits, offsets as well as all internal
	// operators (like stats collectors and cancel checkers) are streaming, so in
	// order to determine whether the resulting chain of operators is streaming,
	// it is sufficient to look only at the "core" operator.
	result.isStreaming = false
	switch {
	case core.Noop != nil:
		if err := checkNumIn(inputs, 1); err != nil {
			return result, err
		}
		result.op, result.isStreaming = exec.NewNoop(inputs[0]), true
		result.columnTypes = spec.Input[0].ColumnTypes
	case core.TableReader != nil:
		if err := checkNumIn(inputs, 0); err != nil {
			return result, err
		}
		if core.TableReader.IsCheck {
			return result, errors.Newf("scrub table reader is unsupported in vectorized")
		}
		var scanOp *colBatchScan
		scanOp, err = newColBatchScan(flowCtx, core.TableReader, post)
		if err != nil {
			return result, err
		}
		result.op, result.isStreaming = scanOp, true
		result.metadataSources = append(result.metadataSources, scanOp)
		// colBatchScan is wrapped with a cancel checker below, so we need to
		// account for its static memory usage here. We also need to log its
		// creation separately.
		result.memUsage += scanOp.EstimateStaticMemoryUsage()
		log.VEventf(ctx, 1, "made op %T\n", result.op)

		// We want to check for cancellation once per input batch, and wrapping
		// only colBatchScan with an exec.CancelChecker allows us to do just that.
		// It's sufficient for most of the operators since they are extremely fast.
		// However, some of the long-running operators (for example, sorter) are
		// still responsible for doing the cancellation check on their own while
		// performing long operations.
		result.op = exec.NewCancelChecker(result.op)
		returnMutations := core.TableReader.Visibility == distsqlpb.ScanVisibility_PUBLIC_AND_NOT_PUBLIC
		result.columnTypes = core.TableReader.Table.ColumnTypesWithMutations(returnMutations)
	case core.Aggregator != nil:
		if err := checkNumIn(inputs, 1); err != nil {
			return result, err
		}
		aggSpec := core.Aggregator
		if len(aggSpec.Aggregations) == 0 {
			// We can get an aggregator when no aggregate functions are present if
			// HAVING clause is present, for example, with a query as follows:
			// SELECT 1 FROM t HAVING true. In this case, we plan a special operator
			// that outputs a batch of length 1 without actual columns once and then
			// zero-length batches. The actual "data" will be added by projections
			// below.
			// TODO(solon): The distsql plan for this case includes a TableReader, so
			// we end up creating an orphaned colBatchScan. We should avoid that.
			// Ideally the optimizer would not plan a scan in this unusual case.
			result.op, result.isStreaming, err = exec.NewSingleTupleNoInputOp(), true, nil
			// We make columnTypes non-nil so that sanity check doesn't panic.
			result.columnTypes = make([]types.T, 0)
			break
		}
		if len(aggSpec.GroupCols) == 0 &&
			len(aggSpec.Aggregations) == 1 &&
			aggSpec.Aggregations[0].FilterColIdx == nil &&
			aggSpec.Aggregations[0].Func == distsqlpb.AggregatorSpec_COUNT_ROWS &&
			!aggSpec.Aggregations[0].Distinct {
			result.op, result.isStreaming, err = exec.NewCountOp(inputs[0]), true, nil
			result.columnTypes = []types.T{*types.Int}
			break
		}

		var groupCols, orderedCols util.FastIntSet

		for _, col := range aggSpec.OrderedGroupCols {
			orderedCols.Add(int(col))
		}

		// A hash aggregator is needed as soon as at least one grouping column
		// is not covered by the input ordering.
		needHash := false
		for _, col := range aggSpec.GroupCols {
			if !orderedCols.Contains(int(col)) {
				needHash = true
			}
			groupCols.Add(int(col))
		}
		if !orderedCols.SubsetOf(groupCols) {
			return result, errors.AssertionFailedf("ordered cols must be a subset of grouping cols")
		}

		aggTyps := make([][]types.T, len(aggSpec.Aggregations))
		aggCols := make([][]uint32, len(aggSpec.Aggregations))
		aggFns := make([]distsqlpb.AggregatorSpec_Func, len(aggSpec.Aggregations))
		result.columnTypes = make([]types.T, len(aggSpec.Aggregations))
		for i, agg := range aggSpec.Aggregations {
			if agg.Distinct {
				return result, errors.Newf("distinct aggregation not supported")
			}
			if agg.FilterColIdx != nil {
				return result, errors.Newf("filtering aggregation not supported")
			}
			if len(agg.Arguments) > 0 {
				return result, errors.Newf("aggregates with arguments not supported")
			}
			aggTyps[i] = make([]types.T, len(agg.ColIdx))
			for j, colIdx := range agg.ColIdx {
				aggTyps[i][j] = spec.Input[0].ColumnTypes[colIdx]
			}
			aggCols[i] = agg.ColIdx
			aggFns[i] = agg.Func
			switch agg.Func {
			case distsqlpb.AggregatorSpec_SUM:
				switch aggTyps[i][0].Family() {
				case types.IntFamily:
					// TODO(alfonso): plan ordinary SUM on integer types by casting to DECIMAL
					// at the end, mod issues with overflow. Perhaps to avoid the overflow
					// issues, at first, we could plan SUM for all types besides Int64.
					return result, errors.Newf("sum on int cols not supported (use sum_int)")
				}
			case distsqlpb.AggregatorSpec_SUM_INT:
				// TODO(yuzefovich): support this case through vectorize.
				if aggTyps[i][0].Width() != 64 {
					return result, errors.Newf("sum_int is only supported on Int64 through vectorized")
				}
			}
			_, retType, err := GetAggregateInfo(agg.Func, aggTyps[i]...)
			if err != nil {
				return result, err
			}
			result.columnTypes[i] = *retType
		}
		var typs []coltypes.T
		typs, err = typeconv.FromColumnTypes(spec.Input[0].ColumnTypes)
		if err != nil {
			return result, err
		}
		if needHash {
			result.op, err = exec.NewHashAggregator(
				inputs[0], typs, aggFns, aggSpec.GroupCols, aggCols, isScalarAggregate(aggSpec),
			)
		} else {
			result.op, err = exec.NewOrderedAggregator(
				inputs[0], typs, aggFns, aggSpec.GroupCols, aggCols, isScalarAggregate(aggSpec),
			)
			result.isStreaming = true
		}

	case core.Distinct != nil:
		if err := checkNumIn(inputs, 1); err != nil {
			return result, err
		}

		var distinctCols, orderedCols util.FastIntSet

		for _, col := range core.Distinct.OrderedColumns {
			orderedCols.Add(int(col))
		}
		for _, col := range core.Distinct.DistinctColumns {
			if !orderedCols.Contains(int(col)) {
				return result, errors.Newf("unsorted distinct not supported")
			}
			distinctCols.Add(int(col))
		}
		if !orderedCols.SubsetOf(distinctCols) {
			return result, errors.AssertionFailedf("ordered cols must be a subset of distinct cols")
		}

		result.columnTypes = spec.Input[0].ColumnTypes
		var typs []coltypes.T
		typs, err = typeconv.FromColumnTypes(result.columnTypes)
		if err != nil {
			return result, err
		}
		result.op, err = exec.NewOrderedDistinct(inputs[0], core.Distinct.OrderedColumns, typs)
		result.isStreaming = true

	case core.Ordinality != nil:
		if err := checkNumIn(inputs, 1); err != nil {
			return result, err
		}
		// The full slice expression caps the capacity at the length, which
		// forces append to allocate a fresh backing array. Appending directly
		// to the spec's slice could otherwise write into spare capacity that
		// is shared with other slices derived from the same spec.
		inTyps := spec.Input[0].ColumnTypes
		result.columnTypes = append(inTyps[:len(inTyps):len(inTyps)], *types.Int)
		result.op, result.isStreaming = exec.NewOrdinalityOp(inputs[0]), true

	case core.HashJoiner != nil:
		if err := checkNumIn(inputs, 2); err != nil {
			return result, err
		}

		var leftTypes, rightTypes []coltypes.T
		leftTypes, err = typeconv.FromColumnTypes(spec.Input[0].ColumnTypes)
		if err != nil {
			return result, err
		}
		rightTypes, err = typeconv.FromColumnTypes(spec.Input[1].ColumnTypes)
		if err != nil {
			return result, err
		}

		nLeftCols := uint32(len(leftTypes))
		nRightCols := uint32(len(rightTypes))

		leftOutCols := make([]uint32, 0)
		rightOutCols := make([]uint32, 0)

		// Note that we do not need a special treatment in case of LEFT SEMI and
		// LEFT ANTI joins when setting up outCols because in such cases there will
		// be a projection with post.OutputColumns already projecting out the right
		// side.
		if post.Projection {
			for _, col := range post.OutputColumns {
				if col < nLeftCols {
					leftOutCols = append(leftOutCols, col)
				} else {
					rightOutCols = append(rightOutCols, col-nLeftCols)
				}
			}
		} else {
			for i := uint32(0); i < nLeftCols; i++ {
				leftOutCols = append(leftOutCols, i)
			}
			for i := uint32(0); i < nRightCols; i++ {
				rightOutCols = append(rightOutCols, i)
			}
		}

		var (
			onExpr         *distsqlpb.Expression
			filterPlanning *filterPlanningState
		)
		if !core.HashJoiner.OnExpr.Empty() {
			if core.HashJoiner.Type != sqlbase.JoinType_INNER {
				return result, errors.Newf("can't plan non-inner hash join with on expressions")
			}
			onExpr = &core.HashJoiner.OnExpr
			filterPlanning = newFilterPlanningState(len(leftTypes), len(rightTypes))
			leftOutCols, rightOutCols = filterPlanning.renderAllNeededCols(
				*onExpr, leftOutCols, rightOutCols,
			)
		}

		result.op, err = exec.NewEqHashJoinerOp(
			inputs[0],
			inputs[1],
			core.HashJoiner.LeftEqColumns,
			core.HashJoiner.RightEqColumns,
			leftOutCols,
			rightOutCols,
			leftTypes,
			rightTypes,
			core.HashJoiner.RightEqColumnsAreKey,
			core.HashJoiner.LeftEqColumnsAreKey || core.HashJoiner.RightEqColumnsAreKey,
			core.HashJoiner.Type,
		)
		if err != nil {
			return result, err
		}

		result.columnTypes = make([]types.T, nLeftCols+nRightCols)
		copy(result.columnTypes, spec.Input[0].ColumnTypes)
		if core.HashJoiner.Type != sqlbase.JoinType_LEFT_SEMI {
			// TODO(yuzefovich): update this conditional once LEFT ANTI is supported.
			copy(result.columnTypes[nLeftCols:], spec.Input[1].ColumnTypes)
		} else {
			result.columnTypes = result.columnTypes[:nLeftCols]
		}

		if onExpr != nil {
			// The ON expression is planned as a filter on top of the joiner; any
			// error is picked up by the common check after the switch.
			filterPlanning.remapIVars(onExpr)
			err = result.planFilterExpr(flowCtx.NewEvalCtx(), *onExpr)
			filterPlanning.projectOutExtraCols(&result, leftOutCols, rightOutCols)
		}

	case core.MergeJoiner != nil:
		if err := checkNumIn(inputs, 2); err != nil {
			return result, err
		}

		if core.MergeJoiner.Type.IsSetOpJoin() {
			return result, errors.AssertionFailedf("unexpectedly %s merge join was planned", core.MergeJoiner.Type.String())
		}
		// Merge joiner is a streaming operator when equality columns form a key
		// for both of the inputs.
		result.isStreaming = core.MergeJoiner.LeftEqColumnsAreKey && core.MergeJoiner.RightEqColumnsAreKey

		var leftTypes, rightTypes []coltypes.T
		leftTypes, err = typeconv.FromColumnTypes(spec.Input[0].ColumnTypes)
		if err != nil {
			return result, err
		}
		rightTypes, err = typeconv.FromColumnTypes(spec.Input[1].ColumnTypes)
		if err != nil {
			return result, err
		}

		nLeftCols := uint32(len(leftTypes))
		nRightCols := uint32(len(rightTypes))

		leftOutCols := make([]uint32, 0, nLeftCols)
		rightOutCols := make([]uint32, 0, nRightCols)

		// Note that we do not need a special treatment in case of LEFT SEMI and
		// LEFT ANTI joins when setting up outCols because in such cases there will
		// be a projection with post.OutputColumns already projecting out the right
		// side.
		if post.Projection {
			for _, col := range post.OutputColumns {
				if col < nLeftCols {
					leftOutCols = append(leftOutCols, col)
				} else {
					rightOutCols = append(rightOutCols, col-nLeftCols)
				}
			}
		} else {
			for i := uint32(0); i < nLeftCols; i++ {
				leftOutCols = append(leftOutCols, i)
			}
			for i := uint32(0); i < nRightCols; i++ {
				rightOutCols = append(rightOutCols, i)
			}
		}

		var (
			onExpr            *distsqlpb.Expression
			filterPlanning    *filterPlanningState
			filterOnlyOnLeft  bool
			filterConstructor func(exec.Operator) (exec.Operator, error)
		)
		if !core.MergeJoiner.OnExpr.Empty() {
			// At the moment, we want to be on the conservative side and not run
			// queries with ON expressions when vectorize=auto, so we say that the
			// merge join is not streaming which will reject running such a query
			// through vectorized engine with 'auto' setting.
			// TODO(yuzefovich): remove this when we're confident in ON expression
			// support.
			result.isStreaming = false

			onExpr = &core.MergeJoiner.OnExpr
			filterPlanning = newFilterPlanningState(len(leftTypes), len(rightTypes))
			switch core.MergeJoiner.Type {
			case sqlbase.JoinType_INNER:
				leftOutCols, rightOutCols = filterPlanning.renderAllNeededCols(
					*onExpr, leftOutCols, rightOutCols,
				)
			case sqlbase.JoinType_LEFT_SEMI, sqlbase.JoinType_LEFT_ANTI:
				filterOnlyOnLeft = filterPlanning.isFilterOnlyOnLeft(*onExpr)
				filterConstructor = func(op exec.Operator) (exec.Operator, error) {
					// Build the combined column types in a freshly allocated
					// slice. Appending directly to the left input's types could
					// write into spare capacity of the spec's backing array,
					// and every invocation of this constructor would then
					// share (and possibly clobber) the same memory.
					leftColTyps, rightColTyps := spec.Input[0].ColumnTypes, spec.Input[1].ColumnTypes
					combinedTyps := make([]types.T, 0, len(leftColTyps)+len(rightColTyps))
					combinedTyps = append(combinedTyps, leftColTyps...)
					combinedTyps = append(combinedTyps, rightColTyps...)
					r := newColOperatorResult{
						op:          op,
						columnTypes: combinedTyps,
					}
					// We don't need to remap the indexed vars in onExpr because the
					// filter will be run alongside the merge joiner, and it will have
					// access to all of the columns from both sides.
					err := r.planFilterExpr(flowCtx.NewEvalCtx(), *onExpr)
					return r.op, err
				}
			default:
				return result, errors.Errorf("can only plan INNER, LEFT SEMI, and LEFT ANTI merge joins with ON expressions")
			}
		}

		result.op, err = exec.NewMergeJoinOp(
			core.MergeJoiner.Type,
			inputs[0],
			inputs[1],
			leftOutCols,
			rightOutCols,
			leftTypes,
			rightTypes,
			core.MergeJoiner.LeftOrdering.Columns,
			core.MergeJoiner.RightOrdering.Columns,
			filterConstructor,
			filterOnlyOnLeft,
		)
		if err != nil {
			return result, err
		}

		result.columnTypes = make([]types.T, nLeftCols+nRightCols)
		copy(result.columnTypes, spec.Input[0].ColumnTypes)
		if core.MergeJoiner.Type != sqlbase.JoinType_LEFT_SEMI &&
			core.MergeJoiner.Type != sqlbase.JoinType_LEFT_ANTI {
			copy(result.columnTypes[nLeftCols:], spec.Input[1].ColumnTypes)
		} else {
			result.columnTypes = result.columnTypes[:nLeftCols]
		}

		if onExpr != nil && core.MergeJoiner.Type == sqlbase.JoinType_INNER {
			// For inner joins the ON expression is planned as a filter on top of
			// the joiner; any error is picked up by the common check after the
			// switch.
			filterPlanning.remapIVars(onExpr)
			err = result.planFilterExpr(flowCtx.NewEvalCtx(), *onExpr)
			filterPlanning.projectOutExtraCols(&result, leftOutCols, rightOutCols)
		}

	case core.JoinReader != nil:
		if err := checkNumIn(inputs, 1); err != nil {
			return result, err
		}

		var c *columnarizer
		c, err = wrapRowSource(ctx, flowCtx, inputs[0], spec.Input[0].ColumnTypes, func(input RowSource) (RowSource, error) {
			var (
				jr  RowSource
				err error
			)
			// The lookup and index joiners need to be passed the post-process specs,
			// since they inspect them to figure out information about needed columns.
			// This means that we'll let those processors do any renders or filters,
			// which isn't ideal. We could improve this.
			if len(core.JoinReader.LookupColumns) == 0 {
				jr, err = newIndexJoiner(
					flowCtx, spec.ProcessorID, core.JoinReader, input, post, nil, /* output */
				)
			} else {
				jr, err = newJoinReader(
					flowCtx, spec.ProcessorID, core.JoinReader, input, post, nil, /* output */
				)
			}
			// The wrapped processor handles the post-processing itself, so make
			// sure it is not planned a second time below.
			post = &distsqlpb.PostProcessSpec{}
			if err != nil {
				return nil, err
			}
			result.columnTypes = jr.OutputTypes()
			return jr, nil
		})
		if err != nil {
			// Bail out before storing c: on failure it is a nil *columnarizer,
			// and placing it into the interface-typed fields below would yield
			// non-nil interface values that wrap a nil pointer.
			return result, err
		}
		result.op, result.isStreaming = c, true
		result.metadataSources = append(result.metadataSources, c)

	case core.Sorter != nil:
		if err := checkNumIn(inputs, 1); err != nil {
			return result, err
		}
		input := inputs[0]
		var inputTypes []coltypes.T
		inputTypes, err = typeconv.FromColumnTypes(spec.Input[0].ColumnTypes)
		if err != nil {
			return result, err
		}
		orderingCols := core.Sorter.OutputOrdering.Columns
		matchLen := core.Sorter.OrderingMatchLen
		if matchLen > 0 {
			// The input is already partially ordered. Use a chunks sorter to avoid
			// loading all the rows into memory.
			result.op, err = exec.NewSortChunks(input, inputTypes, orderingCols, int(matchLen))
		} else if post.Limit != 0 && post.Filter.Empty() &&
			post.Limit < math.MaxUint16 && post.Offset < math.MaxUint16-post.Limit {
			// There is a limit specified with no post-process filter, so we know
			// exactly how many rows the sorter should output. Choose a top K sorter,
			// which uses a heap to avoid storing more rows than necessary.
			//
			// The bound check above is equivalent to
			// post.Limit+post.Offset < math.MaxUint16 but cannot be fooled by the
			// unsigned sum wrapping around for very large limits or offsets.
			k := uint16(post.Limit + post.Offset)
			result.op, result.isStreaming = exec.NewTopKSorter(input, inputTypes, orderingCols, k), true
		} else {
			// No optimizations possible. Default to the standard sort operator.
			result.op, err = exec.NewSorter(input, inputTypes, orderingCols)
		}
		result.columnTypes = spec.Input[0].ColumnTypes

	case core.Windower != nil:
		if err := checkNumIn(inputs, 1); err != nil {
			return result, err
		}
		if len(core.Windower.WindowFns) != 1 {
			return result, errors.Newf("only a single window function is currently supported")
		}
		wf := core.Windower.WindowFns[0]
		if wf.Frame != nil &&
			(wf.Frame.Mode != distsqlpb.WindowerSpec_Frame_RANGE ||
				wf.Frame.Bounds.Start.BoundType != distsqlpb.WindowerSpec_Frame_UNBOUNDED_PRECEDING ||
				(wf.Frame.Bounds.End != nil && wf.Frame.Bounds.End.BoundType != distsqlpb.WindowerSpec_Frame_CURRENT_ROW)) {
			return result, errors.Newf("window functions with non-default window frames are not supported")
		}
		if wf.Func.AggregateFunc != nil {
			return result, errors.Newf("aggregate functions used as window functions are not supported")
		}

		input := inputs[0]
		var typs []coltypes.T
		typs, err = typeconv.FromColumnTypes(spec.Input[0].ColumnTypes)
		if err != nil {
			return result, err
		}
		// When a partitioner is planned, it puts a temporary partition column at
		// position wf.OutputColIdx, which shifts the window function's output
		// column by one.
		tempPartitionColOffset, partitionColIdx := 0, -1
		if len(core.Windower.PartitionBy) > 0 {
			// TODO(yuzefovich): add support for hashing partitioner (probably by
			// leveraging hash routers once we can distribute). The decision about
			// which kind of partitioner to use should come from the optimizer.
			input, err = exec.NewWindowSortingPartitioner(input, typs, core.Windower.PartitionBy, wf.Ordering.Columns, int(wf.OutputColIdx))
			tempPartitionColOffset, partitionColIdx = 1, int(wf.OutputColIdx)
		} else {
			if len(wf.Ordering.Columns) > 0 {
				input, err = exec.NewSorter(input, typs, wf.Ordering.Columns)
			}
			// TODO(yuzefovich): when both PARTITION BY and ORDER BY clauses are
			// omitted, the window function operator is actually streaming.
		}
		if err != nil {
			return result, err
		}

		orderingCols := make([]uint32, len(wf.Ordering.Columns))
		for i, col := range wf.Ordering.Columns {
			orderingCols[i] = col.ColIdx
		}
		switch *wf.Func.WindowFunc {
		case distsqlpb.WindowerSpec_ROW_NUMBER:
			result.op = vecbuiltins.NewRowNumberOperator(input, int(wf.OutputColIdx)+tempPartitionColOffset, partitionColIdx)
		case distsqlpb.WindowerSpec_RANK:
			result.op, err = vecbuiltins.NewRankOperator(input, typs, false /* dense */, orderingCols, int(wf.OutputColIdx)+tempPartitionColOffset, partitionColIdx)
		case distsqlpb.WindowerSpec_DENSE_RANK:
			result.op, err = vecbuiltins.NewRankOperator(input, typs, true /* dense */, orderingCols, int(wf.OutputColIdx)+tempPartitionColOffset, partitionColIdx)
		default:
			return result, errors.Newf("window function %s is not supported", wf.String())
		}

		if partitionColIdx != -1 {
			// Window partitioner will append a temporary column to the batch which
			// we want to project out.
			projection := make([]uint32, 0, wf.OutputColIdx+1)
			for i := uint32(0); i < wf.OutputColIdx; i++ {
				projection = append(projection, i)
			}
			projection = append(projection, wf.OutputColIdx+1)
			result.op = exec.NewSimpleProjectOp(result.op, int(wf.OutputColIdx+1), projection)
		}

		// As in the ordinality case, cap the capacity so that append allocates a
		// fresh backing array instead of possibly writing into the spec's one.
		inTyps := spec.Input[0].ColumnTypes
		result.columnTypes = append(inTyps[:len(inTyps):len(inTyps)], *types.Int)

	default:
		return result, errors.Newf("unsupported processor core %q", core)
	}

	// Several cases above defer their error handling to this common check.
	if err != nil {
		return result, err
	}

	// After constructing the base operator, calculate the memory usage
	// of the operator.
	if sMemOp, ok := result.op.(exec.StaticMemoryOperator); ok {
		result.memUsage += sMemOp.EstimateStaticMemoryUsage()
	}

	log.VEventf(ctx, 1, "made op %T\n", result.op)

	if result.columnTypes == nil {
		return result, errors.AssertionFailedf("output columnTypes unset after planning %T", result.op)
	}

	if !post.Filter.Empty() {
		if err = result.planFilterExpr(flowCtx.NewEvalCtx(), post.Filter); err != nil {
			return result, err
		}
	}
	if post.Projection {
		result.op = exec.NewSimpleProjectOp(result.op, len(result.columnTypes), post.OutputColumns)
		// Update output columnTypes.
		newTypes := make([]types.T, 0, len(post.OutputColumns))
		for _, j := range post.OutputColumns {
			newTypes = append(newTypes, result.columnTypes[j])
		}
		result.columnTypes = newTypes
	} else if post.RenderExprs != nil {
		log.VEventf(ctx, 2, "planning render expressions %+v", post.RenderExprs)
		// Every render expression is planned as a projection; renderedCols
		// remembers where each expression's output landed so that a final
		// simple projection can pick exactly those columns, in order.
		var renderedCols []uint32
		for _, expr := range post.RenderExprs {
			var (
				helper    exprHelper
				renderMem int
			)
			err := helper.init(expr, result.columnTypes, flowCtx.EvalCtx)
			if err != nil {
				return result, err
			}
			var outputIdx int
			result.op, outputIdx, result.columnTypes, renderMem, err = planProjectionOperators(
				flowCtx.NewEvalCtx(), helper.expr, result.columnTypes, result.op)
			if err != nil {
				return result, errors.Wrapf(err, "unable to columnarize render expression %q", expr)
			}
			if outputIdx < 0 {
				return result, errors.AssertionFailedf("missing outputIdx")
			}
			result.memUsage += renderMem
			renderedCols = append(renderedCols, uint32(outputIdx))
		}
		result.op = exec.NewSimpleProjectOp(result.op, len(result.columnTypes), renderedCols)
		newTypes := make([]types.T, 0, len(renderedCols))
		for _, j := range renderedCols {
			newTypes = append(newTypes, result.columnTypes[j])
		}
		result.columnTypes = newTypes
	}
	if post.Offset != 0 {
		result.op = exec.NewOffsetOp(result.op, post.Offset)
	}
	if post.Limit != 0 {
		result.op = exec.NewLimitOp(result.op, post.Limit)
	}
	return result, err
}

// filterPlanningState keeps track of the bookkeeping needed to plan a filter
// (a join's ON expression) on top of a joiner that outputs only a subset of
// its inputs' columns.
type filterPlanningState struct {
	// numLeftInputCols and numRightInputCols are the number of columns of the
	// left and right inputs of the join, respectively.
	numLeftInputCols  int
	numRightInputCols int
	// indexVarMap will be populated when rendering all needed columns in case
	// when at least one column from either side is used by the filter. It is
	// indexed by the zero-based ordinal of a column in the concatenation of
	// both inputs and holds the position of that column in the joiner's output
	// (or -1 if the column is not output).
	indexVarMap       []int
	// extraLeftOutCols and extraRightOutCols count the columns that
	// renderAllNeededCols appended to the left and right output columns solely
	// because the filter needs them; projectOutExtraCols removes them again.
	extraLeftOutCols  int
	extraRightOutCols int
}

// newFilterPlanningState returns a filterPlanningState for a join whose left
// and right inputs have the given numbers of columns. All other fields start
// out at their zero values.
func newFilterPlanningState(numLeftInputCols, numRightInputCols int) *filterPlanningState {
	state := new(filterPlanningState)
	state.numLeftInputCols = numLeftInputCols
	state.numRightInputCols = numRightInputCols
	return state
}

// renderAllNeededCols guarantees that every column referenced by the filter
// expression is output by the joiner. Columns referenced by the filter but
// absent from leftOutCols/rightOutCols are appended to the respective slice.
// Along the way it fills in p.indexVarMap (original ordinal -> position in the
// joiner's output, -1 when not output) and counts the appended columns in
// p.extraLeftOutCols/p.extraRightOutCols so that they can be projected out
// once the filter has run.
//
// If the filter references no columns at all, nothing is changed and
// p.indexVarMap stays nil.
//
// It returns the (possibly extended) leftOutCols and rightOutCols.
func (p *filterPlanningState) renderAllNeededCols(
	filter distsqlpb.Expression, leftOutCols []uint32, rightOutCols []uint32,
) ([]uint32, []uint32) {
	numInputCols := p.numLeftInputCols + p.numRightInputCols
	filterCols := findIVarsInRange(filter, 0 /* start */, numInputCols)
	if len(filterCols) == 0 {
		// The filter doesn't reference any column, so there is nothing to do.
		return leftOutCols, rightOutCols
	}

	p.indexVarMap = make([]int, numInputCols)
	for i := range p.indexVarMap {
		p.indexVarMap[i] = -1
	}

	// Worked example used in the comments below (ordinals in the expression
	// are 1-based, everything else is 0-based): both inputs have three
	// columns, the filter is @1 = @4 AND @3 = @5, and on entry
	// leftOutCols = {0} and rightOutCols = {0}, so filterCols = {0, 2, 3, 4}.

	// Left side: record the columns that are already output, then append the
	// ones only the filter needs.
	// In the example this yields leftOutCols = {0, 2} and
	// p.indexVarMap = {0, -1, 1, -1, -1, -1}.
	for outIdx, col := range leftOutCols {
		p.indexVarMap[col] = outIdx
	}
	for _, col := range filterCols {
		if int(col) >= p.numLeftInputCols || p.indexVarMap[col] != -1 {
			continue
		}
		p.indexVarMap[col] = len(leftOutCols)
		leftOutCols = append(leftOutCols, col)
		p.extraLeftOutCols++
	}

	// Right side: this can only be done now because output positions of the
	// right columns come after all left output columns, whose final count was
	// unknown until the loop above finished. rightOffset (3 in the example)
	// converts between ordinals in the concatenated input and ordinals local
	// to the right input, which is what rightOutCols contains.
	// In the example, recording the existing rightOutCols = {0} gives
	// p.indexVarMap = {0, -1, 1, 2, -1, -1}; appending the missing column then
	// gives rightOutCols = {0, 1} and p.indexVarMap = {0, -1, 1, 2, 3, -1},
	// with one extra column recorded for each side.
	rightOffset := uint32(p.numLeftInputCols)
	for outIdx, col := range rightOutCols {
		p.indexVarMap[rightOffset+col] = len(leftOutCols) + outIdx
	}
	for _, col := range filterCols {
		if col < rightOffset || p.indexVarMap[col] != -1 {
			continue
		}
		p.indexVarMap[col] = len(leftOutCols) + len(rightOutCols)
		rightOutCols = append(rightOutCols, col-rightOffset)
		p.extraRightOutCols++
	}
	return leftOutCols, rightOutCols
}

// isFilterOnlyOnLeft reports whether the filter expression references no
// columns of the right input.
func (p *filterPlanningState) isFilterOnlyOnLeft(filter distsqlpb.Expression) bool {
	// Ordinals of the right input's columns occupy the range that directly
	// follows the left input's columns.
	rightStart := p.numLeftInputCols
	rightEnd := rightStart + p.numRightInputCols
	return len(findIVarsInRange(filter, rightStart, rightEnd)) == 0
}

// remapIVars remaps tree.IndexedVars in expr using p.indexVarMap. Note that
// expr is modified in-place.
//
// If p.indexVarMap is nil, there is nothing to remap and expr is left
// untouched. If expr carries a LocalExpr, the remapping is delegated to
// sqlbase.RemapIVarsInTypedExpr; otherwise the textual form in expr.Expr is
// rewritten.
func (p *filterPlanningState) remapIVars(expr *distsqlpb.Expression) {
	if p.indexVarMap == nil {
		// If p.indexVarMap is nil, then there is no remapping to do.
		return
	}
	if expr.LocalExpr != nil {
		expr.LocalExpr = sqlbase.RemapIVarsInTypedExpr(expr.LocalExpr, p.indexVarMap)
		return
	}

	// Rewrite all "@<ordinal>" references in a single left-to-right pass.
	// Replacing one ordinal at a time over the whole string is incorrect
	// because an ordinal produced by one replacement can be picked up (and
	// remapped again) by a later one: with the map {0, -1, 1, 2, 3, -1} the
	// expression "@1 = @4 AND @3 = @5" must become "@1 = @3 AND @2 = @4",
	// whereas successive replacements collapse it to "@1 = @2 AND @2 = @2".
	// Scanning each reference exactly once also handles multi-digit ordinals
	// (e.g. @1 vs. @11) naturally since the whole digit run is consumed.
	//
	// NOTE: like the textual replacement it supersedes, this does not parse
	// SQL, so an "@<digits>" sequence inside a string literal is rewritten too.
	src := expr.Expr
	var remapped strings.Builder
	remapped.Grow(len(src))
	for i := 0; i < len(src); {
		if src[i] != '@' {
			remapped.WriteByte(src[i])
			i++
			continue
		}
		// Consume the digits following '@'. Once the value exceeds the number
		// of known columns we stop accumulating (so that absurdly long digit
		// runs can't overflow) but keep consuming the digits.
		end := i + 1
		ordinal, inRange := 0, true
		for ; end < len(src) && src[end] >= '0' && src[end] <= '9'; end++ {
			if inRange {
				ordinal = ordinal*10 + int(src[end]-'0')
				inRange = ordinal <= len(p.indexVarMap)
			}
		}
		// Ordinals count from 1, hence the -1/+1 adjustments. References that
		// are not valid ordinals (no digits, leading zero, out of range) or
		// that have no mapping are copied through unchanged.
		if inRange && ordinal > 0 && src[i+1] != '0' && p.indexVarMap[ordinal-1] != -1 {
			fmt.Fprintf(&remapped, "@%d", p.indexVarMap[ordinal-1]+1)
		} else {
			remapped.WriteString(src[i:end])
		}
		i = end
	}
	expr.Expr = remapped.String()
}

// projectOutExtraCols plans, when needed, a projection on top of result.op
// that drops the columns which were output only because the filter expression
// required them. leftOutCols and rightOutCols are expected to have the extra
// columns at their tails; when no extra columns were added, result is left
// unchanged.
func (p *filterPlanningState) projectOutExtraCols(
	result *newColOperatorResult, leftOutCols, rightOutCols []uint32,
) {
	if p.extraLeftOutCols+p.extraRightOutCols <= 0 {
		return
	}
	numLeft, numRight := len(leftOutCols), len(rightOutCols)
	keepLeft := numLeft - p.extraLeftOutCols
	keepRight := numRight - p.extraRightOutCols

	projection := make([]uint32, 0, keepLeft+keepRight)
	// The originally requested left columns come first in the output...
	for idx := 0; idx < keepLeft; idx++ {
		projection = append(projection, uint32(idx))
	}
	// ...and the originally requested right columns start right after all of
	// the left output columns (including the extra ones).
	for idx := 0; idx < keepRight; idx++ {
		projection = append(projection, uint32(numLeft+idx))
	}
	result.op = exec.NewSimpleProjectOp(result.op, numLeft+numRight, projection)
}

// planFilterExpr plans the given filter on top of r.op and stores the
// resulting operator back into r.op. The memory usage reported while planning
// the selection operators is added to r.memUsage. r.columnTypes is not
// changed: if evaluating the filter required additional columns, they are
// projected away again.
func (r *newColOperatorResult) planFilterExpr(
	evalCtx *tree.EvalContext, filter distsqlpb.Expression,
) error {
	var helper exprHelper
	if err := helper.init(filter, r.columnTypes, evalCtx); err != nil {
		return err
	}
	if helper.expr == tree.DNull {
		// The filter expression is tree.DNull meaning that it is always false,
		// so a zero operator is all that is needed.
		r.op = exec.NewZeroOp(r.op)
		return nil
	}

	selOp, _, typsAfterFilter, selectionMem, err := planSelectionOperators(
		evalCtx, helper.expr, r.columnTypes, r.op,
	)
	r.op = selOp
	if err != nil {
		return errors.Wrapf(err, "unable to columnarize filter expression %q", filter.Expr)
	}
	r.memUsage += selectionMem

	if len(typsAfterFilter) <= len(r.columnTypes) {
		return nil
	}
	// Additional columns were appended to store projections while evaluating
	// the filter. Keep only the columns that existed before.
	var outputColumns []uint32
	for idx := 0; idx < len(r.columnTypes); idx++ {
		outputColumns = append(outputColumns, uint32(idx))
	}
	r.op = exec.NewSimpleProjectOp(r.op, len(typsAfterFilter), outputColumns)
	return nil
}

// planSelectionOperators plans a chain of operators on top of input that
// filters batches according to expr. It returns the tail of the chain, the
// column types of the batches it produces (planning may append columns to
// columnTypes to hold intermediate projections), and the memory estimate
// accumulated while planning.
//
// resultIdx is -1 for a bare indexed variable and is otherwise whatever the
// underlying planning produced; the callers visible in this file either
// discard it or pass it through.
//
// On error the returned operator must not be used.
func planSelectionOperators(
	ctx *tree.EvalContext, expr tree.TypedExpr, columnTypes []types.T, input exec.Operator,
) (op exec.Operator, resultIdx int, ct []types.T, memUsed int, err error) {
	switch t := expr.(type) {
	case *tree.IndexedVar:
		// A boolean column is used directly as the selection criterion.
		return exec.NewBoolVecToSelOp(input, t.Idx), -1, columnTypes, memUsed, nil
	case *tree.AndExpr:
		// Conjunctions are planned as two chained selections: the right side
		// only sees what the left side selected.
		var leftOp, rightOp exec.Operator
		var memUsedLeft, memUsedRight int
		leftOp, _, ct, memUsedLeft, err = planSelectionOperators(ctx, t.TypedLeft(), columnTypes, input)
		if err != nil {
			return nil, resultIdx, ct, memUsed, err
		}
		rightOp, resultIdx, ct, memUsedRight, err = planSelectionOperators(
			ctx, t.TypedRight(), ct, leftOp)
		return rightOp, resultIdx, ct, memUsedLeft + memUsedRight, err
	case *tree.OrExpr, *tree.CaseExpr:
		// OR expressions are handled by converting them to an equivalent CASE
		// statement. Since CASE statements don't have a selection form, plan a
		// projection and then convert the resulting boolean to a selection vector.
		op, resultIdx, ct, memUsed, err = planProjectionOperators(ctx, expr, columnTypes, input)
		if err != nil {
			// Do not wrap a failed projection: like the other branches, return
			// a nil operator alongside the error.
			return nil, resultIdx, ct, memUsed, err
		}
		return exec.NewBoolVecToSelOp(op, resultIdx), resultIdx, ct, memUsed, nil
	case *tree.ComparisonExpr:
		cmpOp := t.Operator
		leftOp, leftIdx, ct, memUsageLeft, err := planProjectionOperators(ctx, t.TypedLeft(), columnTypes, input)
		if err != nil {
			return nil, resultIdx, ct, memUsageLeft, err
		}
		lTyp := &ct[leftIdx]
		if constArg, ok := t.Right.(tree.Datum); ok {
			// The right side is a constant, so specialized operators can be used.
			if t.Operator == tree.Like || t.Operator == tree.NotLike {
				negate := t.Operator == tree.NotLike
				op, err := exec.GetLikeOperator(
					ctx, leftOp, leftIdx, string(tree.MustBeDString(constArg)), negate)
				return op, resultIdx, ct, memUsageLeft, err
			}
			if t.Operator == tree.In || t.Operator == tree.NotIn {
				negate := t.Operator == tree.NotIn
				datumTuple, ok := tree.AsDTuple(constArg)
				if !ok {
					err = errors.Errorf("IN is only supported for constant expressions")
					return nil, resultIdx, ct, memUsed, err
				}
				op, err := exec.GetInOperator(lTyp, leftOp, leftIdx, datumTuple, negate)
				return op, resultIdx, ct, memUsageLeft, err
			}
			op, err := exec.GetSelectionConstOperator(lTyp, t.TypedRight().ResolvedType(), cmpOp, leftOp, leftIdx, constArg)
			return op, resultIdx, ct, memUsageLeft, err
		}
		// Neither side is a constant: project the right side as well and compare
		// the two resulting columns.
		rightOp, rightIdx, ct, memUsageRight, err := planProjectionOperators(ctx, t.TypedRight(), ct, leftOp)
		if err != nil {
			return nil, resultIdx, ct, memUsageLeft + memUsageRight, err
		}
		op, err := exec.GetSelectionOperator(lTyp, &ct[rightIdx], cmpOp, rightOp, leftIdx, rightIdx)
		return op, resultIdx, ct, memUsageLeft + memUsageRight, err
	default:
		return nil, resultIdx, nil, memUsed, errors.Errorf("unhandled selection expression type: %s", reflect.TypeOf(t))
	}
}

// planTypedMaybeNullProjectionOperators plans projection operators like
// planProjectionOperators does, with one addition: when expr is tree.DNull it
// plans a constant-NULL operator whose output column has type exprTyp. It is
// currently unsafe to plan a constant-NULL operator when the type of the NULL
// is not known, which is why the caller must supply it.
//
// For a NULL expression, the result column is appended after the existing
// columns (its index is len(columnTypes)) and memUsed is the static memory
// estimate of the constant-NULL operator.
func planTypedMaybeNullProjectionOperators(
	ctx *tree.EvalContext,
	expr tree.TypedExpr,
	exprTyp *types.T,
	columnTypes []types.T,
	input exec.Operator,
) (op exec.Operator, resultIdx int, ct []types.T, memUsed int, err error) {
	if expr != tree.DNull {
		// Anything other than a literal NULL goes through the regular path.
		return planProjectionOperators(ctx, expr, columnTypes, input)
	}
	nullColIdx := len(columnTypes)
	var nullOp exec.Operator = exec.NewConstNullOp(input, nullColIdx, typeconv.FromColumnType(exprTyp))
	widenedTypes := append(columnTypes, *exprTyp)
	staticMem := nullOp.(exec.StaticMemoryOperator).EstimateStaticMemoryUsage()
	return nullOp, nullColIdx, widenedTypes, staticMem, nil
}

// planProjectionOperators plans a chain of operators to execute the provided
// expression. It returns the tail of the chain, as well as the column index
// of the expression's result (if any, otherwise -1) and the column types of the
// resulting batches. memUsed accumulates the static memory estimates of the
// planned operators that report one; not every branch contributes to it.
//
// Results of sub-expressions are written to fresh columns appended after the
// existing ones, so the returned column types are columnTypes followed by the
// types of all columns added during planning.
//
// On error the returned operator must not be used.
func planProjectionOperators(
	ctx *tree.EvalContext, expr tree.TypedExpr, columnTypes []types.T, input exec.Operator,
) (op exec.Operator, resultIdx int, ct []types.T, memUsed int, err error) {
	resultIdx = -1
	switch t := expr.(type) {
	case *tree.IndexedVar:
		// The value already lives in an input column; no operator is needed.
		return input, t.Idx, columnTypes, memUsed, nil
	case *tree.ComparisonExpr:
		return planProjectionExpr(ctx, t.Operator, t.ResolvedType(), t.TypedLeft(), t.TypedRight(), columnTypes, input)
	case *tree.BinaryExpr:
		return planProjectionExpr(ctx, t.Operator, t.ResolvedType(), t.TypedLeft(), t.TypedRight(), columnTypes, input)
	case *tree.CastExpr:
		expr := t.Expr.(tree.TypedExpr)
		// If the expression is NULL, we use planTypedMaybeNullProjectionOperators instead of planProjectionOperators
		// because we can say that the type of the NULL is the type that we are casting to, rather than unknown.
		// We can't use planProjectionOperators because it will reject planning a constNullOp without knowing
		// the post typechecking "type" of the NULL.
		if expr.ResolvedType() == types.Unknown {
			op, resultIdx, ct, memUsed, err = planTypedMaybeNullProjectionOperators(ctx, expr, t.Type, columnTypes, input)
		} else {
			op, resultIdx, ct, memUsed, err = planProjectionOperators(ctx, expr, columnTypes, input)
		}
		if err != nil {
			return nil, 0, nil, 0, err
		}
		// The cast writes its output to a new column appended to the batch.
		outputIdx := len(ct)
		op, err = exec.GetCastOperator(op, resultIdx, outputIdx, expr.ResolvedType(), t.Type)
		ct = append(ct, *t.Type)
		if sMem, ok := op.(exec.StaticMemoryOperator); ok {
			memUsed += sMem.EstimateStaticMemoryUsage()
		}
		return op, outputIdx, ct, memUsed, err
	case *tree.FuncExpr:
		var (
			inputCols     []int
			projectionMem int
		)
		ct = columnTypes
		op = input
		// Plan every argument in order, chaining the operators and remembering
		// which column holds each argument's value.
		for _, e := range t.Exprs {
			var err error
			// TODO(rohany): This could be done better, especially in the case of
			// constant arguments, because the vectorized engine right now
			// creates a new column full of the constant value.
			op, resultIdx, ct, projectionMem, err = planProjectionOperators(ctx, e.(tree.TypedExpr), ct, op)
			if err != nil {
				return nil, resultIdx, nil, memUsed, err
			}
			inputCols = append(inputCols, resultIdx)
			memUsed += projectionMem
		}
		funcOutputType := t.ResolvedType()
		resultIdx = len(ct)
		ct = append(ct, *funcOutputType)
		op = exec.NewBuiltinFunctionOperator(ctx, t, ct, inputCols, resultIdx, op)
		return op, resultIdx, ct, memUsed, nil
	case tree.Datum:
		datumType := t.ResolvedType()
		ct = columnTypes
		resultIdx = len(ct)
		ct = append(ct, *datumType)
		if datumType.Family() == types.UnknownFamily {
			// An untyped NULL can only be planned through
			// planTypedMaybeNullProjectionOperators, which knows its type.
			return nil, resultIdx, ct, memUsed, errors.New("cannot plan null type unknown")
		}
		typ := typeconv.FromColumnType(datumType)
		constVal, err := typeconv.GetDatumToPhysicalFn(datumType)(t)
		if err != nil {
			return nil, resultIdx, ct, memUsed, err
		}
		op, err := exec.NewConstOp(input, typ, constVal, resultIdx)
		if err != nil {
			return nil, resultIdx, ct, memUsed, err
		}
		return op, resultIdx, ct, memUsed, nil
	case *tree.CaseExpr:
		if t.Expr != nil {
			return nil, resultIdx, ct, 0, errors.New("CASE <expr> WHEN expressions unsupported")
		}

		buffer := exec.NewBufferOp(input)
		caseOps := make([]exec.Operator, len(t.Whens))
		caseOutputType := typeconv.FromColumnType(t.ResolvedType())
		caseOutputIdx := len(columnTypes)
		ct = append(columnTypes, *t.ResolvedType())
		// thenIdxs holds the output column of every THEN arm, followed by the
		// output column of the ELSE arm.
		thenIdxs := make([]int, len(t.Whens)+1)
		for i, when := range t.Whens {
			// The case operator is assembled from n WHEN arms, n THEN arms, and an
			// ELSE arm. Each WHEN arm is a boolean projection. Each THEN arm (and the
			// ELSE arm) is a projection of the type of the CASE expression. We set up
			// each WHEN arm to write its output to a fresh column, and likewise for
			// the THEN arms and the ELSE arm. Each WHEN arm individually acts on the
			// single input batch from the CaseExpr's input and is then transformed
			// into a selection vector, after which the THEN arm runs to create the
			// output just for the tuples that matched the WHEN arm. Each subsequent
			// WHEN arm will use the inverse of the selection vector to avoid running
			// the WHEN projection on tuples that have already been matched by a
			// previous WHEN arm. Finally, after each WHEN arm runs, we copy the
			// results of the WHEN into a single output vector, assembling the final
			// result of the case projection.
			var whenMemUsed, thenMemUsed int
			// A WHEN condition is a boolean, so a literal NULL condition must be
			// planned as a boolean NULL column (not as a NULL of the CASE's result
			// type), because the column is converted to a selection vector below.
			caseOps[i], resultIdx, ct, whenMemUsed, err = planTypedMaybeNullProjectionOperators(
				ctx, when.Cond.(tree.TypedExpr), types.Bool, ct, buffer)
			if err != nil {
				return nil, resultIdx, ct, 0, err
			}
			// Transform the booleans to a selection vector.
			caseOps[i] = exec.NewBoolVecToSelOp(caseOps[i], resultIdx)

			// Run the "then" clause on those tuples that were selected.
			caseOps[i], thenIdxs[i], ct, thenMemUsed, err = planTypedMaybeNullProjectionOperators(ctx, when.Val.(tree.TypedExpr), t.ResolvedType(), ct,
				caseOps[i])
			if err != nil {
				return nil, resultIdx, ct, 0, err
			}

			memUsed += whenMemUsed + thenMemUsed
		}
		var elseMem int
		var elseOp exec.Operator
		elseExpr := t.Else
		if elseExpr == nil {
			// If there's no ELSE arm, we write NULLs.
			elseExpr = tree.DNull
		}
		elseOp, thenIdxs[len(t.Whens)], ct, elseMem, err = planTypedMaybeNullProjectionOperators(
			ctx, elseExpr.(tree.TypedExpr), t.ResolvedType(), ct, buffer)
		if err != nil {
			return nil, resultIdx, ct, 0, err
		}
		memUsed += elseMem

		op := exec.NewCaseOp(buffer, caseOps, elseOp, thenIdxs, caseOutputIdx, caseOutputType)

		return op, caseOutputIdx, ct, memUsed, nil
	case *tree.AndExpr:
		var leftOp, rightOp exec.Operator
		var leftIdx, rightIdx, lMemUsed, rMemUsed int
		leftOp, leftIdx, ct, lMemUsed, err = planTypedMaybeNullProjectionOperators(ctx, t.TypedLeft(), types.Bool, columnTypes, input)
		if err != nil {
			return nil, resultIdx, ct, 0, err
		}
		rightOp, rightIdx, ct, rMemUsed, err = planTypedMaybeNullProjectionOperators(ctx, t.TypedRight(), types.Bool, ct, leftOp)
		if err != nil {
			return nil, resultIdx, ct, 0, err
		}
		// Add a new boolean column that ands the two output columns.
		resultIdx = len(ct)
		ct = append(ct, *t.ResolvedType())
		andOp := exec.NewAndOp(rightOp, leftIdx, rightIdx, resultIdx)
		return andOp, resultIdx, ct, lMemUsed + rMemUsed, nil
	case *tree.OrExpr:
		// Rewrite the OR expression as an equivalent CASE expression.
		// "a OR b" becomes "CASE WHEN a THEN true WHEN b THEN true ELSE false END".
		// This way we can take advantage of the short-circuiting logic built into
		// the CASE operator. (b should not be evaluated if a is true.)
		caseExpr, err := tree.NewTypedCaseExpr(
			nil,
			[]*tree.When{
				{Cond: t.Left, Val: tree.DBoolTrue},
				{Cond: t.Right, Val: tree.DBoolTrue},
			},
			tree.DBoolFalse,
			types.Bool)
		if err != nil {
			return nil, resultIdx, ct, memUsed, err
		}
		return planProjectionOperators(ctx, caseExpr, columnTypes, input)
	default:
		return nil, resultIdx, nil, memUsed, errors.Errorf("unhandled projection expression type: %s", reflect.TypeOf(t))
	}
}

// planProjectionExpr plans the projection of a two-operand expression
// "left binOp right" whose result has type outputType. The result is written
// to a new column appended to the batch; its index is returned as resultIdx
// and its type is appended to the returned column types. memUsed is the sum
// of the estimates from planning the operands plus the static memory estimate
// of the final operator, when it reports one.
//
// Three shapes are distinguished: a constant left operand, a constant right
// operand (with LIKE and IN handled specially), and two non-constant operands.
func planProjectionExpr(
	ctx *tree.EvalContext,
	binOp tree.Operator,
	outputType *types.T,
	left, right tree.TypedExpr,
	columnTypes []types.T,
	input exec.Operator,
) (op exec.Operator, resultIdx int, ct []types.T, memUsed int, err error) {
	// staticMemOf returns the static memory estimate of o, or zero when o does
	// not expose one.
	staticMemOf := func(o exec.Operator) int {
		if sized, ok := o.(exec.StaticMemoryOperator); ok {
			return sized.EstimateStaticMemoryUsage()
		}
		return 0
	}
	resultIdx = -1

	if lConstArg, lConst := left.(tree.Datum); lConst {
		// Constant on the left. Normally, the optimizer normalizes binary exprs so
		// that the constant argument is on the right side. This doesn't happen for
		// non-commutative operators such as - and /, though, so we still need this
		// case.
		var (
			rightOp  exec.Operator
			rightIdx int
		)
		rightOp, rightIdx, ct, memUsed, err = planProjectionOperators(ctx, right, columnTypes, input)
		if err != nil {
			return nil, resultIdx, ct, memUsed, err
		}
		// The projection output goes to a fresh column at the end of the batch.
		resultIdx = len(ct)
		op, err = exec.GetProjectionLConstOperator(left.ResolvedType(), &ct[rightIdx], binOp, rightOp, rightIdx, lConstArg, resultIdx)
		ct = append(ct, *outputType)
		memUsed += staticMemOf(op)
		return op, resultIdx, ct, memUsed, err
	}

	leftOp, leftIdx, ct, leftMem, err := planProjectionOperators(ctx, left, columnTypes, input)
	if err != nil {
		return nil, resultIdx, ct, leftMem, err
	}

	if rConstArg, rConst := right.(tree.Datum); rConst {
		// Constant on the right. The projection output goes to a fresh column at
		// the end of the batch.
		resultIdx = len(ct)
		switch binOp {
		case tree.Like, tree.NotLike:
			op, err = exec.GetLikeProjectionOperator(
				ctx, leftOp, leftIdx, resultIdx, string(tree.MustBeDString(rConstArg)), binOp == tree.NotLike)
		case tree.In, tree.NotIn:
			datumTuple, isTuple := tree.AsDTuple(rConstArg)
			if !isTuple {
				return nil, resultIdx, ct, leftMem, errors.Errorf("IN operator supported only on constant expressions")
			}
			op, err = exec.GetInProjectionOperator(&ct[leftIdx], leftOp, leftIdx, resultIdx, datumTuple, binOp == tree.NotIn)
		default:
			op, err = exec.GetProjectionRConstOperator(&ct[leftIdx], right.ResolvedType(), binOp, leftOp, leftIdx, rConstArg, resultIdx)
		}
		ct = append(ct, *outputType)
		return op, resultIdx, ct, leftMem + staticMemOf(op), err
	}

	// Neither operand is constant: project the right operand on top of the left
	// one and combine the two resulting columns.
	rightOp, rightIdx, ct, rightMem, err := planProjectionOperators(ctx, right, ct, leftOp)
	if err != nil {
		return nil, resultIdx, nil, leftMem + rightMem, err
	}
	resultIdx = len(ct)
	op, err = exec.GetProjectionOperator(&ct[leftIdx], &ct[rightIdx], binOp, rightOp, leftIdx, rightIdx, resultIdx)
	ct = append(ct, *outputType)
	return op, resultIdx, ct, leftMem + rightMem + staticMemOf(op), err
}

// wrapWithVectorizedStatsCollector wraps op in a new
// exec.VectorizedStatsCollector tagged with pspec.ProcessorID. Every element
// of inputs must already be a stats collector; each one has its output watch
// set to the stop watch created for the new wrapper. An error is returned if
// any input is not a stats collector.
func wrapWithVectorizedStatsCollector(
	op exec.Operator, inputs []exec.Operator, pspec *distsqlpb.ProcessorSpec,
) (*exec.VectorizedStatsCollector, error) {
	stopWatch := timeutil.NewStopWatch()
	noInputs := len(inputs) == 0
	collector := exec.NewVectorizedStatsCollector(op, pspec.ProcessorID, noInputs, stopWatch)
	for i := range inputs {
		inputCollector, isCollector := inputs[i].(*exec.VectorizedStatsCollector)
		if !isCollector {
			return nil, errors.New("unexpectedly an input is not collecting stats")
		}
		inputCollector.SetOutputWatch(stopWatch)
	}
	return collector, nil
}

// finishVectorizedStatsCollectors finalizes every given stats collector and
// attaches its stats to a per-processor child span of the span in ctx. One
// span is created for each ID in procIDs, tagged with that ID, and all of the
// spans are finished before returning. When deterministicStats is set, the
// Time field of every collector's stats is zeroed. Collectors with a negative
// ID are finalized but their stats are not attached to any span.
func finishVectorizedStatsCollectors(
	ctx context.Context,
	deterministicStats bool,
	vectorizedStatsCollectors []*exec.VectorizedStatsCollector,
	procIDs []int32,
) {
	spansByProcID := make(map[int32]opentracing.Span)
	for _, procID := range procIDs {
		// We're creating a new span for every processor setting the
		// appropriate tag so that it is displayed correctly on the flow
		// diagram.
		// TODO(yuzefovich): these spans are created and finished right
		// away which is not the way they are supposed to be used, so this
		// should be fixed.
		_, procSpan := tracing.ChildSpan(ctx, fmt.Sprintf("operator for processor %d", procID))
		spansByProcID[procID] = procSpan
		procSpan.SetTag(distsqlpb.ProcessorIDTagKey, procID)
	}
	for _, collector := range vectorizedStatsCollectors {
		// TODO(yuzefovich): I'm not sure whether there are cases when
		// multiple operators correspond to a single processor. We might
		// need to do some aggregation here in that case.
		collector.FinalizeStats()
		if deterministicStats {
			collector.VectorizedStats.Time = 0
		}
		// Stats collectors not associated with a processor (negative ID) are
		// ignored.
		if collector.ID >= 0 {
			tracing.SetSpanStats(spansByProcID[collector.ID], &collector.VectorizedStats)
		}
	}
	for procID := range spansByProcID {
		spansByProcID[procID].Finish()
	}
}

// runFn is the signature of a component that is run asynchronously as part of
// a flow (see flowCreatorHelper.accumulateAsyncComponent). It receives the
// context to run in and a cancellation function; the routers and outboxes set
// up in this file are run through functions of this type.
type runFn func(context.Context, context.CancelFunc)

// flowCreatorHelper contains all the logic needed to add the vectorized
// infrastructure to be run asynchronously as well as to perform some sanity
// checks. It is embedded into vectorizedFlowCreator, which calls these methods
// while setting up inputs, routers, and outputs.
type flowCreatorHelper interface {
	// addStreamEndpoint stores information about an inbound stream: its ID, the
	// inbox that receives it, and a wait group.
	addStreamEndpoint(distsqlpb.StreamID, *colrpc.Inbox, *sync.WaitGroup)
	// checkInboundStreamID checks that the provided stream ID has not been seen
	// yet. A non-nil error aborts the setup of the corresponding input.
	checkInboundStreamID(distsqlpb.StreamID) error
	// accumulateAsyncComponent stores a component (either a router or an outbox)
	// to be run asynchronously.
	accumulateAsyncComponent(runFn)
	// addMaterializer adds a materializer to the flow.
	addMaterializer(*materializer)
	// getCancelFlowFn returns a flow cancellation function.
	getCancelFlowFn() context.CancelFunc
}

// opDAGWithMetaSources is a helper struct that stores an operator DAG as well
// as the metadataSources in this DAG that need to be drained. It is the value
// stored per local stream so that whoever consumes rootOperator also inherits
// the responsibility for draining metadataSources.
type opDAGWithMetaSources struct {
	rootOperator    exec.Operator
	metadataSources []distsqlpb.MetadataSource
}

// remoteComponentCreator is an interface that abstracts the constructors for
// several components in a remote flow. Mostly for testing purposes.
// newOutbox builds the outbox that sends input (with the given column types)
// and is handed the metadata sources; newInbox builds the inbox for a remote
// stream with the given column types.
type remoteComponentCreator interface {
	newOutbox(input exec.Operator, typs []coltypes.T, metadataSources []distsqlpb.MetadataSource) (*colrpc.Outbox, error)
	newInbox(typs []coltypes.T) (*colrpc.Inbox, error)
}

// vectorizedRemoteComponentCreator is the remoteComponentCreator that creates
// real colrpc outboxes and inboxes.
type vectorizedRemoteComponentCreator struct{}

// newOutbox creates an outbox by delegating directly to colrpc.NewOutbox with
// the same arguments.
func (vectorizedRemoteComponentCreator) newOutbox(
	input exec.Operator, typs []coltypes.T, metadataSources []distsqlpb.MetadataSource,
) (*colrpc.Outbox, error) {
	return colrpc.NewOutbox(input, typs, metadataSources)
}

// newInbox creates an inbox by delegating directly to colrpc.NewInbox with the
// same column types.
func (vectorizedRemoteComponentCreator) newInbox(typs []coltypes.T) (*colrpc.Inbox, error) {
	return colrpc.NewInbox(typs)
}

// vectorizedFlowCreator performs all the setup of vectorized flows. Depending
// on embedded flowCreatorHelper, it can either do the actual setup in order
// to run the flow or do the setup needed to check that the flow is supported
// through the vectorized engine.
//
// Field roles, as used by the setup methods in this file:
//   - streamIDToInputOp maps a local stream ID to the operator DAG (and its
//     pending metadata sources) that produces it; it is filled in when an
//     output is set up and read when the matching input is set up.
//   - recordingStats controls whether operators are wrapped in stats
//     collectors.
//   - vectorizedStatsCollectorsQueue holds the stats collectors that are
//     handed off when an outbox or materializer is set up; procIDs are the
//     processor IDs passed along when those collectors are finished.
//   - waitGroup is passed to inbound stream endpoints and to unordered
//     synchronizers.
//   - syncFlowConsumer is the receiver the materializer writes to.
//   - nodeDialer and flowID are passed to every outbox when it is run.
//   - materializerAdded records that a materializer has been set up on this
//     node; the last exiting outbox only cancels the flow when it is false.
type vectorizedFlowCreator struct {
	flowCreatorHelper
	remoteComponentCreator

	streamIDToInputOp              map[distsqlpb.StreamID]opDAGWithMetaSources
	recordingStats                 bool
	vectorizedStatsCollectorsQueue []*exec.VectorizedStatsCollector
	procIDs                        []int32
	waitGroup                      *sync.WaitGroup
	syncFlowConsumer               RowReceiver
	nodeDialer                     *nodedialer.Dialer
	flowID                         distsqlpb.FlowID

	// numOutboxes counts how many exec.Outboxes have been set up on this node.
	// It must be accessed atomically.
	numOutboxes       int32
	materializerAdded bool

	// leaves accumulates all operators that have no further outputs on the
	// current node, for the purposes of EXPLAIN output.
	leaves []exec.OpNode
}

// newVectorizedFlowCreator returns a vectorizedFlowCreator that uses the given
// helper and component creator. The stream-to-operator map is initialized
// empty, and the stats collector queue and processor ID list start out empty
// with a small preallocated capacity. All other arguments are stored as is.
func newVectorizedFlowCreator(
	helper flowCreatorHelper,
	componentCreator remoteComponentCreator,
	recordingStats bool,
	waitGroup *sync.WaitGroup,
	syncFlowConsumer RowReceiver,
	nodeDialer *nodedialer.Dialer,
	flowID distsqlpb.FlowID,
) *vectorizedFlowCreator {
	creator := &vectorizedFlowCreator{
		flowCreatorHelper:      helper,
		remoteComponentCreator: componentCreator,
		recordingStats:         recordingStats,
		waitGroup:              waitGroup,
		syncFlowConsumer:       syncFlowConsumer,
		nodeDialer:             nodeDialer,
		flowID:                 flowID,
	}
	// Containers that are filled in during flow setup.
	creator.streamIDToInputOp = make(map[distsqlpb.StreamID]opDAGWithMetaSources)
	creator.vectorizedStatsCollectorsQueue = make([]*exec.VectorizedStatsCollector, 0, 2)
	creator.procIDs = make([]int32, 0, 2)
	return creator
}

// setupRemoteOutputStream creates an Outbox that reads from op and sends to
// the remote endpoint described by stream. The outbox is also handed the
// metadataSourcesQueue so that it drains all of those MetadataSources. The
// outbox is registered to be run asynchronously and is returned as an
// exec.OpNode.
func (s *vectorizedFlowCreator) setupRemoteOutputStream(
	op exec.Operator,
	outputTyps []coltypes.T,
	stream *distsqlpb.StreamEndpointSpec,
	metadataSourcesQueue []distsqlpb.MetadataSource,
) (exec.OpNode, error) {
	outbox, err := s.remoteComponentCreator.newOutbox(op, outputTyps, metadataSourcesQueue)
	if err != nil {
		return nil, err
	}
	atomic.AddInt32(&s.numOutboxes, 1)
	s.accumulateAsyncComponent(func(ctx context.Context, cancelFn context.CancelFunc) {
		outbox.Run(ctx, s.nodeDialer, stream.TargetNodeID, s.flowID, stream.StreamID, cancelFn)
		// When the last Outbox on this node exits, we want to make sure that
		// everything is shutdown. Calling cancelFn will cancel the context that
		// all infrastructure on this node is listening on, so it will shut
		// everything down. We only do so if all of the following hold.
		if remaining := atomic.AddInt32(&s.numOutboxes, -1); remaining != 0 {
			// This is not the last Outbox.
			return
		}
		if s.materializerAdded {
			// There is a root materializer on this node, which takes care of the
			// cancellation itself.
			return
		}
		if cancelFn == nil {
			// cancelFn can be nil in tests.
			return
		}
		cancelFn()
	})
	return outbox, nil
}

// setupRouter plans a vectorized hash router for the given output router spec
// and registers it to be run asynchronously. Remote output streams get an
// outbox; local output streams are recorded in s.streamIDToInputOp so that
// they can serve as inputs in further planning. The router itself is appended
// to metadataSourcesQueue, and the whole queue is given to the first output
// stream only (either to its outbox to be drained, or stored alongside the
// local output to pass that responsibility along), so metadataSourcesQueue is
// always fully consumed. If no output is local, the router is recorded as a
// leaf.
// NOTE: This method supports only BY_HASH routers. Callers should handle
// PASS_THROUGH routers separately.
func (s *vectorizedFlowCreator) setupRouter(
	input exec.Operator,
	outputTyps []coltypes.T,
	output *distsqlpb.OutputRouterSpec,
	metadataSourcesQueue []distsqlpb.MetadataSource,
) error {
	if output.Type != distsqlpb.OutputRouterSpec_BY_HASH {
		return errors.Errorf("vectorized output router type %s unsupported", output.Type)
	}

	// TODO(asubiotto): Change hashRouter's hashCols to be uint32s.
	hashCols := make([]int, 0, len(output.HashColumns))
	for _, col := range output.HashColumns {
		hashCols = append(hashCols, int(col))
	}
	router, routerOutputs := exec.NewHashRouter(input, outputTyps, hashCols, len(output.Streams))
	s.accumulateAsyncComponent(func(ctx context.Context, _ context.CancelFunc) {
		router.Run(ctx)
	})

	// The router is itself a metadata source that must be drained.
	metadataSourcesQueue = append(metadataSourcesQueue, router)

	hasLocalOutput := false
	for i := range routerOutputs {
		routerOutput := routerOutputs[i]
		stream := &output.Streams[i]
		switch stream.Type {
		case distsqlpb.StreamEndpointSpec_SYNC_RESPONSE:
			return errors.Errorf("unexpected sync response output when setting up router")
		case distsqlpb.StreamEndpointSpec_REMOTE:
			_, err := s.setupRemoteOutputStream(routerOutput, outputTyps, stream, metadataSourcesQueue)
			if err != nil {
				return err
			}
		case distsqlpb.StreamEndpointSpec_LOCAL:
			hasLocalOutput = true
			if s.recordingStats {
				// Wrap local outputs with vectorized stats collectors when recording
				// stats. This is mostly for compatibility but will provide some useful
				// information (e.g. output stall time).
				collector, err := wrapWithVectorizedStatsCollector(
					routerOutput, nil /* inputs */, &distsqlpb.ProcessorSpec{ProcessorID: -1},
				)
				if err != nil {
					return err
				}
				routerOutput = collector
			}
			s.streamIDToInputOp[stream.StreamID] = opDAGWithMetaSources{
				rootOperator:    routerOutput,
				metadataSources: metadataSourcesQueue,
			}
		}
		// Either the metadataSourcesQueue will be drained by an outbox or we
		// created an opDAGWithMetaSources to pass along these metadataSources. We
		// don't need to worry about metadata sources for following iterations of
		// the loop.
		metadataSourcesQueue = nil
	}
	if !hasLocalOutput {
		// No local output means that our router is a leaf node.
		s.leaves = append(s.leaves, router)
	}
	return nil
}

// setupInput sets up one or more input operators (local or remote) and a
// synchronizer to expose these separate streams as one exec.Operator which is
// returned. If s.recordingStats is true, these inputs and synchronizer are
// wrapped in stats collectors if not done so, although these stats are not
// exposed as of yet. Inboxes that are created are also returned as
// []distsqlpb.MetadataSource so that any remote metadata can be read through
// calling DrainMeta; the metadata sources attached to local inputs are
// returned as well.
//
// memUsed is the sum of the static memory estimates of the inboxes and of the
// ordered synchronizer (if one is planned).
//
// An error is returned if the spec has no streams, if a stream has an
// unsupported type, if a remote stream ID fails the inbound stream ID check,
// or if any of the components cannot be created.
func (s *vectorizedFlowCreator) setupInput(
	input distsqlpb.InputSyncSpec,
) (op exec.Operator, _ []distsqlpb.MetadataSource, memUsed int, _ error) {
	if len(input.Streams) == 0 {
		// Without any stream there is no operator to return; fail explicitly
		// instead of panicking when taking the first input operator below.
		return nil, nil, memUsed, errors.New("input sync spec has no streams")
	}
	inputStreamOps := make([]exec.Operator, 0, len(input.Streams))
	metaSources := make([]distsqlpb.MetadataSource, 0, len(input.Streams))
	for _, inputStream := range input.Streams {
		switch inputStream.Type {
		case distsqlpb.StreamEndpointSpec_LOCAL:
			// A local input was registered when its producer's output was set up.
			in := s.streamIDToInputOp[inputStream.StreamID]
			inputStreamOps = append(inputStreamOps, in.rootOperator)
			metaSources = append(metaSources, in.metadataSources...)
		case distsqlpb.StreamEndpointSpec_REMOTE:
			// If the input is remote, the input operator does not exist in
			// streamIDToInputOp. Create an inbox.
			if err := s.checkInboundStreamID(inputStream.StreamID); err != nil {
				return nil, nil, memUsed, err
			}
			typs, err := typeconv.FromColumnTypes(input.ColumnTypes)
			if err != nil {
				return nil, nil, memUsed, err
			}
			inbox, err := s.remoteComponentCreator.newInbox(typs)
			if err != nil {
				return nil, nil, memUsed, err
			}
			s.addStreamEndpoint(inputStream.StreamID, inbox, s.waitGroup)
			metaSources = append(metaSources, inbox)
			op = inbox
			memUsed += op.(exec.StaticMemoryOperator).EstimateStaticMemoryUsage()
			if s.recordingStats {
				op, err = wrapWithVectorizedStatsCollector(
					inbox,
					nil, /* inputs */
					// TODO(asubiotto): Vectorized stats collectors currently expect a
					// processor ID. These stats will not be shown until we extend stats
					// collectors to take in a stream ID.
					&distsqlpb.ProcessorSpec{
						ProcessorID: -1,
					},
				)
				if err != nil {
					return nil, nil, memUsed, err
				}
			}
			inputStreamOps = append(inputStreamOps, op)
		default:
			return nil, nil, memUsed, errors.Errorf("unsupported input stream type %s", inputStream.Type)
		}
	}
	// A single stream is used directly; multiple streams need a synchronizer.
	op = inputStreamOps[0]
	if len(inputStreamOps) > 1 {
		statsInputs := inputStreamOps
		typs, err := typeconv.FromColumnTypes(input.ColumnTypes)
		if err != nil {
			return nil, nil, memUsed, err
		}
		if input.Type == distsqlpb.InputSyncSpec_ORDERED {
			op = exec.NewOrderedSynchronizer(
				inputStreamOps, typs, distsqlpb.ConvertToColumnOrdering(input.Ordering),
			)
			memUsed += op.(exec.StaticMemoryOperator).EstimateStaticMemoryUsage()
		} else {
			op = exec.NewUnorderedSynchronizer(inputStreamOps, typs, s.waitGroup)
			// Don't use the unordered synchronizer's inputs for stats collection
			// given that they run concurrently. The stall time will be collected
			// instead.
			statsInputs = nil
		}
		if s.recordingStats {
			// TODO(asubiotto): Once we have IDs for synchronizers, plumb them into
			// this stats collector to display stats.
			op, err = wrapWithVectorizedStatsCollector(op, statsInputs, &distsqlpb.ProcessorSpec{ProcessorID: -1})
			if err != nil {
				return nil, nil, memUsed, err
			}
		}
	}
	return op, metaSources, memUsed, nil
}

// setupOutput sets up any necessary infrastructure according to the output
// spec of pspec. The metadataSourcesQueue is fully consumed by either
// connecting it to a component that can drain these MetadataSources (root
// materializer or outbox) or storing it in streamIDToInputOp with the given op
// to be processed later.
//
// Only the first output spec of pspec is considered. Routers other than
// PASS_THROUGH are delegated to setupRouter. A PASS_THROUGH output must have
// exactly one stream, which is handled according to its type:
//   - LOCAL: op is recorded in streamIDToInputOp for a later input setup.
//   - REMOTE: an outbox is set up and recorded as a leaf.
//   - SYNC_RESPONSE: a materializer writing to s.syncFlowConsumer is set up
//     and recorded as a leaf.
//
// NOTE: The caller must not reuse the metadataSourcesQueue.
func (s *vectorizedFlowCreator) setupOutput(
	ctx context.Context,
	flowCtx *FlowCtx,
	pspec *distsqlpb.ProcessorSpec,
	op exec.Operator,
	opOutputTypes []coltypes.T,
	metadataSourcesQueue []distsqlpb.MetadataSource,
) error {
	output := &pspec.Output[0]
	if output.Type != distsqlpb.OutputRouterSpec_PASS_THROUGH {
		return s.setupRouter(
			op,
			opOutputTypes,
			output,
			// The queue itself (not a copy) is handed over to the router setup,
			// which is responsible for consuming it.
			metadataSourcesQueue,
		)
	}

	if len(output.Streams) != 1 {
		return errors.Errorf("unsupported multi outputstream proc (%d streams)", len(output.Streams))
	}
	outputStream := &output.Streams[0]
	switch outputStream.Type {
	case distsqlpb.StreamEndpointSpec_LOCAL:
		s.streamIDToInputOp[outputStream.StreamID] = opDAGWithMetaSources{rootOperator: op, metadataSources: metadataSourcesQueue}
	case distsqlpb.StreamEndpointSpec_REMOTE:
		// Set up an Outbox, which is handed the metadataSourcesQueue to drain.
		if s.recordingStats {
			// If recording stats, we add a metadata source that will generate all
			// stats data as metadata for the stats collectors created so far.
			// The collectors are copied because the queue is reset right below
			// and appended to afterwards.
			vscs := append([]*exec.VectorizedStatsCollector(nil), s.vectorizedStatsCollectorsQueue...)
			s.vectorizedStatsCollectorsQueue = s.vectorizedStatsCollectorsQueue[:0]
			metadataSourcesQueue = append(
				metadataSourcesQueue,
				distsqlpb.CallbackMetadataSource{
					DrainMetaCb: func(ctx context.Context) []distsqlpb.ProducerMetadata {
						// TODO(asubiotto): Who is responsible for the recording of the
						// parent context?
						// Start a separate recording so that GetRecording will return
						// the recordings for only the child spans containing stats.
						ctx, span := tracing.ChildSpanSeparateRecording(ctx, "")
						finishVectorizedStatsCollectors(ctx, flowCtx.Cfg.TestingKnobs.DeterministicStats, vscs, s.procIDs)
						return []distsqlpb.ProducerMetadata{{TraceData: tracing.GetRecording(span)}}
					},
				},
			)
		}
		outbox, err := s.setupRemoteOutputStream(op, opOutputTypes, outputStream, metadataSourcesQueue)
		if err != nil {
			return err
		}
		// An outbox is a leaf: there's nothing that sees it as an input on this
		// node.
		s.leaves = append(s.leaves, outbox)
	case distsqlpb.StreamEndpointSpec_SYNC_RESPONSE:
		if s.syncFlowConsumer == nil {
			return errors.New("syncFlowConsumer unset, unable to create materializer")
		}
		// Make the materializer, which will write to the given receiver.
		columnTypes := s.syncFlowConsumer.Types()
		// outputStatsToTrace stays nil unless stats are being recorded.
		var outputStatsToTrace func()
		if s.recordingStats {
			// Make a copy given that vectorizedStatsCollectorsQueue is reset and
			// appended to.
			vscq := append([]*exec.VectorizedStatsCollector(nil), s.vectorizedStatsCollectorsQueue...)
			outputStatsToTrace = func() {
				finishVectorizedStatsCollectors(
					ctx, flowCtx.Cfg.TestingKnobs.DeterministicStats, vscq, s.procIDs,
				)
			}
		}
		proc, err := newMaterializer(
			flowCtx,
			pspec.ProcessorID,
			op,
			columnTypes,
			&distsqlpb.PostProcessSpec{},
			s.syncFlowConsumer,
			metadataSourcesQueue,
			outputStatsToTrace,
			s.getCancelFlowFn,
		)
		if err != nil {
			return err
		}
		// The collectors were handed to the materializer above (when recording
		// stats), so the queue can be reset for subsequent processors.
		s.vectorizedStatsCollectorsQueue = s.vectorizedStatsCollectorsQueue[:0]
		// A materializer is a leaf.
		s.leaves = append(s.leaves, proc)
		s.addMaterializer(proc)
		s.materializerAdded = true
	default:
		return errors.Errorf("unsupported output stream type %s", outputStream.Type)
	}
	return nil
}

// setupFlow builds the vectorized operator tree for processorSpecs and
// returns the leaves accumulated in s.leaves.
//
// Processors are handled in topological order: the queue is seeded with the
// processors that have no non-remote input streams, and a downstream
// processor is enqueued only once every one of its non-remote input streams
// has an entry in s.streamIDToInputOp. For each dequeued processor the method
// sets up its inputs, plans it with newColOperator, optionally wraps it in a
// stats collector (when s.recordingStats is set) and wires up its output via
// s.setupOutput. Memory reported by input and operator setup is registered
// with acc.
//
// An error is returned if a processor does not have exactly one output, if
// any setup step fails, or, when vectorize=auto, if a non-streaming operator
// or a hash router is encountered. The method panics if stats collectors are
// still queued in s.vectorizedStatsCollectorsQueue once all processors have
// been handled.
func (s *vectorizedFlowCreator) setupFlow(
	ctx context.Context,
	flowCtx *FlowCtx,
	processorSpecs []distsqlpb.ProcessorSpec,
	acc *mon.BoundAccount,
) (leaves []exec.OpNode, err error) {
	// streamIDToSpecIdx maps the ID of every input stream to the index (into
	// processorSpecs) of the processor that consumes it.
	streamIDToSpecIdx := make(map[distsqlpb.StreamID]int)
	// queue is a queue of indices into processorSpecs, for topologically
	// ordered processing.
	queue := make([]int, 0, len(processorSpecs))
	for i := range processorSpecs {
		hasLocalInput := false
		for j := range processorSpecs[i].Input {
			input := &processorSpecs[i].Input[j]
			for k := range input.Streams {
				stream := &input.Streams[k]
				streamIDToSpecIdx[stream.StreamID] = i
				if stream.Type != distsqlpb.StreamEndpointSpec_REMOTE {
					hasLocalInput = true
				}
			}
		}
		if hasLocalInput {
			continue
		}
		// Queue all processors with either no inputs or remote inputs.
		queue = append(queue, i)
	}

	// inputs is reused across iterations to avoid reallocating it for every
	// processor.
	inputs := make([]exec.Operator, 0, 2)
	for len(queue) > 0 {
		pspec := &processorSpecs[queue[0]]
		queue = queue[1:]
		// Exactly one output is supported. Output[0] is indexed below, so a
		// spec without outputs must be rejected here; otherwise it would cause
		// an index-out-of-range panic instead of a regular planning error.
		if len(pspec.Output) == 0 {
			return nil, errors.New("unsupported proc with no outputs")
		}
		if len(pspec.Output) > 1 {
			return nil, errors.Errorf("unsupported multi-output proc (%d outputs)", len(pspec.Output))
		}

		// metadataSourcesQueue contains all the MetadataSources that need to be
		// drained. If in a given loop iteration no component that can drain
		// metadata from these sources is found, the metadataSourcesQueue should be
		// added as part of one of the last unconnected inputDAGs in
		// streamIDToInputOp. This is to avoid cycles.
		metadataSourcesQueue := make([]distsqlpb.MetadataSource, 0, 1)
		inputs = inputs[:0]
		for i := range pspec.Input {
			input, metadataSources, memUsed, err := s.setupInput(pspec.Input[i])
			if err != nil {
				return nil, err
			}
			if err = acc.Grow(ctx, int64(memUsed)); err != nil {
				return nil, errors.Wrapf(err, "not enough memory to setup vectorized plan")
			}
			metadataSourcesQueue = append(metadataSourcesQueue, metadataSources...)
			inputs = append(inputs, input)
		}

		result, err := newColOperator(ctx, flowCtx, pspec, inputs)
		if err != nil {
			return nil, errors.Wrapf(err, "unable to vectorize execution plan")
		}
		if flowCtx.EvalCtx.SessionData.VectorizeMode == sessiondata.VectorizeAuto &&
			!result.isStreaming {
			return nil, errors.Errorf("non-streaming operator encountered when vectorize=auto")
		}
		if err = acc.Grow(ctx, int64(result.memUsage)); err != nil {
			return nil, errors.Wrapf(err, "not enough memory to setup vectorized plan")
		}
		metadataSourcesQueue = append(metadataSourcesQueue, result.metadataSources...)

		op := result.op
		if s.recordingStats {
			// Wrap the operator so that the stats collector is what downstream
			// components see, and remember it together with its processor ID.
			vsc, err := wrapWithVectorizedStatsCollector(op, inputs, pspec)
			if err != nil {
				return nil, err
			}
			s.vectorizedStatsCollectorsQueue = append(s.vectorizedStatsCollectorsQueue, vsc)
			s.procIDs = append(s.procIDs, pspec.ProcessorID)
			op = vsc
		}

		if flowCtx.EvalCtx.SessionData.VectorizeMode == sessiondata.VectorizeAuto &&
			pspec.Output[0].Type == distsqlpb.OutputRouterSpec_BY_HASH {
			// exec.HashRouter can do unlimited buffering, and it is present in the
			// flow, so we don't want to run such a flow via the vectorized engine
			// when vectorize=auto.
			return nil, errors.Errorf("hash router encountered when vectorize=auto")
		}
		opOutputTypes, err := typeconv.FromColumnTypes(result.columnTypes)
		if err != nil {
			return nil, err
		}
		if err = s.setupOutput(
			ctx, flowCtx, pspec, op, opOutputTypes, metadataSourcesQueue,
		); err != nil {
			return nil, err
		}

		// Now queue all outputs from this op whose inputs are already all
		// populated.
		// NOTE(review): `continue NEXTOUTPUT` advances to the next output, so
		// the remaining streams of the current output are not examined once one
		// consumer is found not to be ready - confirm this is intended.
	NEXTOUTPUT:
		for i := range pspec.Output {
			for j := range pspec.Output[i].Streams {
				stream := &pspec.Output[i].Streams[j]
				if stream.Type != distsqlpb.StreamEndpointSpec_LOCAL {
					continue
				}
				procIdx, ok := streamIDToSpecIdx[stream.StreamID]
				if !ok {
					return nil, errors.Errorf("couldn't find stream %d", stream.StreamID)
				}
				outputSpec := &processorSpecs[procIdx]
				for k := range outputSpec.Input {
					for l := range outputSpec.Input[k].Streams {
						stream := outputSpec.Input[k].Streams[l]
						if stream.Type == distsqlpb.StreamEndpointSpec_REMOTE {
							// Remote streams are not present in streamIDToInputOp. The
							// Inboxes that consume these streams are created at the same time
							// as the operator that needs them, so skip the creation check for
							// this input.
							continue
						}
						if _, ok := s.streamIDToInputOp[stream.StreamID]; !ok {
							continue NEXTOUTPUT
						}
					}
				}
				// We found an input op for every single stream in this output. Queue
				// it for processing.
				queue = append(queue, procIdx)
			}
		}
	}

	if len(s.vectorizedStatsCollectorsQueue) > 0 {
		panic("not all vectorized stats collectors have been processed")
	}
	return s.leaves, nil
}

// vectorizedFlowCreatorHelper is a flowCreatorHelper that sets up all the
// vectorized infrastructure to be actually run.
type vectorizedFlowCreatorHelper struct {
	// f is the Flow whose inbound streams, startables and processors are
	// populated by the helper's methods.
	f *Flow
}

// Compile-time check that *vectorizedFlowCreatorHelper implements
// flowCreatorHelper.
var _ flowCreatorHelper = &vectorizedFlowCreatorHelper{}

// addStreamEndpoint registers inbox as the receiver of the inbound stream
// streamID on the underlying flow, associating wg with that stream. Any
// previous entry for streamID is overwritten.
func (r *vectorizedFlowCreatorHelper) addStreamEndpoint(
	streamID distsqlpb.StreamID, inbox *colrpc.Inbox, wg *sync.WaitGroup,
) {
	info := &inboundStreamInfo{
		receiver:  vectorizedInboundStreamHandler{inbox},
		waitGroup: wg,
	}
	r.f.inboundStreams[streamID] = info
}

// checkInboundStreamID delegates the check of sid to the underlying flow and
// returns its result unchanged.
func (r *vectorizedFlowCreatorHelper) checkInboundStreamID(sid distsqlpb.StreamID) error {
	return r.f.checkInboundStreamID(sid)
}

// accumulateAsyncComponent appends to the flow's startables a component that,
// once started, executes run in its own goroutine. If the startable is given
// a non-nil wait group, the goroutine is added to it before being launched
// and marked as done after run returns.
func (r *vectorizedFlowCreatorHelper) accumulateAsyncComponent(run runFn) {
	start := func(ctx context.Context, wg *sync.WaitGroup, cancelFn context.CancelFunc) {
		if wg == nil {
			// Nobody is waiting on this component; just fire it off.
			go run(ctx, cancelFn)
			return
		}
		wg.Add(1)
		go func() {
			run(ctx, cancelFn)
			wg.Done()
		}()
	}
	r.f.startables = append(r.f.startables, startableFn(start))
}

// addMaterializer makes m the sole processor of the underlying flow,
// replacing whatever processors were set before.
func (r *vectorizedFlowCreatorHelper) addMaterializer(m *materializer) {
	r.f.processors = []Processor{m}
}

// getCancelFlowFn returns the ctxCancel function stored on the underlying
// flow.
func (r *vectorizedFlowCreatorHelper) getCancelFlowFn() context.CancelFunc {
	return r.f.ctxCancel
}

// setupVectorizedFlow sets up f's processors as a vectorized flow, charging
// the memory used during setup to acc. Stats recording is enabled when ctx
// carries a span that is currently recording. The leaves returned by the
// flow creator are discarded; only the setup error, if any, is returned.
func (f *Flow) setupVectorizedFlow(ctx context.Context, acc *mon.BoundAccount) error {
	sp := opentracing.SpanFromContext(ctx)
	recordingStats := sp != nil && tracing.IsRecording(sp)

	creator := newVectorizedFlowCreator(
		&vectorizedFlowCreatorHelper{f: f},
		vectorizedRemoteComponentCreator{},
		recordingStats,
		&f.waitGroup,
		f.syncFlowConsumer,
		f.Cfg.NodeDialer,
		f.id,
	)
	if _, err := creator.setupFlow(ctx, &f.FlowCtx, f.spec.Processors, acc); err != nil {
		return err
	}
	return nil
}

// noopFlowCreatorHelper is a flowCreatorHelper that only performs sanity
// checks.
type noopFlowCreatorHelper struct {
	// inboundStreams is the set of stream IDs registered through
	// addStreamEndpoint; checkInboundStreamID reports an error for IDs that
	// are already present in it.
	inboundStreams map[distsqlpb.StreamID]struct{}
}

// Compile-time check that *noopFlowCreatorHelper implements
// flowCreatorHelper.
var _ flowCreatorHelper = &noopFlowCreatorHelper{}

// newNoopFlowCreatorHelper returns a noopFlowCreatorHelper with an empty,
// ready-to-use set of inbound stream IDs.
func newNoopFlowCreatorHelper() *noopFlowCreatorHelper {
	helper := &noopFlowCreatorHelper{}
	helper.inboundStreams = make(map[distsqlpb.StreamID]struct{})
	return helper
}

// addStreamEndpoint records streamID as a known inbound stream. The inbox and
// wait group arguments are ignored.
func (r *noopFlowCreatorHelper) addStreamEndpoint(
	streamID distsqlpb.StreamID, _ *colrpc.Inbox, _ *sync.WaitGroup,
) {
	r.inboundStreams[streamID] = struct{}{}
}

// checkInboundStreamID returns an error if sid has already been recorded as
// an inbound stream, and nil otherwise.
func (r *noopFlowCreatorHelper) checkInboundStreamID(sid distsqlpb.StreamID) error {
	_, alreadyRegistered := r.inboundStreams[sid]
	if !alreadyRegistered {
		return nil
	}
	return errors.Errorf("inbound stream %d already exists in map", sid)
}

// accumulateAsyncComponent is a no-op: the given component is dropped and
// never run.
func (r *noopFlowCreatorHelper) accumulateAsyncComponent(runFn) {}

// addMaterializer is a no-op: the given materializer is not retained.
func (r *noopFlowCreatorHelper) addMaterializer(*materializer) {}

// getCancelFlowFn always returns nil: this helper has no flow to cancel.
func (r *noopFlowCreatorHelper) getCancelFlowFn() context.CancelFunc {
	return nil
}

// SupportsVectorized reports, via its error result, whether the flow
// described by processorSpecs can be run by the vectorized engine. The check
// is performed by setting up the whole flow with a helper that only performs
// sanity checks, so no component is run asynchronously.
// On success it returns the leaf operators of all flows, which are used for
// EXPLAIN output.
func SupportsVectorized(
	ctx context.Context, flowCtx *FlowCtx, processorSpecs []distsqlpb.ProcessorSpec,
) (leaves []exec.OpNode, err error) {
	creator := newVectorizedFlowCreator(
		newNoopFlowCreatorHelper(),
		vectorizedRemoteComponentCreator{},
		false,        /* recordingStats */
		nil,          /* waitGroup */
		&RowBuffer{}, /* syncFlowConsumer */
		nil,          /* nodeDialer */
		distsqlpb.FlowID{},
	)

	// The question being answered is whether the flow is supported in
	// general; available memory is node-dependent in the distributed case, so
	// setup is accounted against an unlimited budget.
	const unlimited = math.MaxInt64
	monitor := mon.MakeMonitor(
		"supports-vectorized",
		mon.MemoryResource,
		nil,       /* curCount */
		nil,       /* maxHist */
		-1,        /* increment */
		unlimited, /* noteworthy */
		flowCtx.Cfg.Settings,
	)
	monitor.Start(ctx, nil, mon.MakeStandaloneBudget(unlimited))
	defer monitor.Stop(ctx)

	// Deferred after the monitor's Stop so that the account is closed before
	// the monitor is stopped.
	account := monitor.MakeBoundAccount()
	defer account.Close(ctx)

	return creator.setupFlow(ctx, flowCtx, processorSpecs, &account)
}