proposal: x/exp/xiter: new package with iterator adapters #61898

rsc · 2023-08-09T16:02:49Z

We propose to add a new package golang.org/x/exp/xiter that defines adapters on iterators. Perhaps these would one day be moved to the iter package or perhaps not. There are concerns about how these would affect idiomatic Go code. It seems worth defining them in x/exp to help that discussion along, and then we can decide whether they move anywhere else when we have more experience with them.

The package is called xiter to avoid a collision with the standard library iter (see proposal #61897). An alternative would be to have xiter define wrappers and type aliases for all the functions and types in the standard iter package, but the type aliases would depend on #46477, which is not yet implemented.

This is one of a collection of proposals updating the standard library for the new 'range over function' feature (#61405). It would only be accepted if that proposal is accepted. See #61897 for a list of related proposals.

Edit, 2024-05-15: Added some missing 2s in function names, and also changed Reduce to take the function first, instead of between sum and seq.

Edit, 2024-07-17: Updated code to match the final Go 1.23 language change. Corrected various typos.

/*
Package xiter implements basic adapters for composing iterator sequences:

[Concat] and [Concat2] concatenate sequences.
[Equal], [Equal2], [EqualFunc], and [EqualFunc2] check whether two sequences contain equal values.
[Filter] and [Filter2] filter a sequence according to a function f.
[Limit] and [Limit2] truncate a sequence after n items.
[Map] and [Map2] apply a function f to a sequence.
[Merge], [Merge2], [MergeFunc], and [MergeFunc2] merge two ordered sequences.
[Reduce] and [Reduce2] combine the values in a sequence.
[Zip] and [Zip2] iterate over two sequences in parallel.

*/

package xiter

import (
	"cmp"
	"iter"
)

// Concat returns an iterator over the concatenation of the sequences.
func Concat[V any](seqs ...iter.Seq[V]) iter.Seq[V] {
	return func(yield func(V) bool) {
		for _, seq := range seqs {
			for e := range seq {
				if !yield(e) {
					return
				}
			}
		}
	}
}

// Concat2 returns an iterator over the concatenation of the sequences.
func Concat2[K, V any](seqs ...iter.Seq2[K, V]) iter.Seq2[K, V] {
	return func(yield func(K, V) bool) {
		for _, seq := range seqs {
			for k, v := range seq {
				if !yield(k, v) {
					return
				}
			}
		}
	}
}

// Equal reports whether the two sequences are equal.
func Equal[V comparable](x, y iter.Seq[V]) bool {
	for z := range Zip(x, y) {
		if z.Ok1 != z.Ok2 || z.V1 != z.V2 {
			return false
		}
	}
	return true
}

// Equal2 reports whether the two sequences are equal.
func Equal2[K, V comparable](x, y iter.Seq2[K, V]) bool {
	for z := range Zip2(x, y) {
		if z.Ok1 != z.Ok2 || z.K1 != z.K2 || z.V1 != z.V2 {
			return false
		}
	}
	return true
}

// EqualFunc reports whether the two sequences are equal according to the function f.
func EqualFunc[V1, V2 any](x iter.Seq[V1], y iter.Seq[V2], f func(V1, V2) bool) bool {
	for z := range Zip(x, y) {
		if z.Ok1 != z.Ok2 || !f(z.V1, z.V2) {
			return false
		}
	}
	return true
}

// EqualFunc2 reports whether the two sequences are equal according to the function f.
func EqualFunc2[K1, V1, K2, V2 any](x iter.Seq2[K1, V1], y iter.Seq2[K2, V2], f func(K1, V1, K2, V2) bool) bool {
	for z := range Zip2(x, y) {
		if z.Ok1 != z.Ok2 || !f(z.K1, z.V1, z.K2, z.V2) {
			return false
		}
	}
	return true
}

// Filter returns an iterator over seq that only includes
// the values v for which f(v) is true.
func Filter[V any](f func(V) bool, seq iter.Seq[V]) iter.Seq[V] {
	return func(yield func(V) bool) {
		for v := range seq {
			if f(v) && !yield(v) {
				return
			}
		}
	}
}

// Filter2 returns an iterator over seq that only includes
// the pairs k, v for which f(k, v) is true.
func Filter2[K, V any](f func(K, V) bool, seq iter.Seq2[K, V]) iter.Seq2[K, V] {
	return func(yield func(K, V) bool) {
		for k, v := range seq {
			if f(k, v) && !yield(k, v) {
				return
			}
		}
	}
}

// Limit returns an iterator over seq that stops after n values.
func Limit[V any](seq iter.Seq[V], n int) iter.Seq[V] {
	return func(yield func(V) bool) {
		if n <= 0 {
			return
		}
		for v := range seq {
			if !yield(v) {
				return
			}
			if n--; n <= 0 {
				break
			}
		}
	}
}

// Limit2 returns an iterator over seq that stops after n key-value pairs.
func Limit2[K, V any](seq iter.Seq2[K, V], n int) iter.Seq2[K, V] {
	return func(yield func(K, V) bool) {
		if n <= 0 {
			return
		}
		for k, v := range seq {
			if !yield(k, v) {
				return
			}
			if n--; n <= 0 {
				break
			}
		}
	}
}

// Map returns an iterator over f applied to seq.
func Map[In, Out any](f func(In) Out, seq iter.Seq[In]) iter.Seq[Out] {
	return func(yield func(Out) bool) {
		for in := range seq {
			if !yield(f(in)) {
				return
			}
		}
	}
}

// Map2 returns an iterator over f applied to seq.
func Map2[KIn, VIn, KOut, VOut any](f func(KIn, VIn) (KOut, VOut), seq iter.Seq2[KIn, VIn]) iter.Seq2[KOut, VOut] {
	return func(yield func(KOut, VOut) bool) {
		for k, v := range seq {
			if !yield(f(k, v)) {
				return
			}
		}
	}
}

// Merge merges two sequences of ordered values.
// Values appear in the output once for each time they appear in x
// and once for each time they appear in y.
// If the two input sequences are not ordered,
// the output sequence will not be ordered,
// but it will still contain every value from x and y exactly once.
//
// Merge is equivalent to calling MergeFunc with cmp.Compare[V]
// as the ordering function.
func Merge[V cmp.Ordered](x, y iter.Seq[V]) iter.Seq[V] {
	return MergeFunc(x, y, cmp.Compare[V])
}

// MergeFunc merges two sequences of values ordered by the function f.
// Values appear in the output once for each time they appear in x
// and once for each time they appear in y.
// When equal values appear in both sequences,
// the output contains the values from x before the values from y.
// If the two input sequences are not ordered by f,
// the output sequence will not be ordered by f,
// but it will still contain every value from x and y exactly once.
func MergeFunc[V any](x, y iter.Seq[V], f func(V, V) int) iter.Seq[V] {
	return func(yield func(V) bool) {
		next, stop := iter.Pull(y)
		defer stop()
		v2, ok2 := next()
		for v1 := range x {
			for ok2 && f(v1, v2) > 0 {
				if !yield(v2) {
					return
				}
				v2, ok2 = next()
			}
			if !yield(v1) {
				return
			}
		}
		for ok2 {
			if !yield(v2) {
				return
			}
			v2, ok2 = next()
		}
	}
}

// Merge2 merges two sequences of key-value pairs ordered by their keys.
// Pairs appear in the output once for each time they appear in x
// and once for each time they appear in y.
// If the two input sequences are not ordered by their keys,
// the output sequence will not be ordered by its keys,
// but it will still contain every pair from x and y exactly once.
//
// Merge2 is equivalent to calling MergeFunc2 with cmp.Compare[K]
// as the ordering function.
func Merge2[K cmp.Ordered, V any](x, y iter.Seq2[K, V]) iter.Seq2[K, V] {
	return MergeFunc2(x, y, cmp.Compare[K])
}

// MergeFunc2 merges two sequences of key-value pairs ordered by the function f.
// Pairs appear in the output once for each time they appear in x
// and once for each time they appear in y.
// When pairs with equal keys appear in both sequences,
// the output contains the pairs from x before the pairs from y.
// If the two input sequences are not ordered by f,
// the output sequence will not be ordered by f,
// but it will still contain every pair from x and y exactly once.
func MergeFunc2[K, V any](x, y iter.Seq2[K, V], f func(K, K) int) iter.Seq2[K, V] {
	return func(yield func(K, V) bool) {
		next, stop := iter.Pull2(y)
		defer stop()
		k2, v2, ok2 := next()
		for k1, v1 := range x {
			for ok2 && f(k1, k2) > 0 {
				if !yield(k2, v2) {
					return
				}
				k2, v2, ok2 = next()
			}
			if !yield(k1, v1) {
				return
			}
		}
		for ok2 {
			if !yield(k2, v2) {
				return
			}
			k2, v2, ok2 = next()
		}
	}
}

// Reduce combines the values in seq using f.
// For each value v in seq, it updates sum = f(sum, v)
// and then returns the final sum.
// For example, if iterating over seq yields v1, v2, v3,
// Reduce returns f(f(f(sum, v1), v2), v3).
func Reduce[Sum, V any](f func(Sum, V) Sum, sum Sum, seq iter.Seq[V]) Sum {
	for v := range seq {
		sum = f(sum, v)
	}
	return sum
}

// Reduce2 combines the values in seq using f.
// For each pair k, v in seq, it updates sum = f(sum, k, v)
// and then returns the final sum.
// For example, if iterating over seq yields (k1, v1), (k2, v2), (k3, v3)
// Reduce returns f(f(f(sum, k1, v1), k2, v2), k3, v3).
func Reduce2[Sum, K, V any](f func(Sum, K, V) Sum, sum Sum, seq iter.Seq2[K, V]) Sum {
	for k, v := range seq {
		sum = f(sum, k, v)
	}
	return sum
}

// A Zipped is a pair of zipped values, one of which may be missing,
// drawn from two different sequences.
type Zipped[V1, V2 any] struct {
	V1  V1
	Ok1 bool // whether V1 is present (if not, it will be zero)
	V2  V2
	Ok2 bool // whether V2 is present (if not, it will be zero)
}

// Zip returns an iterator that iterates x and y in parallel,
// yielding Zipped values of successive elements of x and y.
// If one sequence ends before the other, the iteration continues
// with Zipped values in which either Ok1 or Ok2 is false,
// depending on which sequence ended first.
//
// Zip is a useful building block for adapters that process
// pairs of sequences. For example, Equal can be defined as:
//
//	func Equal[V comparable](x, y iter.Seq[V]) bool {
//		for z := range Zip(x, y) {
//			if z.Ok1 != z.Ok2 || z.V1 != z.V2 {
//				return false
//			}
//		}
//		return true
//	}
func Zip[V1, V2 any](x iter.Seq[V1], y iter.Seq[V2]) iter.Seq[Zipped[V1, V2]] {
	return func(yield func(z Zipped[V1, V2]) bool) {
		next, stop := iter.Pull(y)
		defer stop()
		v2, ok2 := next()
		for v1 := range x {
			if !yield(Zipped[V1, V2]{v1, true, v2, ok2}) {
				return
			}
			v2, ok2 = next()
		}
		var zv1 V1
		for ok2 {
			if !yield(Zipped[V1, V2]{zv1, false, v2, ok2}) {
				return
			}
			v2, ok2 = next()
		}
	}
}

// A Zipped2 is a pair of zipped key-value pairs,
// one of which may be missing, drawn from two different sequences.
type Zipped2[K1, V1, K2, V2 any] struct {
	K1  K1
	V1  V1
	Ok1 bool // whether K1, V1 are present (if not, they will be zero)
	K2  K2
	V2  V2
	Ok2 bool // whether K2, V2 are present (if not, they will be zero)
}

// Zip2 returns an iterator that iterates x and y in parallel,
// yielding Zipped2 values of successive elements of x and y.
// If one sequence ends before the other, the iteration continues
// with Zipped2 values in which either Ok1 or Ok2 is false,
// depending on which sequence ended first.
//
// Zip2 is a useful building block for adapters that process
// pairs of sequences. For example, Equal2 can be defined as:
//
//	func Equal2[K, V comparable](x, y iter.Seq2[K, V]) bool {
//		for z := range Zip2(x, y) {
//			if z.Ok1 != z.Ok2 || z.K1 != z.K2 || z.V1 != z.V2 {
//				return false
//			}
//		}
//		return true
//	}
func Zip2[K1, V1, K2, V2 any](x iter.Seq2[K1, V1], y iter.Seq2[K2, V2]) iter.Seq[Zipped2[K1, V1, K2, V2]] {
	return func(yield func(z Zipped2[K1, V1, K2, V2]) bool) {
		next, stop := iter.Pull2(y)
		defer stop()
		k2, v2, ok2 := next()
		for k1, v1 := range x {
			if !yield(Zipped2[K1, V1, K2, V2]{k1, v1, true, k2, v2, ok2}) {
				return
			}
			k2, v2, ok2 = next()
		}
		var zk1 K1
		var zv1 V1
		for ok2 {
			if !yield(Zipped2[K1, V1, K2, V2]{zk1, zv1, false, k2, v2, ok2}) {
				return
			}
			k2, v2, ok2 = next()
		}
	}
}

The text was updated successfully, but these errors were encountered:

gophun · 2023-08-09T17:23:01Z

The duplication of each function is the first thing that catches the eye. Are there thoughts on why this is acceptable?

gophun · 2023-08-09T17:39:44Z

What about an adapter that converts an iter.Seq[V] to an iter.Seq2[int, V] and an adapter that converts an iter.Seq2[K, V] to an iter.Seq[V]?

zephyrtronium · 2023-08-09T17:58:55Z

Some typos: EqualFunc2, Map2, Merge2, and MergeFunc2 lack the 2 suffixes on their actual names. They're all correct in the corresponding documentation.

earthboundkid · 2023-08-09T20:05:17Z

May I humbly suggest that the name "iterutils" is less susceptible to, uh, unfortunate mispronunciation.

earthboundkid · 2023-08-09T20:09:11Z

For Reduce, the callback should go last: func Reduce[Sum, V any](sum Sum, seq Seq[V], f func(Sum, V) Sum) Sum.

DeedleFake · 2023-08-09T20:41:23Z

For Reduce, the callback should go last: func Reduce[Sum, V any](sum Sum, seq Seq[V], f func(Sum, V) Sum) Sum.

I'd actually prefer func Reduce[Sum, V any](seq Seq[V], sum Sum, f func(Sum, V) Sum) Sum.

Edit: I just realized that if Reduce() is being used to build an array, putting sum first puts everything in the same order as Append() and other functions that put the destination first. I'm not sure if that's worth it or not.

rsc · 2023-08-09T21:41:23Z

This proposal has been added to the active column of the proposals project
and will now be reviewed at the weekly proposal review meetings.
— rsc for the proposal review group

DeedleFake · 2023-08-10T00:47:52Z

The more I think about it, the more that I think that API design for this should wait until after a decision is made on #49085. Multiple other languages have proven over and over that a left-to-right chained syntax is vastly superior ergonomically to simple top-level functions for iterators. For example, compare

nonNegative := xiter.Filter(
  xiter.Map(
    bufio.Lines(r),
    parseLine,
  ),
  func(v int) bool { return v >= 0 },
)

vs.

nonNegative := bufio.Lines(r).
  Map(parseLine).
  Filter(func(v int) bool { return v >= 0 })

Go's a little weird because of the need to put the .on the previous line, but other than that one oddity, which I could get used to, the second is better in every way. It reads in the order that actions actually happen, it's less repetitive, etc. The only real way to emulate it currently is something like

lines := bufio.Lines(r)
intlines := xiter.Map(lines, parseLine)
nonNegative := xiter.Filter(func(v int) bool { return v >= 0 })

That works, but it clutters up the local namespace and it's significantly harder to edit. For example, if you decide you need to add a new step in the chain, you have to make sure that all of the variables for each iterator match up in the previous and succeeding calls.

ianlancetaylor · 2023-08-10T00:55:14Z

What type does bufio.Lines return to make that work in Go? What methods does that type support? What is the type of nonNegative? I mean these as honest questions. Can we write this kind of code in Go today, or would we need new language features?

hherman1 · 2023-08-10T01:50:04Z

You would probably have to wrap the base iterator like:

stream.New(bufio.Lines).
    Filter(…).
    …

DeedleFake · 2023-08-10T03:00:48Z

@ianlancetaylor

Sorry. I should have stuck a comment in. I was just coming up with some hypothetical function that would give an iter.Seq[string]. In this case, the idea was that it would internally use a bufio.Scanner to yield lines from an io.Reader or something. My original code had an anonymous func(string) int instead of the vague parseLine but I removed it because it was clogging up the example with irrelevant code and I didn't clarify when I did.

@hherman1

Not necessarily. The transformative and sink functions on iterators could just be defined as methods on iter.Seq.

hherman1 · 2023-08-10T03:09:24Z

~~But iter.Seq is an interface type no? Are you saying it should be a struct type?~~

I was wrong, it’s not an interface.

benhoyt · 2023-08-10T03:22:51Z

Why do some functions take the f func as the last parameter, but Filter and Map take it as the first, and Reduce in the middle? Most other functions in the stdlib take funcs as the last parameter, such as sort.Slice, slices.*Func, ServeMux.HandleFunc, and so on. This makes code that uses them with inline function literals more readable:

names := xiter.Map(func (p Person) string {
	return p.Name
}, people) // "people" gets lost

// vs

names := xiter.Map(people, func (p Person) string {
	return p.Name
})

Merovius · 2023-08-10T05:58:35Z

@DeedleFake There won't be a "decision" on #49085 anytime soon. There are good reasons not to do it yet, but we also don't want to say it never happens. The issue exists to reflect that state. What it comes down to is, would you rather have no iterators (for the foreseeable future) or ones which can't be "chained"?

DeedleFake · 2023-08-10T06:04:09Z

What it comes down to is, would you rather have no iterators (for the foreseeable future) or ones which can't be "chained"?

No iterators, definitely. I've done fine without them for over a decade. I can wait a bit longer. If a bad implementation goes in, I'll never get a good version. Plus, I can just write my own implementation of whatever iterator functions I need as long as range-over-func exists while I wait.

gophun · 2023-08-10T06:47:46Z

Neither chaining nor functional programming has ever been a decisive or recommended technique in Go. Instead, iteration—specifically, procedural 'for' loops—has always been a core technique since the language's inception. The iterator proposals aim to enhance this core approach. While I don't know what the overall plans are, if you're hoping for Go to follow the path of Java Streams or C# LINQ, you might be in for disappointment.

Merovius · 2023-08-10T10:02:02Z

I can wait a bit longer. If a bad implementation goes in, I'll never get a good version.

I think "a bit" is misleading. We are talking years - if at all. And I don't believe the second part of that sentence is true either, we could always release a v2 of the relevant packages, if we ever manage to do #49085 in a decade or so.

DeedleFake · 2023-08-10T13:54:52Z

While I don't know what the overall plans are, if you're hoping for Go to follow the path of Java Streams or C# LINQ, you might be in for disappointment.

Is that not the intention of these proposals? To build a standardized iterator system that works similarly to those? Why else is there a proposal here for Map(), Filter(), and Reduce(), among others? I have no problem with functions like slices.Backwards() and other source function proposals. My only problem is the transformative and sink functions.

I think "a bit" is misleading. We are talking years - if at all. And I don't believe the second part of that sentence is true either, we could always release a v2 of the relevant packages, if we ever manage to do #49085 in a decade or so.

Edit: The way this proposal is phrased does actually imply that they may be heavily reevaluated enough in x/exp that they may not go into the standard library at all, so maybe my point is moot after all. I still think that this is a valid issue with the API design to bring up, but maybe it's a bit off-topic for this particular proposal and should wait until after they're in x/exp and it can be more easily demonstrated how awkward they are to use. I don't like the idea that existing code will be broken when some variant of them does potentially go into the standard library, but it's less of a problem than I was worried about. Never mind. Please ignore my rambling below.

That issue has only been open for 2 years. I think assuming that it'll take a decade to solve is a bit unfair. Yes, a v2 is an option, especially if #61716 is accepted, but that was created out of necessity to deal with problems in an existing package, while this would essentially be purposefully putting problems into a new package. It's not like I'm saying that iterators are unacceptable to me in this state, just that features have been delayed or cut before because of possible changes coming later and that I think that it's prudent to discuss the possibility here. That just happened in the last few weeks in the maps package because of the possibility of the acceptance of #61405. I think the same should be done with the transformative and sink functions for now, or at the very least those functions should be planned to stay in x/exp until some way to clean up the API is decided on, that's all.

One of my favorite things about Go is how slow and methodical it (usually) is in introducing new features. I think that the fact that it took over a decade to add generics is a good thing, and I really wanted generics. One of the purposes of that approach is to try avoid having to fix it later. Adding those functions in the proposed manner will almost definitely necessitate that later fix, and I very much would like to avoid that if at all possible.

gophun · 2023-08-10T14:34:01Z

Is that not the intention of these proposals? To build a standardized iterator system that works similarly to those?

Java Streams and .NET LINQ build on a standardized iterator system, but they are more than that. Both languages had a generic iterator system before. Iterators are useful without chaining or functional programming.

Why else is there a proposal here for Map(), Filter(), and Reduce(), among others?

That would be this very proposal, and it comes with a caveat: "... or perhaps not. There are concerns about how these would affect idiomatic Go code. "

This means that not everyone who has read these proposals in advance believes that this part is a good idea.

jba · 2023-08-10T16:26:54Z

While I don't know what the overall plans are, if you're hoping for Go to follow the path of Java Streams or C# LINQ, you might be in for disappointment.

Is that not the intention of these proposals? To build a standardized iterator system that works similarly to those? Why else is there a proposal here for Map(), Filter(), and Reduce(), among others?

Maybe chaining leads to too much of a good thing. It becomes more tempting to write long, hard-to-read chains of functions. You're less likely to do that if you have to nest calls.

As an analogy, Go has if. Isn't the intention of if to allow conditional execution? Why then shouldn't Go have the ternary operator ?:? Because it often leads to hard-to-read code.

rsc · 2023-08-10T16:50:12Z

Re #49085, generic methods either require (A) dynamic code generation or (B) terrible speed or (C) hiding those methods from dynamic interface checks or (D) not doing them at all. We have chosen option (D). The issue remains open like so many suggestions people have made, but I don't see a realistic path forward where we choose A, B, or C, nor do I see a fifth option. So it makes sense to assume generic methods are not going to happen and do our work accordingly.

Merovius · 2023-08-10T17:36:48Z

@DeedleFake The issue is not lack of understanding what a lack of parameterized methods means. It's just that, as @rsc said, wanting them doesn't make them feasible. The issue only being 2 years old is deceptive. The underlying problem is actually as old as Go and one of the main reasons we didn't have generics for most of that. Which you should consider, when you say

I think that the fact that it took over a decade to add generics is a good thing, and I really wanted generics.

We got generics by committing to keep implementation strategies open, thus avoiding the generics dilemma. Not having parametric methods is a pretty direct consequence of that decision.

DeedleFake · 2023-08-10T17:59:35Z

Well, I tried. If that's the decision then that's the decision. I'm disappointed, but I guess I'll just be satisfied with what I do like about the current proposal, even if it has, in my opinion, some fairly major problems. Sorry for dragging this a bit off-topic there.

thediveo · 2023-08-10T20:18:19Z

Hope that it's not noise: I wondered if naming it the sum parameter might be implying to the less experienced dev that reduce does only summation, so I looked at Javascript's array reduce: that uses accumulator. I don't know if that is much better, I just wanted to point it out. If anything, let's have a good laugh.

jub0bs · 2024-11-14T18:39:53Z

I have produced github.com/jub0bs/iterutil, more for practice and as a showcase than anything else. I don't really use it much yet.

I do like https://pkg.go.dev/github.com/jub0bs/iterutil#SortedFromMap, though; it's more efficient than simply chaining maps.Keys with slices.Sorted.

Merovius · 2024-11-14T19:33:07Z

@DeedleFake Not to nitpick, or anything, but

Overall, I find that I don't use it that much, but not because it isn't useful.

I would argue that "it is used often" is pretty much the definition of "useful".

DmitriyMV · 2024-11-15T00:32:39Z

@jub0bs

it's more efficient than simply chaining maps.Keys with slices.Sorted.

It can't be more efficient since it ranges over slices.Sorted(maps.Keys(m)) right after you did ks := keys(m); slices.Sort(ks). I think it's a typo.

jub0bs · 2024-11-15T08:34:48Z

@DmitriyMV Indeed! That is a typo. Thanks. This is what I had in mind:

func SortedFromMap[M ~map[K]V, K cmp.Ordered, V any](m M) iter.Seq2[K, V] {
	return func(yield func(K, V) bool) {
		ks := keys(m)
		slices.Sort(ks)
		for _, k := range ks {
			if !yield(k, m[k]) {
				return
			}
		}
	}
}

func keys[K comparable, V any](m map[K]V) []K {
	ks := make([]K, 0, len(m))
	for k := range m {
		ks = append(ks, k)
	}
	return ks
}

That should be more efficient than simply ranging over slices.Sorted(map.Keys(m)) because it uses the knowledge of the number of pairs in the map. I really need to add benchmarks to this library.

earthboundkid · 2024-11-15T14:33:04Z

Another way to do SortedFromMap would be to use a heap, so you don't have to pay the full cost of sorting if you end up breaking the loop before going through all items.

josharian · 2024-11-15T16:26:41Z

I looked through my code and the main themes I saw lined up almost precisely with @DeedleFake's #61898 (comment).

Using these adapters individually is noisy because of the verbosity of the inline function definitions. In some places, using a function or method works and reads well, but not always. proposal: spec: lightweight anonymous function syntax #21498 would help some, but even with that in place, the argument order to Map and friends is wrong--the function should come last.
Composing these adapters creates an "inside out" structure that is hard to read and write. (Ironically similar to C's typedefs that Go's design intentionally avoided.)

Consider counting the number of elements in a slice that have value x. You can do this with something like:

xiter.Len(xiter.Filter(func (t T) { return t == x }), slices.Values(s))

This is much harder to read than left-to-right (pseudo-code): s.Values().Filter(func (t T) { return t == x })).Len()

The difficulty of composition has led in practice to a proliferation of helpers that simply combine two or more steps.

A large proportion of uses apply to slices. The awkwardness of the inside out structure plus slices.Values and slices.Collect has led to slices-only copies of the most popular functions such as (note the corrected argument order):

func Mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	return slices.Collect(Map(f, slices.Values(s)))
}

gophun · 2024-11-15T16:46:46Z

This is much harder to read than left-to-right (pseudo-code): s.Values().Filter(func (t T) { return t == x })).Len()

To be honest, it should read like this:

cnt := 0
for _, t := range s {
	if t == x {
		cnt++
	}
}

I thought this is what we as the Go community have agreed on in the past 15 years.

earthboundkid · 2024-11-15T16:50:25Z

I thought this is what we as the Go community have agreed on in the past 15 years.

I wouldn't say that this is settled, but this is what I was talking about when I said,

FWIW, I consider Map and Reduce very controversial because they will shift Go from using for-loops everywhere to using callback functions everywhere!

I do see a lot of advantage in the for-loop heavy style because everyone needs to know how to use for-loops anyway, and typically the clarity from using declarative names like "map" and "filter" ends up getting eaten up in other ways, like the length of callback signatures. It's not clear which way we want Go to evolve.

DeedleFake · 2024-11-15T16:53:00Z

@gophun

The main benefit of iterators over a manual for loop over some data structure is that they are essentially an abstraction of a loop as data, allowing it to, in pieces, be passed, modified, and dealt with across function calls. In my own projects, being able to pass something an iter.Seq has improved the code significantly, both in terms of cleanliness and in terms of necessary allocations.

Before this, in order to modify some data source and pass it to something, either that thing would need to know about the data source, breaking the abstraction, or I would need to allocate a slice to store the modified data in. Now I can apply some adapters and pass the resulting iter.Seq to the function and the function, effectively allowing me to pass the function a for loop. This also reduces the number of loops, as instead of needing to loop to create the slice and then loop again over the slice, the function the iter.Seq is passed to can be the only one looping.

josharian · 2024-11-15T16:57:40Z

For loops can be clear and concise. Functional programming style can be clear and concise. I would like to be able to reach for either.

The discussion here highlights how far we still are from the latter in Go. Iterators are wonderful and I'm glad we have them. But without lambdas and pipes (or their moral equivalents) we are not able to use them comfortably and composably.

jub0bs · 2024-11-15T17:02:55Z

@earthboundkid

Another way to do SortedFromMap would be to use a heap [...]

That's a good idea! I'll see if I can get that to work.

josharian · 2024-11-17T18:40:05Z

Spitballing, thinking about what we can do with the tools we have...

One way to move from inside-out toward RTL (not as good as LTR!) and slightly finesse the argument order problem is to return functions that transform iterators.

For example:

// Map converts f into an iterator mapper.
func Map[In, Out any](f func(In) Out) func(seq iter.Seq[In]) iter.Seq[Out] {
	return func(seq iter.Seq[In]) iter.Seq[Out] {
		return func(yield func(Out) bool) {
			for in := range seq {
				if !yield(f(in)) {
					return
				}
			}
		}
	}
}

// Limit returns a function that wraps seq to stop after n values.
func Limit[V any](n int) func(seq iter.Seq[V]) iter.Seq[V] {
	return func(seq iter.Seq[V]) iter.Seq[V] {
		return func(yield func(V) bool) {
			if n <= 0 {
				return
			}
			for v := range seq {
				if !yield(v) {
					return
				}
				if n--; n <= 0 {
					break
				}
			}
		}
	}
}

This adds a layer of indirection and leads to a LISP problem (lots of irritating silly parenthesis), but does slightly improve composability.

I wouldn't advocate for this as-is, but throwing it out there in case it sparks new ideas...

josharian · 2024-11-17T18:43:50Z

And switching from forest to trees...

Limit[2] would be slightly nicer to use with flags (my de facto primary use case for it) if it treated n < 0 as "unbounded" rather than as 0.

AlekSi · 2024-12-06T03:20:33Z

In #53987 (comment), @JeremyLoy proposed an iterator-based addition to (what become) slices.Chunk. I already needed it in multiple places, so I propose adding it. It would allow passing []E to functions that could process the whole slice at once instead of one E at a time. For example, given a MongoDB database and a generator of documents, one could use

for d := range xiter.Chunk(docs, 1000) {
    db.InsertMany(d) // sends a single insert command with up to 1000 documents
}

instead of less effective

for d := range docs {
    db.InsertOne(r) // sends a single insert command with a single document
}

In those examples, InsertMany and InsertOne are MongoDB driver's methods.

Just like slices.Chunk allows sub-slices to be used as iterators, xiter.Chunk would allow iterators to be used as (sub)slices.

// Chunk returns an iterator over consecutive slices of up to n elements of seq.
// All but the last slice will have size n.
// All slices are clipped to have no capacity beyond the length.
// If seq is empty, the sequence is empty: there is no empty slice in the sequence.
// Chunk panics if n is less than 1.
func Chunk[E any](seq iter.Seq[E], n int) iter.Seq[[]E] {
	if n < 1 {
		panic("cannot be less than 1")
	}

	return func(yield func([]E) bool) {
		var batch []E

		for e := range seq {
			if batch == nil {
				batch = make([]E, 0, n)
			}

			batch = append(batch, e)

			if len(batch) == n {
				if !yield(batch) {
					return
				}

				batch = nil
			}
		}

		if l := len(batch); l > 0 {
			yield(batch[:l:l])
		}
	}
}

DeedleFake · 2024-12-06T04:56:40Z

@AlekSi

I've got an implementation of that in my iterator support package. Mine reuses the slice for each chunk, leaving it up to the caller to copy them if they want to keep previous ones into the next iteration. I think that's the better approach as the user can make the decision that way. Another alternative is to allow the user to pass a slice to be used, with the capacity of the slice serving as the size of the chunks.

anatoly-kussul · 2024-12-20T03:30:49Z

@AlekSi

imo it would be more convenient if Chunk returned iter.Seq[iter.Seq[E]] instead of iter.Seq[[]E]
something like

// Chunk returns an iterator over consecutive iterators of up to n elements of seq.
// Chunk panics if n is less than 1.
func Chunk[E any](seq iter.Seq[E], n int) iter.Seq[iter.Seq[E]] {
	if n < 1 {
		panic("cannot be less than 1")
	}

	return func(yield func(iter.Seq[E]) bool) {
		var stopped bool
		take := func(next func() (E, bool), n int) iter.Seq[E] {
			return func(yield func(E) bool) {
				for range n {
					item, ok := next()
					if !ok || !yield(item) {
						stopped = true
						return
					}
				}
			}
		}

		next, stop := iter.Pull(seq)
		defer stop()
		for {
			if stopped || !yield(take(next, n)) {
				return
			}
		}
	}
}

you can then aggregate them to slices if needed like:

func ChunkSliced[E any](seq iter.Seq[E], n int) iter.Seq[[]E] {
	return func(yield func([]E) bool) {
		for chunkSeq := range Chunk(seq, n) {
			chunk := slices.Collect(chunkSeq)
			if len(chunk) == 0 || !yield(chunk) {
				return
			}
		}
	}
}

or

func ChunkSlicedInplace[E any](seq iter.Seq[E], chunk []E) iter.Seq[[]E] {
	return func(yield func([]E) bool) {
		for chunkSeq := range Chunk(seq, cap(chunk)) {
			clear(chunk)
			chunk = chunk[:0]
			chunk = slices.AppendSeq(chunk, chunkSeq)
			if len(chunk) == 0 || !yield(chunk) {
				return
			}
		}
	}
}

AlekSi · 2024-12-20T03:54:58Z

imo it would be more convenient if Chunk returned iter.Seq[iter.Seq[E]] instead of iter.Seq[[]E]

@anatoly-kussul I updated my example to show that your proposal would not work for existing functions that accept slices. And converting returned (sub)slices to iterators is already possible with slices.Values.

anatoly-kussul · 2024-12-20T04:13:19Z

@AlekSi

And converting returned (sub)slices to iterators is already possible with slices.Values.

This wouldn't really work for cases where both iterator and your logic are I/O heavy.

For example:
input Seq is some event broker consumer
for each message you want to make network request.
but after each batch you want to sleep for some time.

with Chunk returning iter.Seq[iter.Seq[E]] it would process each msg inside chunk as soon as it's ready and be as simple as:

for chunk := range Chunk(stream, n) {
   for msg := range chunk {
   	// do something
   }
   time.Sleep(cooldown)
}

while iter.Seq[[]E] approach would require to receive full batch first, which would be less efficient.

You can always convert iter.Seq[E] to []E without impacting flow logic, but not the other way around.

apparentlymart · 2024-12-20T20:04:51Z

Another situation where "sequences of sequences" can be useful is situations where each element of an input sequence can produce zero or more elements in a result but the boundaries between those sub-sequences are not important to the overall problem.

In situations like that (in other languages) I've found benefit in a "flatten" function, which I think would be spelled something like this in Go:

func Flatten[V any](seq iter.Seq[iter.Seq[V]]) iter.Seq[V]

Although I expect other uses are possible, the main use I have in mind is first using Map to project from some input sequence to an iter.Seq[iter.Seq[V]] and then using Flatten to transform that into an easier-to-consume iter.Seq[V] to return.

Using Flatten in conjunction with Chunk would of course be pretty self-defeating 😀 so the relevance here was only that sequences of sequences do tend to arise once you start doing iterator-combinator games like this library seems to be promoting, and what I've described here is another separate situation where one can end up holding nested sequences.

(I don't mean this comment to suggest that Flatten necessarily must be part of a first version of this package. I expect instead that we can wait to see how commonly it arises in practice, since it's not clear yet that Go programmers will make such extensive use of iterator "map" as is idiomatic in some other languages.)

earthboundkid · 2024-12-20T21:26:33Z

Python does the equivalent of iter.Seq[iter.Seq[T]] and I have found it extremely inconvenient.

You could also do cooldown with

for i, item := range enumerate(stream) {
   if i != 0 && i%chunkSize == 0 {
      time.Sleep(cooldown)
   }
   doSomething(item)
}

That seems clearer to me.

rsc added the Proposal label Aug 9, 2023

gopherbot added this to the Proposal milestone Aug 9, 2023

rsc mentioned this issue Aug 9, 2023

iter: new package for iterators #61897

Closed

ianlancetaylor added this to Proposals Aug 9, 2023

ianlancetaylor moved this to Incoming in Proposals Aug 9, 2023

rsc moved this from Incoming to Active in Proposals Aug 9, 2023

apparentlymart mentioned this issue Nov 14, 2024

proposal: math: add Mean, Median, Mode, Variance, and StdDev #69264

Open

This comment was marked as resolved.

Sign in to view

jonathansharman mentioned this issue Nov 21, 2024

proposal: spec: lightweight anonymous function syntax #21498

Open

hmahmood mentioned this issue Dec 2, 2024

in buffer batching with perf buffers for NPM DataDog/datadog-agent#31402

Open

jimmyfrasche mentioned this issue Dec 10, 2024

proposal: iter: add Map/Filter/Reduce functions #70761

Closed

apparentlymart mentioned this issue Dec 20, 2024

proposal: os: add iterator variant of File.ReadDir #70084

Open

proposal: x/exp/xiter: new package with iterator adapters #61898

proposal: x/exp/xiter: new package with iterator adapters #61898

Comments

rsc commented Aug 9, 2023 • edited by ianlancetaylor Loading

gophun commented Aug 9, 2023

gophun commented Aug 9, 2023

zephyrtronium commented Aug 9, 2023

earthboundkid commented Aug 9, 2023

earthboundkid commented Aug 9, 2023

DeedleFake commented Aug 9, 2023 • edited Loading

rsc commented Aug 9, 2023

DeedleFake commented Aug 10, 2023

ianlancetaylor commented Aug 10, 2023

hherman1 commented Aug 10, 2023

DeedleFake commented Aug 10, 2023 • edited Loading

hherman1 commented Aug 10, 2023 • edited Loading

benhoyt commented Aug 10, 2023

Merovius commented Aug 10, 2023 • edited Loading

DeedleFake commented Aug 10, 2023 • edited Loading

gophun commented Aug 10, 2023

Merovius commented Aug 10, 2023

DeedleFake commented Aug 10, 2023 • edited Loading

gophun commented Aug 10, 2023

jba commented Aug 10, 2023

rsc commented Aug 10, 2023 • edited Loading

Merovius commented Aug 10, 2023 • edited Loading

DeedleFake commented Aug 10, 2023

thediveo commented Aug 10, 2023

jub0bs commented Nov 14, 2024 • edited Loading

Merovius commented Nov 14, 2024

DmitriyMV commented Nov 15, 2024 • edited Loading

jub0bs commented Nov 15, 2024

earthboundkid commented Nov 15, 2024

josharian commented Nov 15, 2024

gophun commented Nov 15, 2024

earthboundkid commented Nov 15, 2024

DeedleFake commented Nov 15, 2024 • edited Loading

josharian commented Nov 15, 2024 • edited Loading

jub0bs commented Nov 15, 2024

josharian commented Nov 17, 2024

josharian commented Nov 17, 2024 • edited Loading

This comment was marked as resolved.

This comment was marked as resolved.

This comment was marked as resolved.

This comment was marked as resolved.

This comment was marked as resolved.

AlekSi commented Dec 6, 2024 • edited Loading

DeedleFake commented Dec 6, 2024

anatoly-kussul commented Dec 20, 2024

AlekSi commented Dec 20, 2024

anatoly-kussul commented Dec 20, 2024 • edited Loading

apparentlymart commented Dec 20, 2024

earthboundkid commented Dec 20, 2024 • edited Loading

rsc commented Aug 9, 2023 •

edited by ianlancetaylor

Loading

DeedleFake commented Aug 9, 2023 •

edited

Loading

DeedleFake commented Aug 10, 2023 •

edited

Loading

hherman1 commented Aug 10, 2023 •

edited

Loading

Merovius commented Aug 10, 2023 •

edited

Loading

DeedleFake commented Aug 10, 2023 •

edited

Loading

DeedleFake commented Aug 10, 2023 •

edited

Loading

rsc commented Aug 10, 2023 •

edited

Loading

Merovius commented Aug 10, 2023 •

edited

Loading

jub0bs commented Nov 14, 2024 •

edited

Loading

DmitriyMV commented Nov 15, 2024 •

edited

Loading

DeedleFake commented Nov 15, 2024 •

edited

Loading

josharian commented Nov 15, 2024 •

edited

Loading

josharian commented Nov 17, 2024 •

edited

Loading

AlekSi commented Dec 6, 2024 •

edited

Loading

anatoly-kussul commented Dec 20, 2024 •

edited

Loading

earthboundkid commented Dec 20, 2024 •

edited

Loading