From 65563ab988cbfba2206cb2af3097a130a52aa278 Mon Sep 17 00:00:00 2001 From: Kyle Ellrott Date: Thu, 25 Mar 2021 17:06:17 -0700 Subject: [PATCH] Setting option to allow the mongo aggregation pipeline to use disk-based temp files. This fixes an issue with the `distinct` call failing on large datasets --- mongo/processor.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mongo/processor.go b/mongo/processor.go index 199868d7..ee26c5b1 100644 --- a/mongo/processor.go +++ b/mongo/processor.go @@ -12,6 +12,7 @@ import ( "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" ) // Processor stores the information for a mongo aggregation pipeline @@ -56,7 +57,8 @@ func (proc *Processor) Process(ctx context.Context, man gdbi.Manager, in gdbi.In for t := range in { nResults := 0 //plog.Infof("Running: %#v", proc.query) - cursor, err := initCol.Aggregate(ctx, proc.query) + trueVal := true + cursor, err := initCol.Aggregate(ctx, proc.query, &options.AggregateOptions{AllowDiskUse: &trueVal}) if err != nil { plog.Errorf("Query Error (%s) : %s", proc.query, err) continue