diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index bf536c349cb2a..ab71cb801863b 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2624,7 +2624,7 @@ object functions {
    * @group math_funcs
    * @since 3.5.0
    */
-  def ln(e: Column): Column = log(e)
+  def ln(e: Column): Column = Column.fn("ln", e)
 
   /**
    * Computes the natural logarithm of the given value.
@@ -3503,7 +3503,7 @@ object functions {
       mode: Column,
       padding: Column,
       aad: Column): Column =
-    Column.fn("aes_encrypt", input, key, mode, padding, aad)
+    Column.fn("aes_decrypt", input, key, mode, padding, aad)
 
   /**
    * Returns a decrypted value of `input`.
@@ -3515,7 +3515,7 @@ object functions {
    * @since 3.5.0
    */
   def aes_decrypt(input: Column, key: Column, mode: Column, padding: Column): Column =
-    Column.fn("aes_encrypt", input, key, mode, padding)
+    Column.fn("aes_decrypt", input, key, mode, padding)
 
   /**
    * Returns a decrypted value of `input`.
@@ -3527,7 +3527,7 @@ object functions {
    * @since 3.5.0
    */
   def aes_decrypt(input: Column, key: Column, mode: Column): Column =
-    Column.fn("aes_encrypt", input, key, mode)
+    Column.fn("aes_decrypt", input, key, mode)
 
   /**
    * Returns a decrypted value of `input`.
@@ -3539,7 +3539,7 @@ object functions {
    * @since 3.5.0
    */
   def aes_decrypt(input: Column, key: Column): Column =
-    Column.fn("aes_encrypt", input, key)
+    Column.fn("aes_decrypt", input, key)
 
   /**
    * This is a special version of `aes_decrypt` that performs the same operation, but returns a
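Useful context for the hunks above, not part of the diff itself: every `aes_decrypt` overload in this client previously dispatched to the server-side `aes_encrypt` function, so a round trip through the Connect Scala client double-encrypted instead of decrypting. A minimal sketch of the corrected dispatch, assuming a Spark Connect session bound to `spark` and a demo-only key (both illustrative assumptions, not from the patch):

    // Illustrative sketch; `spark` and the key are assumptions, not part of the patch.
    import org.apache.spark.sql.functions.{aes_decrypt, aes_encrypt, col, lit}

    val key = lit("abcdefghijklmnop") // demo 16-byte key; AES accepts 16, 24, or 32 bytes
    val roundTrip = spark.sql("SELECT 'Spark' AS input")
      .select(aes_encrypt(col("input"), key).as("ct")) // default GCM mode, DEFAULT padding
      .select(aes_decrypt(col("ct"), key).cast("string").as("pt"))
    // Pre-fix clients issued aes_encrypt for the second call as well, producing
    // doubly-encrypted bytes; with this change `pt` contains the original "Spark".
    roundTrip.show()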
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain
index 44084a8e60fb0..31e03b79eb987 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain
@@ -1,2 +1,2 @@
-Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, GCM, DEFAULT, , )#0]
+Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, GCM, DEFAULT, )#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain
index 29ccf0c1c833f..fc572e8fe7c67 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain
@@ -1,2 +1,2 @@
-Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, DEFAULT, , )#0]
+Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, DEFAULT, )#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain
index 5591363426ab5..c6c693013dd0a 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain
@@ -1,2 +1,2 @@
-Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, , )#0]
+Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, g, )#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain
index 0e8d4df71b38e..97bb528b84b3f 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain
@@ -1,2 +1,2 @@
-Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, g, )#0]
+Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, g, g)#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain
index d3c3743b1ef40..66b782ac8170d 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain
@@ -1,2 +1,2 @@
-Project [LOG(E(), b#0) AS LOG(E(), b)#0]
+Project [ln(b#0) AS ln(b)#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json
index 06469d4840547..4204a44b44ce0 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json
@@ -13,7 +13,7 @@
     },
     "expressions": [{
       "unresolvedFunction": {
-        "functionName": "aes_encrypt",
+        "functionName": "aes_decrypt",
         "arguments": [{
           "unresolvedAttribute": {
             "unparsedIdentifier": "g"
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin
index c7a70b51707f3..f635e1fc689b1 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.json b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.json
index 7eb9b4ed8b4ed..9c630e1253494 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.json
@@ -13,7 +13,7 @@
     },
     "expressions": [{
      "unresolvedFunction": {
-        "functionName": "aes_encrypt",
+        "functionName": "aes_decrypt",
         "arguments": [{
           "unresolvedAttribute": {
             "unparsedIdentifier": "g"
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.proto.bin
index ecd81ae44fcbd..41d024cdb7eed 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.json b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.json
index 59a6a5e35fd42..8f5be474ab4b3 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.json
@@ -13,7 +13,7 @@
     },
     "expressions": [{
       "unresolvedFunction": {
-        "functionName": "aes_encrypt",
+        "functionName": "aes_decrypt",
         "arguments": [{
           "unresolvedAttribute": {
             "unparsedIdentifier": "g"
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.proto.bin
index 9de01ddc5ea69..cd6764581f2ca 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.json b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.json
index a87ec1b7f4d29..9381042b71886 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.json
@@ -13,7 +13,7 @@
     },
     "expressions": [{
       "unresolvedFunction": {
-        "functionName": "aes_encrypt",
+        "functionName": "aes_decrypt",
         "arguments": [{
           "unresolvedAttribute": {
             "unparsedIdentifier": "g"
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.proto.bin
index 13da507fe6ff4..ca789f04ce1d4 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json
index 1b2d0ed0b1447..ababbc52d088d 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json
@@ -13,7 +13,7 @@
     },
     "expressions": [{
       "unresolvedFunction": {
-        "functionName": "log",
+        "functionName": "ln",
         "arguments": [{
           "unresolvedAttribute": {
             "unparsedIdentifier": "b"
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin
index 548fb480dd27e..ecb87a1fc4102 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin differ
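The `function_ln` golden files directly above change for the companion fix: the client used to implement `ln` via `log`, which the server plans as `LOG(E(), x)`, so only the plan text and the result column name differ, never the numeric values, since log base e equals ln. A quick check, again assuming an illustrative Connect session bound to `spark` (not part of the patch):

    // Illustrative sketch; `spark` is an assumed session, not part of the patch.
    import org.apache.spark.sql.functions.{col, ln}

    val df = spark.range(1, 4).select(ln(col("id")).as("ln_id"))
    df.explain() // old clients rendered LOG(E(), id); fixed clients render ln(id)
    df.show()    // 0.0, 0.693..., 1.098... either way; only the plan and name change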
o=0;o1?arguments[1]:{},peg$FAILED={},peg$startRuleFunctions={start:peg$parsestart,graphStmt:peg$parsegraphStmt},peg$startRuleFunction=peg$parsestart,peg$c0=[],peg$c1=peg$FAILED,peg$c2=null,peg$c3="{",peg$c4={type:"literal",value:"{",description:'"{"'},peg$c5="}",peg$c6={type:"literal",value:"}",description:'"}"'},peg$c7=function(strict,type,id,stmts){return{type:type,id:id,strict:strict!==null,stmts:stmts}},peg$c8=";",peg$c9={type:"literal",value:";",description:'";"'},peg$c10=function(first,rest){var result=[first];for(var i=0;i",description:'"->"'},peg$c33=function(rhs,rest){var result=[rhs];if(rest){for(var i=0;ipos){peg$cachedPos=0;peg$cachedPosDetails={line:1,column:1,seenCR:false}}advance(peg$cachedPosDetails,peg$cachedPos,pos);peg$cachedPos=pos}return peg$cachedPosDetails}function peg$fail(expected){if(peg$currPospeg$maxFailPos){peg$maxFailPos=peg$currPos;peg$maxFailExpected=[]}peg$maxFailExpected.push(expected)}function peg$buildException(message,expected,pos){function cleanupExpected(expected){var i=1;expected.sort(function(a,b){if(a.descriptionb.description){return 1}else{return 0}});while(i1?expectedDescs.slice(0,-1).join(", ")+" or "+expectedDescs[expected.length-1]:expectedDescs[0];foundDesc=found?'"'+stringEscape(found)+'"':"end of input";return"Expected "+expectedDesc+" but "+foundDesc+" found."}var posDetails=peg$computePosDetails(pos),found=pospeg$currPos){s5=input.charAt(peg$currPos);peg$currPos++}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c110)}}if(s5!==peg$FAILED){s4=[s4,s5];s3=s4}else{peg$currPos=s3;s3=peg$c1}}else{peg$currPos=s3;s3=peg$c1}while(s3!==peg$FAILED){s2.push(s3);s3=peg$currPos;s4=peg$currPos;peg$silentFails++;if(input.substr(peg$currPos,2)===peg$c108){s5=peg$c108;peg$currPos+=2}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c109)}}peg$silentFails--;if(s5===peg$FAILED){s4=peg$c30}else{peg$currPos=s4;s4=peg$c1}if(s4!==peg$FAILED){if(input.length>peg$currPos){s5=input.charAt(peg$currPos);peg$currPos++}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c110)}}if(s5!==peg$FAILED){s4=[s4,s5];s3=s4}else{peg$currPos=s3;s3=peg$c1}}else{peg$currPos=s3;s3=peg$c1}}if(s2!==peg$FAILED){if(input.substr(peg$currPos,2)===peg$c108){s3=peg$c108;peg$currPos+=2}else{s3=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c109)}}if(s3!==peg$FAILED){s1=[s1,s2,s3];s0=s1}else{peg$currPos=s0;s0=peg$c1}}else{peg$currPos=s0;s0=peg$c1}}else{peg$currPos=s0;s0=peg$c1}}peg$silentFails--;if(s0===peg$FAILED){s1=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c101)}}return s0}function peg$parse_(){var s0;s0=peg$parsewhitespace();if(s0===peg$FAILED){s0=peg$parsecomment()}return s0}var _=require("lodash");var directed;peg$result=peg$startRuleFunction();if(peg$result!==peg$FAILED&&peg$currPos===input.length){return peg$result}else{if(peg$result!==peg$FAILED&&peg$currPos":"--",writer=new Writer;if(!g.isMultigraph()){writer.write("strict ")}writer.writeLine((g.isDirected()?"digraph":"graph")+" {");writer.indent();var graphAttrs=g.graph();if(_.isObject(graphAttrs)){_.each(graphAttrs,function(v,k){writer.writeLine(id(k)+"="+id(v)+";")})}writeSubgraph(g,undefined,writer);g.edges().forEach(function(edge){writeEdge(g,edge,ec,writer)});writer.unindent();writer.writeLine("}");return writer.toString()}function writeSubgraph(g,v,writer){var children=g.isCompound()?g.children(v):g.nodes();_.each(children,function(w){if(!g.isCompound()||!g.children(w).length){writeNode(g,w,writer)}else{writer.writeLine("subgraph "+id(w)+" 
{");writer.indent();if(_.isObject(g.node(w))){_.map(g.node(w),function(val,key){writer.writeLine(id(key)+"="+id(val)+";")})}writeSubgraph(g,w,writer);writer.unindent();writer.writeLine("}")}})}function writeNode(g,v,writer){writer.write(id(v));writeAttrs(g.node(v),writer);writer.writeLine()}function writeEdge(g,edge,ec,writer){var v=edge.v,w=edge.w,attrs=g.edge(edge);writer.write(id(v)+" "+ec+" "+id(w));writeAttrs(attrs,writer);writer.writeLine()}function writeAttrs(attrs,writer){if(_.isObject(attrs)){var attrStrs=_.map(attrs,function(val,key){return id(key)+"="+id(val)});if(attrStrs.length){writer.write(" ["+attrStrs.join(",")+"]")}}}function id(obj){if(typeof obj==="number"||obj.toString().match(UNESCAPED_ID_PATTERN)){return obj}return'"'+obj.toString().replace(/"/g,'\\"')+'"'}function Writer(){this._indent="";this._content="";this._shouldIndent=true}Writer.prototype.INDENT=" ";Writer.prototype.indent=function(){this._indent+=this.INDENT};Writer.prototype.unindent=function(){this._indent=this._indent.slice(this.INDENT.length)};Writer.prototype.writeLine=function(line){this.write((line||"")+"\n");this._shouldIndent=true};Writer.prototype.write=function(str){if(this._shouldIndent){this._shouldIndent=false;this._content+=this._indent}this._content+=str};Writer.prototype.toString=function(){return this._content}},{lodash:28}],9:[function(require,module,exports){var _=require("lodash");module.exports=_.clone(require("./lib"));module.exports.json=require("./lib/json");module.exports.alg=require("./lib/alg")},{"./lib":25,"./lib/alg":16,"./lib/json":26,lodash:28}],10:[function(require,module,exports){var _=require("lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{lodash:28}],11:[function(require,module,exports){var _=require("lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{lodash:28}],12:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"./dijkstra":13,lodash:28}],13:[function(require,module,exports){var _=require("lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. 
"+"Bad edge: "+edge+" Weight: "+weight)}if(distance0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":23,lodash:28}],14:[function(require,module,exports){var _=require("lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"./tarjan":21,lodash:28}],15:[function(require,module,exports){var _=require("lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":23,"../graph":24,lodash:28}],21:[function(require,module,exports){var _=require("lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{lodash:28}],22:[function(require,module,exports){var _=require("lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{lodash:28}],23:[function(require,module,exports){var _=require("lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var 
index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. "+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l>1;if(arr[parent].priority1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return _.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var 
self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(v,w,value,name){var valueSpecified=arguments.length>2;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{lodash:28}],25:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":24,"./version":27}],26:[function(require,module,exports){var _=require("lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var 
nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":24,lodash:28}],27:[function(require,module,exports){module.exports="0.8.1"},{}],28:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f "+"\n\r\u2028\u2029"+" ᠎              ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index-1?0:-1:cache?0:-1}function cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function 
compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++indexother||typeof value=="undefined"){return 1}if(value/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}}if(!nativeCreate){baseCreate=function(){function Object(){}return function(prototype){if(isObject(prototype)){Object.prototype=prototype;var result=new Object;Object.prototype=null}return result||context.Object()}}()}function baseCreateCallback(func,thisArg,argCount){if(typeof func!="function"){return identity}if(typeof thisArg=="undefined"||!("prototype"in func)){return func}var bindData=func.__bindData__;if(typeof bindData=="undefined"){if(support.funcNames){bindData=!func.name}bindData=bindData||!support.funcDecomp;if(!bindData){var source=fnToString.call(func);if(!support.funcNames){bindData=!reFuncName.test(source)}if(!bindData){bindData=reThis.test(source);setBindData(func,bindData)}}}if(bindData===false||bindData!==true&&bindData[1]&1){return func}switch(argCount){case 1:return function(value){return func.call(thisArg,value)};case 2:return function(a,b){return func.call(thisArg,a,b)};case 3:return function(value,index,collection){return func.call(thisArg,value,index,collection)};case 4:return function(accumulator,value,index,collection){return func.call(thisArg,accumulator,value,index,collection)}}return bind(func,thisArg)}function baseCreateWrapper(bindData){var func=bindData[0],bitmask=bindData[1],partialArgs=bindData[2],partialRightArgs=bindData[3],thisArg=bindData[4],arity=bindData[5];var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,key=func;function bound(){var thisBinding=isBind?thisArg:this;if(partialArgs){var 
args=slice(partialArgs);push.apply(args,arguments)}if(partialRightArgs||isCurry){args||(args=slice(arguments));if(partialRightArgs){push.apply(args,partialRightArgs)}if(isCurry&&args.length=largeArraySize&&indexOf===baseIndexOf,result=[];if(isLarge){var cache=createCache(values);if(cache){indexOf=cacheIndexOf;values=cache}else{isLarge=false}}while(++index-1}})}}stackA.pop();stackB.pop();if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseMerge(object,source,callback,stackA,stackB){(isArray(source)?forEach:forOwn)(source,function(source,key){var found,isArr,result=source,value=object[key];if(source&&((isArr=isArray(source))||isPlainObject(source))){var stackLength=stackA.length;while(stackLength--){if(found=stackA[stackLength]==source){value=stackB[stackLength];break}}if(!found){var isShallow;if(callback){result=callback(value,source);if(isShallow=typeof result!="undefined"){value=result}}if(!isShallow){value=isArr?isArray(value)?value:[]:isPlainObject(value)?value:{}}stackA.push(source);stackB.push(value);if(!isShallow){baseMerge(value,source,callback,stackA,stackB)}}}else{if(callback){result=callback(value,source);if(typeof result=="undefined"){result=source}}if(typeof result!="undefined"){value=result}}object[key]=value})}function baseRandom(min,max){return min+floor(nativeRandom()*(max-min+1))}function baseUniq(array,isSorted,callback){var index=-1,indexOf=getIndexOf(),length=array?array.length:0,result=[];var isLarge=!isSorted&&length>=largeArraySize&&indexOf===baseIndexOf,seen=callback||isLarge?getArray():result;if(isLarge){var cache=createCache(seen);indexOf=cacheIndexOf;seen=cache}while(++index":">",'"':""","'":"'"};var htmlUnescapes=invert(htmlEscapes);var reEscapedHtml=RegExp("("+keys(htmlUnescapes).join("|")+")","g"),reUnescapedHtml=RegExp("["+keys(htmlEscapes).join("")+"]","g");var assign=function(object,source,guard){var index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof guard=="number"?2:args.length;if(argsLength>3&&typeof args[argsLength-2]=="function"){var callback=baseCreateCallback(args[--argsLength-1],args[argsLength--],2)}else if(argsLength>2&&typeof args[argsLength-1]=="function"){callback=args[--argsLength]}while(++argsIndex3&&typeof args[length-2]=="function"){var callback=baseCreateCallback(args[--length-1],args[length--],2)}else if(length>2&&typeof args[length-1]=="function"){callback=args[--length]}var sources=slice(arguments,1,length),index=-1,stackA=getArray(),stackB=getArray();while(++index-1}else if(typeof length=="number"){result=(isString(collection)?collection.indexOf(target,fromIndex):indexOf(collection,target,fromIndex))>-1}else{forOwn(collection,function(value){if(++index>=fromIndex){return!(result=value===target)}})}return result}var countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?result[key]++:result[key]=1});function every(collection,callback,thisArg){var result=true;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof length=="number"){while(++indexresult){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current>computed){computed=current;result=value}})}return result}function min(collection,callback,thisArg){var computed=Infinity,result=computed;if(typeof 
callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index=largeArraySize&&createCache(argsIndex?args[argsIndex]:seen))}}var array=args[0],index=-1,length=array?array.length:0,result=[];outer:while(++index>>1;callback(array[mid])1?arguments:arguments[0],index=-1,length=array?max(pluck(array,"length")):0,result=Array(length<0?0:length);while(++index2?createWrapper(func,17,slice(arguments,2),null,thisArg):createWrapper(func,1,null,null,thisArg)}function bindAll(object){var funcs=arguments.length>1?baseFlatten(arguments,true,false,1):functions(object),index=-1,length=funcs.length;while(++index2?createWrapper(key,19,slice(arguments,2),null,object):createWrapper(key,3,null,null,object)}function compose(){var funcs=arguments,length=funcs.length;while(length--){if(!isFunction(funcs[length])){throw new TypeError}}return function(){var args=arguments,length=funcs.length;while(length--){args=[funcs[length].apply(this,args)]}return args[0]}}function curry(func,arity){arity=typeof arity=="number"?arity:+arity||func.length;return createWrapper(func,4,null,null,null,arity)}function debounce(func,wait,options){var args,maxTimeoutId,result,stamp,thisArg,timeoutId,trailingCall,lastCalled=0,maxWait=false,trailing=true;if(!isFunction(func)){throw new TypeError}wait=nativeMax(0,wait)||0;if(options===true){var leading=true;trailing=false}else if(isObject(options)){leading=options.leading;maxWait="maxWait"in options&&(nativeMax(wait,options.maxWait)||0);trailing="trailing"in options?options.trailing:trailing}var delayed=function(){var remaining=wait-(now()-stamp);if(remaining<=0){if(maxTimeoutId){clearTimeout(maxTimeoutId)}var isCalled=trailingCall;maxTimeoutId=timeoutId=trailingCall=undefined;if(isCalled){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}}else{timeoutId=setTimeout(delayed,remaining)}};var maxDelayed=function(){if(timeoutId){clearTimeout(timeoutId)}maxTimeoutId=timeoutId=trailingCall=undefined;if(trailing||maxWait!==wait){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}};return function(){args=arguments;stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=null}return result}}function defer(func){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,1);return setTimeout(function(){func.apply(undefined,args)},1)}function delay(func,wait){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,2);return setTimeout(function(){func.apply(undefined,args)},wait)}function memoize(func,resolver){if(!isFunction(func)){throw new TypeError}var memoized=function(){var cache=memoized.cache,key=resolver?resolver.apply(this,arguments):keyPrefix+arguments[0];return hasOwnProperty.call(cache,key)?cache[key]:cache[key]=func.apply(this,arguments)};memoized.cache={};return 
memoized}function once(func){var ran,result;if(!isFunction(func)){throw new TypeError}return function(){if(ran){return result}ran=true;result=func.apply(this,arguments);func=null;return result}}function partial(func){return createWrapper(func,16,slice(arguments,1))}function partialRight(func){return createWrapper(func,32,null,slice(arguments,1))}function throttle(func,wait,options){var leading=true,trailing=true;if(!isFunction(func)){throw new TypeError}if(options===false){leading=false}else if(isObject(options)){leading="leading"in options?options.leading:leading;trailing="trailing"in options?options.trailing:trailing}debounceOptions.leading=leading;debounceOptions.maxWait=wait;debounceOptions.trailing=trailing;return debounce(func,wait,debounceOptions)}function wrap(value,wrapper){return createWrapper(wrapper,16,[value])}function constant(value){return function(){return value}}function createCallback(func,thisArg,argCount){var type=typeof func;if(func==null||type=="function"){return baseCreateCallback(func,thisArg,argCount)}if(type!="object"){return property(func)}var props=keys(func),key=props[0],a=func[key];if(props.length==1&&a===a&&!isObject(a)){return function(object){var b=object[key];return a===b&&(a!==0||1/a==1/b)}}return function(object){var length=props.length,result=false;while(length--){if(!(result=baseIsEqual(object[props[length]],func[props[length]],null,true))){break}}return result}}function escape(string){return string==null?"":String(string).replace(reUnescapedHtml,escapeHtmlChar)}function identity(value){return value}function mixin(object,source,options){var chain=true,methodNames=source&&functions(source);if(!source||!options&&!methodNames.length){if(options==null){options=source}ctor=lodashWrapper;source=object;object=lodash;methodNames=functions(source)}if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}var ctor=object,isFunc=isFunction(ctor);forEach(methodNames,function(methodName){var func=object[methodName]=source[methodName];if(isFunc){ctor.prototype[methodName]=function(){var chainAll=this.__chain__,value=this.__wrapped__,args=[value];push.apply(args,arguments);var result=func.apply(object,args);if(chain||chainAll){if(value===result&&isObject(result)){return this}result=new ctor(result);result.__chain__=chainAll}return result}}})}function noConflict(){context._=oldDash;return this}function noop(){}var now=isNative(now=Date.now)&&now||function(){return(new Date).getTime()};var parseInt=nativeParseInt(whitespace+"08")==8?nativeParseInt:function(value,radix){return nativeParseInt(isString(value)?value.replace(reLeadingSpacesAndZeros,""):value,radix||0)};function property(key){return function(object){return object[key]}}function random(min,max,floating){var noMin=min==null,noMax=max==null;if(floating==null){if(typeof min=="boolean"&&noMax){floating=min;min=1}else if(!noMax&&typeof max=="boolean"){floating=max;noMax=true}}if(noMin&&noMax){max=1}min=+min||0;if(noMax){max=min;min=0}else{max=+max||0}if(floating||min%1||max%1){var rand=nativeRandom();return nativeMin(min+rand*(max-min+parseFloat("1e-"+((rand+"").length-1))),max)}return baseRandom(min,max)}function result(object,key){if(object){var value=object[key];return isFunction(value)?object[key]():value}}function template(text,data,options){var settings=lodash.templateSettings;text=String(text||"");options=defaults({},options,settings);var imports=defaults({},options.imports,settings.imports),importsKeys=keys(imports),importsValues=values(imports);var 
isEvaluating,index=0,interpolate=options.interpolate||reNoMatch,source="__p += '";var reDelimiters=RegExp((options.escape||reNoMatch).source+"|"+interpolate.source+"|"+(interpolate===reInterpolate?reEsTemplate:reNoMatch).source+"|"+(options.evaluate||reNoMatch).source+"|$","g");text.replace(reDelimiters,function(match,escapeValue,interpolateValue,esTemplateValue,evaluateValue,offset){interpolateValue||(interpolateValue=esTemplateValue);source+=text.slice(index,offset).replace(reUnescapedString,escapeStringChar);if(escapeValue){source+="' +\n__e("+escapeValue+") +\n'"}if(evaluateValue){isEvaluating=true;source+="';\n"+evaluateValue+";\n__p += '"}if(interpolateValue){source+="' +\n((__t = ("+interpolateValue+")) == null ? '' : __t) +\n'"}index=offset+match.length;return match});source+="';\n";var variable=options.variable,hasVariable=variable;if(!hasVariable){variable="obj";source="with ("+variable+") {\n"+source+"\n}\n"}source=(isEvaluating?source.replace(reEmptyStringLeading,""):source).replace(reEmptyStringMiddle,"$1").replace(reEmptyStringTrailing,"$1;");source="function("+variable+") {\n"+(hasVariable?"":variable+" || ("+variable+" = {});\n")+"var __t, __p = '', __e = _.escape"+(isEvaluating?", __j = Array.prototype.join;\n"+"function print() { __p += __j.call(arguments, '') }\n":";\n")+source+"return __p\n}";var sourceURL="\n/*\n//# sourceURL="+(options.sourceURL||"/lodash/template/source["+templateCounter++ +"]")+"\n*/";try{var result=Function(importsKeys,"return "+source+sourceURL).apply(undefined,importsValues)}catch(e){e.source=source;throw e}if(data){return result(data)}result.source=source;return result}function times(n,callback,thisArg){n=(n=+n)>-1?n:0;var index=-1,result=Array(n);callback=baseCreateCallback(callback,thisArg,1);while(++indexhandleStmt(g,stmt,defaultStack))}return g}function handleStmt(g,stmt,defaultStack,sg){switch(stmt.type){case"node":handleNodeStmt(g,stmt,defaultStack,sg);break;case"edge":handleEdgeStmt(g,stmt,defaultStack,sg);break;case"subgraph":handleSubgraphStmt(g,stmt,defaultStack,sg);break;case"attr":handleAttrStmt(g,stmt,defaultStack);break;case"inlineAttr":handleInlineAttrsStmt(g,stmt,defaultStack,sg);break}}function handleNodeStmt(g,stmt,defaultStack,sg){var v=stmt.id,attrs=stmt.attrs;maybeCreateNode(g,v,defaultStack,sg);Object.assign(g.node(v),attrs)}function handleEdgeStmt(g,stmt,defaultStack,sg){var attrs=stmt.attrs,prev,curr;stmt.elems.forEach(elem=>{handleStmt(g,elem,defaultStack,sg);switch(elem.type){case"node":curr=[elem.id];break;case"subgraph":curr=collectNodeIds(elem);break}if(prev){prev.forEach(v=>{curr.forEach(w=>{var name;if(g.hasEdge(v,w)&&g.isMultigraph()){name=uniqueId("edge")}if(!g.hasEdge(v,w,name)){g.setEdge(v,w,structuredClone(defaultStack[defaultStack.length-1].edge),name)}Object.assign(g.edge(v,w,name),attrs)})})}prev=curr})}function handleSubgraphStmt(g,stmt,defaultStack,sg){var id=stmt.id;if(id===undefined){id=generateSubgraphId(g)}defaultStack.push(structuredClone(defaultStack[defaultStack.length-1]));maybeCreateNode(g,id,defaultStack,sg);if(stmt.stmts){stmt.stmts.forEach(s=>{handleStmt(g,s,defaultStack,id)})} +// If there are no statements remove the subgraph +if(!g.children(id).length){g.removeNode(id)}defaultStack.pop()}function handleAttrStmt(g,stmt,defaultStack){Object.assign(defaultStack[defaultStack.length-1][stmt.attrType],stmt.attrs)}function handleInlineAttrsStmt(g,stmt,defaultStack,sg){Object.assign(sg?g.node(sg):g.graph(),stmt.attrs)}function generateSubgraphId(g){var 
id;do{id=uniqueId("sg")}while(g.hasNode(id));return id}function maybeCreateNode(g,v,defaultStack,sg){if(!g.hasNode(v)){g.setNode(v,structuredClone(defaultStack[defaultStack.length-1].node));g.setParent(v,sg)}} +// Collect all nodes involved in a subgraph statement +function collectNodeIds(stmt){var ids={},stack=[],curr;var push=stack.push.bind(stack);push(stmt);while(stack.length){curr=stack.pop();switch(curr.type){case"node":ids[curr.id]=true;break;case"edge":curr.elems.forEach(push);break;case"subgraph":curr.stmts.forEach(push);break}}return Object.keys(ids)}let idCounter=0;function uniqueId(prefix){var id=++idCounter;return toString(prefix)+id}},{"@dagrejs/graphlib":9}],4:[function(require,module,exports){module.exports=function(){ +/* + * Generated by PEG.js 0.8.0. + * + * http://pegjs.majda.cz/ + */ +function peg$subclass(child,parent){function ctor(){this.constructor=child}ctor.prototype=parent.prototype;child.prototype=new ctor}function SyntaxError(message,expected,found,offset,line,column){this.message=message;this.expected=expected;this.found=found;this.offset=offset;this.line=line;this.column=column;this.name="SyntaxError"}peg$subclass(SyntaxError,Error);function parse(input){var options=arguments.length>1?arguments[1]:{},peg$FAILED={},peg$startRuleFunctions={start:peg$parsestart,graphStmt:peg$parsegraphStmt},peg$startRuleFunction=peg$parsestart,peg$c0=[],peg$c1=peg$FAILED,peg$c2=null,peg$c3="{",peg$c4={type:"literal",value:"{",description:'"{"'},peg$c5="}",peg$c6={type:"literal",value:"}",description:'"}"'},peg$c7=function(strict,type,id,stmts){return{type:type,id:id,strict:strict!==null,stmts:stmts}},peg$c8=";",peg$c9={type:"literal",value:";",description:'";"'},peg$c10=function(first,rest){var result=[first];for(var i=0;i",description:'"->"'},peg$c33=function(rhs,rest){var result=[rhs];if(rest){for(var i=0;ipos){peg$cachedPos=0;peg$cachedPosDetails={line:1,column:1,seenCR:false}}advance(peg$cachedPosDetails,peg$cachedPos,pos);peg$cachedPos=pos}return peg$cachedPosDetails}function peg$fail(expected){if(peg$currPospeg$maxFailPos){peg$maxFailPos=peg$currPos;peg$maxFailExpected=[]}peg$maxFailExpected.push(expected)}function peg$buildException(message,expected,pos){function cleanupExpected(expected){var i=1;expected.sort(function(a,b){if(a.descriptionb.description){return 1}else{return 0}});while(i1?expectedDescs.slice(0,-1).join(", ")+" or "+expectedDescs[expected.length-1]:expectedDescs[0];foundDesc=found?'"'+stringEscape(found)+'"':"end of input";return"Expected "+expectedDesc+" but "+foundDesc+" found."}var 
posDetails=peg$computePosDetails(pos),found=pospeg$currPos){s5=input.charAt(peg$currPos);peg$currPos++}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c110)}}if(s5!==peg$FAILED){s4=[s4,s5];s3=s4}else{peg$currPos=s3;s3=peg$c1}}else{peg$currPos=s3;s3=peg$c1}while(s3!==peg$FAILED){s2.push(s3);s3=peg$currPos;s4=peg$currPos;peg$silentFails++;if(input.substr(peg$currPos,2)===peg$c108){s5=peg$c108;peg$currPos+=2}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c109)}}peg$silentFails--;if(s5===peg$FAILED){s4=peg$c30}else{peg$currPos=s4;s4=peg$c1}if(s4!==peg$FAILED){if(input.length>peg$currPos){s5=input.charAt(peg$currPos);peg$currPos++}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c110)}}if(s5!==peg$FAILED){s4=[s4,s5];s3=s4}else{peg$currPos=s3;s3=peg$c1}}else{peg$currPos=s3;s3=peg$c1}}if(s2!==peg$FAILED){if(input.substr(peg$currPos,2)===peg$c108){s3=peg$c108;peg$currPos+=2}else{s3=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c109)}}if(s3!==peg$FAILED){s1=[s1,s2,s3];s0=s1}else{peg$currPos=s0;s0=peg$c1}}else{peg$currPos=s0;s0=peg$c1}}else{peg$currPos=s0;s0=peg$c1}}peg$silentFails--;if(s0===peg$FAILED){s1=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c101)}}return s0}function peg$parse_(){var s0;s0=peg$parsewhitespace();if(s0===peg$FAILED){s0=peg$parsecomment()}return s0}var directed;peg$result=peg$startRuleFunction();if(peg$result!==peg$FAILED&&peg$currPos===input.length){return peg$result}else{if(peg$result!==peg$FAILED&&peg$currPos":"--";var writer=new Writer;if(!g.isMultigraph()){writer.write("strict ")}writer.writeLine((g.isDirected()?"digraph":"graph")+" {");writer.indent();var graphAttrs=g.graph();if(typeof graphAttrs==="object"){Object.entries(graphAttrs).forEach(([k,v])=>{writer.writeLine(id(k)+"="+id(v)+";")})}writeSubgraph(g,undefined,writer);g.edges().forEach(function(edge){writeEdge(g,edge,ec,writer)});writer.unindent();writer.writeLine("}");return writer.toString()}function writeSubgraph(g,v,writer){var children=g.isCompound()?g.children(v):g.nodes();children.forEach(w=>{if(!g.isCompound()||!g.children(w).length){writeNode(g,w,writer)}else{writer.writeLine("subgraph "+id(w)+" {");writer.indent();if(typeof g.node(w)==="object"){Object.entries(g.node(w)).map(([key,val])=>{writer.writeLine(id(key)+"="+id(val)+";")})}writeSubgraph(g,w,writer);writer.unindent();writer.writeLine("}")}})}function writeNode(g,v,writer){writer.write(id(v));writeAttrs(g.node(v),writer);writer.writeLine()}function writeEdge(g,edge,ec,writer){var v=edge.v;var w=edge.w;var attrs=g.edge(edge);writer.write(id(v)+" "+ec+" "+id(w));writeAttrs(attrs,writer);writer.writeLine()}function writeAttrs(attrs,writer){if(typeof attrs==="object"){var attrStrs=Object.entries(attrs).map(([key,val])=>id(key)+"="+id(val));if(attrStrs.length){writer.write(" ["+attrStrs.join(",")+"]")}}}function id(obj){if(typeof obj==="number"||obj.toString().match(UNESCAPED_ID_PATTERN)){return obj}return'"'+obj.toString().replace(/"/g,'\\"')+'"'} +// Helper object for making a pretty printer +function Writer(){this._indent="";this._content="";this._shouldIndent=true}Writer.prototype.INDENT=" ";Writer.prototype.indent=function(){this._indent+=this.INDENT};Writer.prototype.unindent=function(){this._indent=this._indent.slice(this.INDENT.length)};Writer.prototype.writeLine=function(line){this.write((line||"")+"\n");this._shouldIndent=true};Writer.prototype.write=function(str){if(this._shouldIndent){this._shouldIndent=false;this._content+=this._indent}this._content+=str};Writer.prototype.toString=function(){return 
this._content}},{}],9:[function(require,module,exports){ +/** + * Copyright (c) 2014, Chris Pettitt + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":25,"./lib/alg":16,"./lib/json":26}],10:[function(require,module,exports){module.exports=components;function components(g){var visited={};var cmpts=[];var cmpt;function dfs(v){if(visited.hasOwnProperty(v))return;visited[v]=true;cmpt.push(v);g.successors(v).forEach(dfs);g.predecessors(v).forEach(dfs)}g.nodes().forEach(function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{}],11:[function(require,module,exports){module.exports=dfs; +/* + * A helper that performs a pre- or post-order traversal on the input graph + * and returns the nodes in the order they were visited. If the graph is + * undirected then this algorithm will navigate using neighbors. If the graph + * is directed then this algorithm will navigate using successors. + * + * If the order is not "post", it will be treated as "pre". 
+ */function dfs(g,vs,order){if(!Array.isArray(vs)){vs=[vs]}var navigation=g.isDirected()?v=>g.successors(v):v=>g.neighbors(v);var orderFunc=order==="post"?postOrderDfs:preOrderDfs;var acc=[];var visited={};vs.forEach(v=>{if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}orderFunc(v,navigation,visited,acc)});return acc}function postOrderDfs(v,navigation,visited,acc){var stack=[[v,false]];while(stack.length>0){var curr=stack.pop();if(curr[1]){acc.push(curr[0])}else{if(!visited.hasOwnProperty(curr[0])){visited[curr[0]]=true;stack.push([curr[0],true]);forEachRight(navigation(curr[0]),w=>stack.push([w,false]))}}}}function preOrderDfs(v,navigation,visited,acc){var stack=[v];while(stack.length>0){var curr=stack.pop();if(!visited.hasOwnProperty(curr)){visited[curr]=true;acc.push(curr);forEachRight(navigation(curr),w=>stack.push(w))}}}function forEachRight(array,iteratee){var length=array.length;while(length--){iteratee(array[length],length,array)}return array}},{}],12:[function(require,module,exports){var dijkstra=require("./dijkstra");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return g.nodes().reduce(function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc);return acc},{})}},{"./dijkstra":13}],13:[function(require,module,exports){var PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=()=>1;function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={};var pq=new PriorityQueue;var v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w;var wEntry=results[w];var weight=weightFn(edge);var distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. 
"+"Bad edge: "+edge+" Weight: "+weight)}if(distance0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":23}],14:[function(require,module,exports){var tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return tarjan(g).filter(function(cmpt){return cmpt.length>1||cmpt.length===1&&g.hasEdge(cmpt[0],cmpt[0])})}},{"./tarjan":21}],15:[function(require,module,exports){module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=()=>1;function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={};var nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v;var d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance0){v=pq.removeMin();if(parents.hasOwnProperty(v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":23,"../graph":24}],21:[function(require,module,exports){module.exports=tarjan;function tarjan(g){var index=0;var stack=[];var visited={};// node id -> { onStack, lowlink, index } +var results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!visited.hasOwnProperty(w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[];var w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!visited.hasOwnProperty(v)){dfs(v)}});return results}},{}],22:[function(require,module,exports){function topsort(g){var visited={};var stack={};var results=[];function visit(node){if(stack.hasOwnProperty(node)){throw new CycleException}if(!visited.hasOwnProperty(node)){stack[node]=true;visited[node]=true;g.predecessors(node).forEach(visit);delete stack[node];results.push(node)}}g.sinks().forEach(visit);if(Object.keys(visited).length!==g.nodeCount()){throw new CycleException}return results}class CycleException extends Error{constructor(){super(...arguments)}}module.exports=topsort;topsort.CycleException=CycleException},{}],23:[function(require,module,exports){ +/** + * A min-priority queue data structure. This algorithm is derived from Cormen, + * et al., "Introduction to Algorithms". The basic idea of a min-priority + * queue is that you can efficiently (in O(1) time) get the smallest key in + * the queue. Adding and removing elements takes O(log n) time. A key can + * have its priority decreased in O(log n) time. + */ +class PriorityQueue{#arr=[];#keyIndices={}; +/** + * Returns the number of elements in the queue. Takes `O(1)` time. + */size(){return this.#arr.length} +/** + * Returns the keys that are in the queue. Takes `O(n)` time. 
+ */keys(){return this.#arr.map(function(x){return x.key})} +/** + * Returns `true` if **key** is in the queue and `false` if not. + */has(key){return this.#keyIndices.hasOwnProperty(key)} +/** + * Returns the priority for **key**. If **key** is not present in the queue + * then this function returns `undefined`. Takes `O(1)` time. + * + * @param {Object} key + */priority(key){var index=this.#keyIndices[key];if(index!==undefined){return this.#arr[index].priority}} +/** + * Returns the key for the minimum element in this queue. If the queue is + * empty this function throws an Error. Takes `O(1)` time. + */min(){if(this.size()===0){throw new Error("Queue underflow")}return this.#arr[0].key} +/** + * Inserts a new key into the priority queue. If the key already exists in + * the queue this function returns `false`; otherwise it will return `true`. + * Takes `O(n)` time. + * + * @param {Object} key the key to add + * @param {Number} priority the initial priority for the key + */add(key,priority){var keyIndices=this.#keyIndices;key=String(key);if(!keyIndices.hasOwnProperty(key)){var arr=this.#arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this.#decrease(index);return true}return false} +/** + * Removes and returns the smallest key in the queue. Takes `O(log n)` time. + */removeMin(){this.#swap(0,this.#arr.length-1);var min=this.#arr.pop();delete this.#keyIndices[min.key];this.#heapify(0);return min.key} +/** + * Decreases the priority for **key** to **priority**. If the new priority is + * greater than the previous priority, this function will throw an Error. + * + * @param {Object} key the key for which to raise priority + * @param {Number} priority the new priority for the key + */decrease(key,priority){var index=this.#keyIndices[key];if(priority>this.#arr[index].priority){throw new Error("New priority is greater than current priority. "+"Key: "+key+" Old: "+this.#arr[index].priority+" New: "+priority)}this.#arr[index].priority=priority;this.#decrease(index)}#heapify(i){var arr=this.#arr;var l=2*i;var r=l+1;var largest=i;if(l>1;if(arr[parent].priorityundefined; +// Defaults to be set when creating a new edge +#defaultEdgeLabelFn=()=>undefined; +// v -> label +#nodes={}; +// v -> edgeObj +#in={}; +// u -> v -> Number +#preds={}; +// v -> edgeObj +#out={}; +// v -> w -> Number +#sucs={}; +// e -> edgeObj +#edgeObjs={}; +// e -> label +#edgeLabels={}; +/* Number of nodes in the graph. Should only be changed by the implementation. */#nodeCount=0; +/* Number of edges in the graph. Should only be changed by the implementation. */#edgeCount=0;#parent;#children;constructor(opts){if(opts){this.#isDirected=opts.hasOwnProperty("directed")?opts.directed:true;this.#isMultigraph=opts.hasOwnProperty("multigraph")?opts.multigraph:false;this.#isCompound=opts.hasOwnProperty("compound")?opts.compound:false}if(this.#isCompound){ +// v -> parent +this.#parent={}; +// v -> children +this.#children={};this.#children[GRAPH_NODE]={}}} +/* === Graph functions ========= */ +/** + * Whether graph was created with 'directed' flag set to true or not. + */isDirected(){return this.#isDirected} +/** + * Whether graph was created with 'multigraph' flag set to true or not. + */isMultigraph(){return this.#isMultigraph} +/** + * Whether graph was created with 'compound' flag set to true or not. + */isCompound(){return this.#isCompound} +/** + * Sets the label of the graph. + */setGraph(label){this.#label=label;return this} +/** + * Gets the graph label. 
+ */graph(){return this.#label} +/* === Node functions ========== */ +/** + * Sets the default node label. If newDefault is a function, it will be + * invoked each time a label is set for a node. Otherwise, this label + * will be assigned as the default label whenever no label is specified while + * setting a node. + * Complexity: O(1). + */setDefaultNodeLabel(newDefault){this.#defaultNodeLabelFn=newDefault;if(typeof newDefault!=="function"){this.#defaultNodeLabelFn=()=>newDefault}return this} +/** + * Gets the number of nodes in the graph. + * Complexity: O(1). + */nodeCount(){return this.#nodeCount} +/** + * Gets all nodes of the graph. Note that in the case of a compound graph, subnodes are + * not included in the list. + * Complexity: O(1). + */nodes(){return Object.keys(this.#nodes)} +/** + * Gets the list of nodes without in-edges. + * Complexity: O(|V|). + */sources(){var self=this;return this.nodes().filter(v=>Object.keys(self.#in[v]).length===0)} +/** + * Gets the list of nodes without out-edges. + * Complexity: O(|V|). + */sinks(){var self=this;return this.nodes().filter(v=>Object.keys(self.#out[v]).length===0)} +/** + * Invokes the setNode method for each node in the names list. + * Complexity: O(|names|). + */setNodes(vs,value){var args=arguments;var self=this;vs.forEach(function(v){if(args.length>1){self.setNode(v,value)}else{self.setNode(v)}});return this} +/** + * Creates or updates the value for the node v in the graph. If label is supplied + * it is set as the value for the node. If label is not supplied and the node was + * created by this call then the default node label will be assigned. + * Complexity: O(1). + */setNode(v,value){if(this.#nodes.hasOwnProperty(v)){if(arguments.length>1){this.#nodes[v]=value}return this}this.#nodes[v]=arguments.length>1?value:this.#defaultNodeLabelFn(v);if(this.#isCompound){this.#parent[v]=GRAPH_NODE;this.#children[v]={};this.#children[GRAPH_NODE][v]=true}this.#in[v]={};this.#preds[v]={};this.#out[v]={};this.#sucs[v]={};++this.#nodeCount;return this} +/** + * Gets the label of the node with the specified name. + * Complexity: O(|V|). + */node(v){return this.#nodes[v]} +/** + * Detects whether the graph has a node with the specified name or not. + */hasNode(v){return this.#nodes.hasOwnProperty(v)} +/** + * Removes the node with the name from the graph, or does nothing if the node is not in + * the graph. If the node was removed this function also removes any incident + * edges. + * Complexity: O(1). + */removeNode(v){var self=this;if(this.#nodes.hasOwnProperty(v)){var removeEdge=e=>self.removeEdge(self.#edgeObjs[e]);delete this.#nodes[v];if(this.#isCompound){this.#removeFromParentsChildList(v);delete this.#parent[v];this.children(v).forEach(function(child){self.setParent(child)});delete this.#children[v]}Object.keys(this.#in[v]).forEach(removeEdge);delete this.#in[v];delete this.#preds[v];Object.keys(this.#out[v]).forEach(removeEdge);delete this.#out[v];delete this.#sucs[v];--this.#nodeCount}return this} +/** + * Sets node p as a parent for node v if it is defined, or removes the + * parent for v if p is undefined. The method throws an exception if invoked + * on a non-compound graph. + * Average-case complexity: O(1). 
+ */setParent(v,parent){if(!this.#isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(parent===undefined){parent=GRAPH_NODE}else{ +// Coerce parent to string +parent+="";for(var ancestor=parent;ancestor!==undefined;ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create a cycle")}}this.setNode(parent)}this.setNode(v);this.#removeFromParentsChildList(v);this.#parent[v]=parent;this.#children[parent][v]=true;return this}#removeFromParentsChildList(v){delete this.#children[this.#parent[v]][v]} +/** + * Gets the parent node for node v. + * Complexity: O(1). + */parent(v){if(this.#isCompound){var parent=this.#parent[v];if(parent!==GRAPH_NODE){return parent}}} +/** + * Gets the list of direct children of node v. + * Complexity: O(1). + */children(v=GRAPH_NODE){if(this.#isCompound){var children=this.#children[v];if(children){return Object.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}} +/** + * Return all nodes that are predecessors of the specified node or undefined if node v is not in + * the graph. Behavior is undefined for undirected graphs - use neighbors instead. + * Complexity: O(|V|). + */predecessors(v){var predsV=this.#preds[v];if(predsV){return Object.keys(predsV)}} +/** + * Return all nodes that are successors of the specified node or undefined if node v is not in + * the graph. Behavior is undefined for undirected graphs - use neighbors instead. + * Complexity: O(|V|). + */successors(v){var sucsV=this.#sucs[v];if(sucsV){return Object.keys(sucsV)}} +/** + * Return all nodes that are predecessors or successors of the specified node or undefined if + * node v is not in the graph. + * Complexity: O(|V|). + */neighbors(v){var preds=this.predecessors(v);if(preds){const union=new Set(preds);for(var succ of this.successors(v)){union.add(succ)}return Array.from(union.values())}}isLeaf(v){var neighbors;if(this.isDirected()){neighbors=this.successors(v)}else{neighbors=this.neighbors(v)}return neighbors.length===0} +/** + * Creates a new graph with nodes filtered via filter. Edges incident to a rejected node + * are also removed. In the case of a compound graph, if a parent is rejected by the filter, + * then all its children are rejected too. + * Average-case complexity: O(|E|+|V|). + */filterNodes(filter){var copy=new this.constructor({directed:this.#isDirected,multigraph:this.#isMultigraph,compound:this.#isCompound});copy.setGraph(this.graph());var self=this;Object.entries(this.#nodes).forEach(function([v,value]){if(filter(v)){copy.setNode(v,value)}});Object.values(this.#edgeObjs).forEach(function(e){if(copy.hasNode(e.v)&&copy.hasNode(e.w)){copy.setEdge(e,self.edge(e))}});var parents={};function findParent(v){var parent=self.parent(v);if(parent===undefined||copy.hasNode(parent)){parents[v]=parent;return parent}else if(parent in parents){return parents[parent]}else{return findParent(parent)}}if(this.#isCompound){copy.nodes().forEach(v=>copy.setParent(v,findParent(v)))}return copy} +/* === Edge functions ========== */ +/** + * Sets the default edge label or factory function. This label will be + * assigned as the default label whenever no label is specified while setting + * an edge; if a factory function is given, it will be invoked each time an edge + * is set with no label, and the returned value will be used as the label for the edge. + * Complexity: O(1). 
+ */setDefaultEdgeLabel(newDefault){this.#defaultEdgeLabelFn=newDefault;if(typeof newDefault!=="function"){this.#defaultEdgeLabelFn=()=>newDefault}return this} +/** + * Gets the number of edges in the graph. + * Complexity: O(1). + */edgeCount(){return this.#edgeCount} +/** + * Gets the edges of the graph. In the case of a compound graph, subgraphs are not considered. + * Complexity: O(|E|). + */edges(){return Object.values(this.#edgeObjs)} +/** + * Establishes a path of edges over the nodes in the nodes list. If an edge already + * exists, its label is updated; otherwise an edge is created between each pair + * of nodes, with the provided label or the default label if none is provided. + * Complexity: O(|nodes|). + */setPath(vs,value){var self=this;var args=arguments;vs.reduce(function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this} +/** + * Creates or updates the label for the edge (v, w) with the optionally supplied + * name. If label is supplied it is set as the value for the edge. If label is not + * supplied and the edge was created by this call then the default edge label will + * be assigned. The name parameter is only useful with multigraphs. + */setEdge(){var v,w,name,value;var valueSpecified=false;var arg0=arguments[0];if(typeof arg0==="object"&&arg0!==null&&"v"in arg0){v=arg0.v;w=arg0.w;name=arg0.name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arg0;w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(name!==undefined){name=""+name}var e=edgeArgsToId(this.#isDirected,v,w,name);if(this.#edgeLabels.hasOwnProperty(e)){if(valueSpecified){this.#edgeLabels[e]=value}return this}if(name!==undefined&&!this.#isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")} +// It didn't exist, so we need to create it. +// First ensure the nodes exist. +this.setNode(v);this.setNode(w);this.#edgeLabels[e]=valueSpecified?value:this.#defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this.#isDirected,v,w,name); +// Ensure we add undirected edges in a consistent way. +v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this.#edgeObjs[e]=edgeObj;incrementOrInitEntry(this.#preds[w],v);incrementOrInitEntry(this.#sucs[v],w);this.#in[w][e]=edgeObj;this.#out[v][e]=edgeObj;this.#edgeCount++;return this} +/** + * Gets the label for the specified edge. + * Complexity: O(1). + */edge(v,w,name){var e=arguments.length===1?edgeObjToId(this.#isDirected,arguments[0]):edgeArgsToId(this.#isDirected,v,w,name);return this.#edgeLabels[e]} +/** + * Gets the label for the specified edge and converts it to an object. + * Complexity: O(1) + */edgeAsObj(){const edge=this.edge(...arguments);if(typeof edge!=="object"){return{label:edge}}return edge} +/** + * Detects whether the graph contains the specified edge or not. No subgraphs are considered. + * Complexity: O(1). + */hasEdge(v,w,name){var e=arguments.length===1?edgeObjToId(this.#isDirected,arguments[0]):edgeArgsToId(this.#isDirected,v,w,name);return this.#edgeLabels.hasOwnProperty(e)} +/** + * Removes the specified edge from the graph. No subgraphs are considered. + * Complexity: O(1). 
+ */removeEdge(v,w,name){var e=arguments.length===1?edgeObjToId(this.#isDirected,arguments[0]):edgeArgsToId(this.#isDirected,v,w,name);var edge=this.#edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this.#edgeLabels[e];delete this.#edgeObjs[e];decrementOrRemoveEntry(this.#preds[w],v);decrementOrRemoveEntry(this.#sucs[v],w);delete this.#in[w][e];delete this.#out[v][e];this.#edgeCount--}return this} +/** + * Return all edges that point to the node v. Optionally filters those edges down to just those + * coming from node u. Behavior is undefined for undirected graphs - use nodeEdges instead. + * Complexity: O(|E|). + */inEdges(v,u){var inV=this.#in[v];if(inV){var edges=Object.values(inV);if(!u){return edges}return edges.filter(edge=>edge.v===u)}} +/** + * Return all edges that point out of node v. Optionally filters those edges down to just + * those that point to w. Behavior is undefined for undirected graphs - use nodeEdges instead. + * Complexity: O(|E|). + */outEdges(v,w){var outV=this.#out[v];if(outV){var edges=Object.values(outV);if(!w){return edges}return edges.filter(edge=>edge.w===w)}} +/** + * Returns all edges to or from node v regardless of direction. Optionally filters those edges + * down to just those between nodes v and w regardless of direction. + * Complexity: O(|E|). + */nodeEdges(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}}}function incrementOrInitEntry(map,k){if(map[k]){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v_,w_,name){var v=""+v_;var w=""+w_;if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(name===undefined?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v_,w_,name){var v=""+v_;var w=""+w_;if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}module.exports=Graph},{}],25:[function(require,module,exports){ +// Includes only the "core" of graphlib +module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":24,"./version":27}],26:[function(require,module,exports){var Graph=require("./graph");module.exports={write:write,read:read}; +/** + * Creates a JSON representation of the graph that can be serialized to a string with + * JSON.stringify. The graph can later be restored using json.read. + */function write(g){var json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(g.graph()!==undefined){json.value=structuredClone(g.graph())}return json}function writeNodes(g){return g.nodes().map(function(v){var nodeValue=g.node(v);var parent=g.parent(v);var node={v:v};if(nodeValue!==undefined){node.value=nodeValue}if(parent!==undefined){node.parent=parent}return node})}function writeEdges(g){return g.edges().map(function(e){var edgeValue=g.edge(e);var edge={v:e.v,w:e.w};if(e.name!==undefined){edge.name=e.name}if(edgeValue!==undefined){edge.value=edgeValue}return edge})} +/** + * Takes JSON as input and returns the graph representation. 
+ * + * @example + * var g2 = graphlib.json.read(JSON.parse(str)); + * g2.nodes(); + * // ['a', 'b'] + * g2.edges() + * // [ { v: 'a', w: 'b' } ] + */function read(json){var g=new Graph(json.options).setGraph(json.value);json.nodes.forEach(function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});json.edges.forEach(function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":24}],27:[function(require,module,exports){module.exports="2.1.13"},{}]},{},[1]); diff --git a/docs/_layouts/redirect.html b/docs/_layouts/redirect.html new file mode 100644 index 0000000000000..72a0462fc6a30 --- /dev/null +++ b/docs/_layouts/redirect.html @@ -0,0 +1,28 @@ + + + + + +Redirecting… + + + + +

Redirecting…

+Click here if you are not redirected. + \ No newline at end of file diff --git a/docs/css/custom.css index 4576f45d1ab7d..c4388c9650bf4 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -9,6 +9,7 @@ body { overflow-wrap: anywhere; overflow-x: hidden; padding-top: 80px; + padding-bottom: 20px; } a { diff --git a/docs/running-on-mesos.md index b1a54a089a542..fef4db77f7230 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -136,7 +136,7 @@ If you want to deploy a Spark Application into a Mesos cluster that is running i - `LIBPROCESS_SSL_KEY_FILE=pathToKeyFile.key` path to key - `LIBPROCESS_SSL_CERT_FILE=pathToCRTFile.crt` the certificate file to be used -All options can be found at http://mesos.apache.org/documentation/latest/ssl/ +All options can be found at [http://mesos.apache.org/documentation/latest/ssl/](http://mesos.apache.org/documentation/latest/ssl/) Then submit happens as described in Client mode or Cluster mode below @@ -579,7 +579,7 @@ See the [configuration page](configuration.html) for information on Spark config This only affects docker containers, and must be one of "docker" or "mesos". Mesos supports two types of containerizers for docker: the "docker" containerizer, and the preferred - "mesos" containerizer. Read more here: http://mesos.apache.org/documentation/latest/container-image/ + "mesos" containerizer. Read more <a href="http://mesos.apache.org/documentation/latest/container-image/">here</a>. 2.1.0 diff --git a/docs/running-on-yarn.md index 5eec6c490cb1f..709cffda9b0a7 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -163,7 +163,7 @@ To use a custom metrics.properties for the application master and executors, upd Amount of resource to use for the YARN Application Master in client mode. In cluster mode, use spark.yarn.driver.resource.<resource-type>.amount instead. Please note that this feature can be used only with YARN 3.0+ - For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html + For reference, see <a href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html">YARN Resource Model documentation</a>

Example: To request GPU resources from YARN, use: spark.yarn.am.resource.yarn.io/gpu.amount @@ -185,7 +185,7 @@ To use a custom metrics.properties for the application master and executors, upd Amount of resource to use for the YARN Application Master in cluster mode. Please note that this feature can be used only with YARN 3.0+ - For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html + For reference, see <a href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html">YARN Resource Model documentation</a>

Example: To request GPU resources from YARN, use: spark.yarn.driver.resource.yarn.io/gpu.amount @@ -198,7 +198,7 @@ To use a custom metrics.properties for the application master and executors, upd Amount of resource to use per executor process. Please note that this feature can be used only with YARN 3.0+ - For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html + For reference, see <a href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html">YARN Resource Model documentation</a>

Example: To request GPU resources from YARN, use: spark.yarn.executor.resource.yarn.io/gpu.amount diff --git a/python/pyspark/pandas/groupby.py index e66d08400b46b..246427a87b918 100644 --- a/python/pyspark/pandas/groupby.py +++ b/python/pyspark/pandas/groupby.py @@ -857,10 +857,10 @@ def sum(self, numeric_only: Optional[bool] = True, min_count: int = 0) -> FrameL ... "C": [3, 4, 3, 4], "D": ["a", "a", "b", "a"]}) >>> df.groupby("A").sum().sort_index() - B C + B C D A - 1 1 6 - 2 1 8 + 1 1 6 ab + 2 1 8 aa >>> df.groupby("D").sum().sort_index() A B C @@ -900,17 +900,17 @@ def sum(self, numeric_only: Optional[bool] = True, min_count: int = 0) -> FrameL unsupported = [ col.name for col in self._agg_columns - if not isinstance(col.spark.data_type, (NumericType, BooleanType)) + if not isinstance(col.spark.data_type, (NumericType, BooleanType, StringType)) ] if len(unsupported) > 0: log_advice( - "GroupBy.sum() can only support numeric and bool columns even if" + "GroupBy.sum() can only support numeric, bool and string columns even if " f"numeric_only=False, skip unsupported columns: {unsupported}" ) return self._reduce_for_stat_function( F.sum, - accepted_spark_types=(NumericType, BooleanType), + accepted_spark_types=(NumericType, BooleanType, StringType), bool_to_numeric=True, min_count=min_count, ) @@ -3534,7 +3534,21 @@ def _reduce_for_stat_function( for label in psdf._internal.column_labels: psser = psdf._psser_for(label) input_scol = psser._dtype_op.nan_to_null(psser).spark.column - output_scol = sfun(input_scol) + if sfun.__name__ == "sum" and isinstance( + psdf._internal.spark_type_for(label), StringType + ): + input_scol_name = psser._internal.data_spark_column_names[0] + # Sort data with natural order column to ensure order of data + sorted_array = F.array_sort( + F.collect_list(F.struct(NATURAL_ORDER_COLUMN_NAME, input_scol)) + ) + + # Using transform to extract strings + output_scol = F.concat_ws( + "", F.transform(sorted_array, lambda x: x.getField(input_scol_name)) + ) + else: + output_scol = sfun(input_scol) if min_count > 0: output_scol = F.when( @@ -3591,7 +3605,9 @@ def _prepare_reduce( ): agg_columns.append(psser) sdf = self._psdf._internal.spark_frame.select( - *groupkey_scols, *[psser.spark.column for psser in agg_columns] + *groupkey_scols, + *[psser.spark.column for psser in agg_columns], + NATURAL_ORDER_COLUMN_NAME, ) internal = InternalFrame( spark_frame=sdf, diff --git a/python/pyspark/pandas/tests/groupby/test_groupby.py index 543ceff86256f..e162bed756bd6 100644 --- a/python/pyspark/pandas/tests/groupby/test_groupby.py +++ b/python/pyspark/pandas/tests/groupby/test_groupby.py @@ -59,9 +59,6 @@ def test_groupby_simple(self): }, index=[0, 1, 3, 5, 6, 8, 9, 9, 9], ) - if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"): - # TODO(SPARK-43295): Make DataFrameGroupBy.sum support for string type columns - pdf = pdf[["a", "b", "c", "e"]] psdf = ps.from_pandas(pdf) for as_index in [True, False]: @@ -180,9 +177,6 @@ def sort(df): index=[0, 1, 3, 5, 6, 8, 9, 9, 9], ) psdf = ps.from_pandas(pdf) - if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"): - # TODO(SPARK-43295): Make DataFrameGroupBy.sum support for string type columns - pdf = pdf[[10, 20, 30]] for as_index in [True, False]: if as_index: diff --git a/python/pyspark/pandas/tests/groupby/test_stat.py index 44bb3b0070914..bc78e02c90e39 100644 ---
a/python/pyspark/pandas/tests/groupby/test_stat.py +++ b/python/pyspark/pandas/tests/groupby/test_stat.py @@ -113,7 +113,7 @@ def test_basic_stat_funcs(self): # self._test_stat_func(lambda groupby_obj: groupby_obj.sum(), check_exact=False) self.assert_eq( psdf.groupby("A").sum().sort_index(), - pdf.groupby("A").sum(numeric_only=True).sort_index(), + pdf.groupby("A").sum().sort_index(), check_exact=False, ) diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py index 23d1b04dd3d33..5a8b1e3792016 100644 --- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py @@ -66,7 +66,7 @@ def sort(df): self.assert_eq( sort(psdf1.groupby(psdf2.a, as_index=as_index).sum()), - sort(pdf1.groupby(pdf2.a, as_index=as_index).sum(numeric_only=True)), + sort(pdf1.groupby(pdf2.a, as_index=as_index).sum()), almost=as_index, ) @@ -93,7 +93,7 @@ def test_groupby_multiindex_columns(self): self.assert_eq( psdf1.groupby(psdf2[("x", "a")]).sum().sort_index(), - pdf1.groupby(pdf2[("x", "a")]).sum(numeric_only=True).sort_index(), + pdf1.groupby(pdf2[("x", "a")]).sum().sort_index(), ) self.assert_eq( @@ -102,7 +102,7 @@ def test_groupby_multiindex_columns(self): .sort_values(("y", "c")) .reset_index(drop=True), pdf1.groupby(pdf2[("x", "a")], as_index=False) - .sum(numeric_only=True) + .sum() .sort_values(("y", "c")) .reset_index(drop=True), ) diff --git a/python/pyspark/sql/dataframe.py index f00c8c5ab427f..eaed565ed0e68 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -3814,9 +3814,10 @@ def groupBy(self, __cols: Union[List[Column], List[str], List[int]]) -> "Grouped ... def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ignore[misc] - """Groups the :class:`DataFrame` using the specified columns, - so we can run aggregation on them. See :class:`GroupedData` - for all the available aggregate functions. + """ + Groups the :class:`DataFrame` by the specified columns so that aggregation + can be performed on them. + See :class:`GroupedData` for all the available aggregate functions. :func:`groupby` is an alias for :func:`groupBy`. @@ -3831,14 +3832,14 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ign Parameters ---------- cols : list, str or :class:`Column` - columns to group by. - Each element should be a column name (string) or an expression (:class:`Column`) + The columns to group by. + Each element can be a column name (string) or an expression (:class:`Column`) or a column ordinal (int, 1-based) or list of them. Returns ------- :class:`GroupedData` - Grouped data by given columns. + A :class:`GroupedData` object representing the grouped data by the specified columns. Notes ----- @@ -3848,9 +3849,9 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ign Examples -------- >>> df = spark.createDataFrame([ - ... (2, "Alice"), (2, "Bob"), (2, "Bob"), (5, "Bob")], schema=["age", "name"]) + ... ("Alice", 2), ("Bob", 2), ("Bob", 2), ("Bob", 5)], schema=["name", "age"]) - Empty grouping columns triggers a global aggregation. + Example 1: An empty list of grouping columns triggers a global aggregation. 
>>> df.groupBy().avg().show() +--------+ @@ -3859,7 +3860,7 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ign | 2.75| +--------+ - Group-by 'name', and specify a dictionary to calculate the summation of 'age'. + Example 2: Group-by 'name', and specify a dictionary to calculate the summation of 'age'. >>> df.groupBy("name").agg({"age": "sum"}).sort("name").show() +-----+--------+ @@ -3869,7 +3870,7 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ign | Bob| 9| +-----+--------+ - Group-by 'name', and calculate maximum values. + Example 3: Group-by 'name', and calculate maximum values. >>> df.groupBy(df.name).max().sort("name").show() +-----+--------+ @@ -3879,9 +3880,9 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ign | Bob| 5| +-----+--------+ - Also group-by 'name', but using the column ordinal. + Example 4: Also group-by 'name', but using the column ordinal. - >>> df.groupBy(2).max().sort("name").show() + >>> df.groupBy(1).max().sort("name").show() +-----+--------+ | name|max(age)| +-----+--------+ @@ -3889,7 +3890,7 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ign | Bob| 5| +-----+--------+ - Group-by 'name' and 'age', and calculate the number of rows in each group. + Example 5: Group-by 'name' and 'age', and calculate the number of rows in each group. >>> df.groupBy(["name", df.age]).count().sort("name", "age").show() +-----+---+-----+ @@ -3900,9 +3901,9 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ign | Bob| 5| 1| +-----+---+-----+ - Also Group-by 'name' and 'age', but using the column ordinal. + Example 6: Also group-by 'name' and 'age', but using the column ordinal. - >>> df.groupBy([df.name, 1]).count().sort("name", "age").show() + >>> df.groupBy([df.name, 2]).count().sort("name", "age").show() +-----+---+-----+ | name|age|count| +-----+---+-----+ @@ -3927,7 +3928,7 @@ def rollup(self, __cols: Union[List[Column], List[str]]) -> "GroupedData": def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ignore[misc] """ Create a multi-dimensional rollup for the current :class:`DataFrame` using - the specified columns, so we can run aggregation on them. + the specified columns, allowing for aggregation on them. .. versionadded:: 1.4.0 @@ -3940,14 +3941,14 @@ def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: igno Parameters ---------- cols : list, str or :class:`Column` - Columns to roll-up by. + The columns to roll-up by. Each element should be a column name (string) or an expression (:class:`Column`) - or list of them. + or a column ordinal (int, 1-based) or list of them. Returns ------- :class:`GroupedData` - Rolled-up data by given columns. + Rolled-up data based on the specified columns. Notes ----- @@ -3956,7 +3957,22 @@ def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: igno Examples -------- - >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], schema=["age", "name"]) + >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], schema=["name", "age"]) + + Example 1: Rollup-by 'name', and calculate the number of rows in each dimension. + + >>> df.rollup("name").count().orderBy("name").show() + +-----+-----+ + | name|count| + +-----+-----+ + | NULL| 2| + |Alice| 1| + | Bob| 1| + +-----+-----+ + + Example 2: Rollup-by 'name' and 'age', + and calculate the number of rows in each dimension. 
+ >>> df.rollup("name", df.age).count().orderBy("name", "age").show() +-----+----+-----+ | name| age|count| @@ -3968,7 +3984,9 @@ def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: igno | Bob| 5| 1| +-----+----+-----+ - >>> df.rollup(2, 1).count().orderBy(1, 2).show() + Example 3: Also Rollup-by 'name' and 'age', but using the column ordinal. + + >>> df.rollup(1, 2).count().orderBy(1, 2).show() +-----+----+-----+ | name| age|count| +-----+----+-----+ @@ -3995,7 +4013,7 @@ def cube(self, __cols: Union[List[Column], List[str]]) -> "GroupedData": def cube(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] """ Create a multi-dimensional cube for the current :class:`DataFrame` using - the specified columns, so we can run aggregations on them. + the specified columns, allowing aggregations to be performed on them. .. versionadded:: 1.4.0 @@ -4008,14 +4026,14 @@ def cube(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] Parameters ---------- cols : list, str or :class:`Column` - columns to create cube by. + The columns to cube by. Each element should be a column name (string) or an expression (:class:`Column`) - or list of them. + or a column ordinal (int, 1-based) or list of them. Returns ------- :class:`GroupedData` - Cube of the data by given columns. + Cube of the data based on the specified columns. Notes ----- @@ -4024,7 +4042,23 @@ def cube(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] Examples -------- - >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], schema=["age", "name"]) + >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], schema=["name", "age"]) + + Example 1: Creating a cube on 'name', + and calculate the number of rows in each dimensional. + + >>> df.cube("name").count().orderBy("name").show() + +-----+-----+ + | name|count| + +-----+-----+ + | NULL| 2| + |Alice| 1| + | Bob| 1| + +-----+-----+ + + Example 2: Creating a cube on 'name' and 'age', + and calculate the number of rows in each dimensional. + >>> df.cube("name", df.age).count().orderBy("name", "age").show() +-----+----+-----+ | name| age|count| @@ -4038,7 +4072,9 @@ def cube(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] | Bob| 5| 1| +-----+----+-----+ - >>> df.cube(2, 1).count().orderBy(1, 2).show() + Example 3: Also creating a cube on 'name' and 'age', but using the column ordinal. + + >>> df.cube(1, 2).count().orderBy(1, 2).show() +-----+----+-----+ | name| age|count| +-----+----+-----+ diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 3138cafa1262a..f35477b7edc3c 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -55,11 +55,12 @@ # Keep pandas_udf and PandasUDFType import for backwards compatible import; moved in SPARK-28264 from pyspark.sql.pandas.functions import pandas_udf, PandasUDFType # noqa: F401 + from pyspark.sql.utils import ( - to_str, - has_numpy, - try_remote_functions, - get_active_spark_context, + to_str as _to_str, + has_numpy as _has_numpy, + try_remote_functions as _try_remote_functions, + get_active_spark_context as _get_active_spark_context, ) if TYPE_CHECKING: @@ -70,7 +71,7 @@ UserDefinedFunctionLike, ) -if has_numpy: +if _has_numpy: import numpy as np # Note to developers: all of PySpark functions here take string as column names whenever possible. @@ -80,437 +81,6 @@ # since it requires making every single overridden definition. 
-__all__ = [ - "abs", - "acos", - "acosh", - "add_months", - "aes_decrypt", - "aes_encrypt", - "aggregate", - "any_value", - "approxCountDistinct", - "approx_count_distinct", - "approx_percentile", - "array", - "array_agg", - "array_append", - "array_compact", - "array_contains", - "array_distinct", - "array_except", - "array_insert", - "array_intersect", - "array_join", - "array_max", - "array_min", - "array_position", - "array_prepend", - "array_remove", - "array_repeat", - "array_size", - "array_sort", - "array_union", - "arrays_overlap", - "arrays_zip", - "asc", - "asc_nulls_first", - "asc_nulls_last", - "ascii", - "asin", - "asinh", - "assert_true", - "atan", - "atan2", - "atanh", - "avg", - "base64", - "bin", - "bit_and", - "bit_count", - "bit_get", - "bit_length", - "bit_or", - "bit_xor", - "bitmap_bit_position", - "bitmap_bucket_number", - "bitmap_construct_agg", - "bitmap_count", - "bitmap_or_agg", - "bitwiseNOT", - "bitwise_not", - "bool_and", - "bool_or", - "broadcast", - "bround", - "btrim", - "bucket", - "call_function", - "call_udf", - "cardinality", - "cast", - "cbrt", - "ceil", - "ceiling", - "char", - "char_length", - "character_length", - "coalesce", - "col", - "collect_list", - "collect_set", - "column", - "concat", - "concat_ws", - "contains", - "conv", - "convert_timezone", - "corr", - "cos", - "cosh", - "cot", - "count", - "countDistinct", - "count_distinct", - "count_if", - "count_min_sketch", - "covar_pop", - "covar_samp", - "crc32", - "create_map", - "csc", - "cume_dist", - "curdate", - "current_catalog", - "current_database", - "current_date", - "current_schema", - "current_timestamp", - "current_timezone", - "current_user", - "date_add", - "date_diff", - "date_format", - "date_from_unix_date", - "date_part", - "date_sub", - "date_trunc", - "dateadd", - "datediff", - "datepart", - "day", - "dayofmonth", - "dayofweek", - "dayofyear", - "days", - "decode", - "degrees", - "dense_rank", - "desc", - "desc_nulls_first", - "desc_nulls_last", - "e", - "element_at", - "elt", - "encode", - "endswith", - "equal_null", - "every", - "exists", - "exp", - "explode", - "explode_outer", - "expm1", - "expr", - "extract", - "factorial", - "filter", - "find_in_set", - "first", - "first_value", - "flatten", - "floor", - "forall", - "format_number", - "format_string", - "from_csv", - "from_json", - "from_unixtime", - "from_utc_timestamp", - "get", - "get_json_object", - "getbit", - "greatest", - "grouping", - "grouping_id", - "hash", - "hex", - "histogram_numeric", - "hll_sketch_agg", - "hll_sketch_estimate", - "hll_union", - "hll_union_agg", - "hour", - "hours", - "hypot", - "ifnull", - "ilike", - "initcap", - "inline", - "inline_outer", - "input_file_block_length", - "input_file_block_start", - "input_file_name", - "instr", - "isnan", - "isnotnull", - "isnull", - "java_method", - "json_array_length", - "json_object_keys", - "json_tuple", - "kurtosis", - "lag", - "last", - "last_day", - "last_value", - "lcase", - "lead", - "least", - "left", - "length", - "levenshtein", - "like", - "lit", - "ln", - "localtimestamp", - "locate", - "log", - "log10", - "log1p", - "log2", - "lower", - "lpad", - "ltrim", - "make_date", - "make_dt_interval", - "make_interval", - "make_timestamp", - "make_timestamp_ltz", - "make_timestamp_ntz", - "make_ym_interval", - "map_concat", - "map_contains_key", - "map_entries", - "map_filter", - "map_from_arrays", - "map_from_entries", - "map_keys", - "map_values", - "map_zip_with", - "mask", - "max", - "max_by", - "md5", - "mean", - "median", - "min", - "min_by", - 
"minute", - "mode", - "monotonically_increasing_id", - "month", - "months", - "months_between", - "named_struct", - "nanvl", - "negate", - "negative", - "next_day", - "now", - "nth_value", - "ntile", - "nullif", - "nvl", - "nvl2", - "octet_length", - "overlay", - "overload", - "parse_url", - "percent_rank", - "percentile", - "percentile_approx", - "pi", - "pmod", - "posexplode", - "posexplode_outer", - "position", - "positive", - "pow", - "power", - "printf", - "product", - "quarter", - "radians", - "raise_error", - "rand", - "randn", - "rank", - "reduce", - "reflect", - "regexp", - "regexp_count", - "regexp_extract", - "regexp_extract_all", - "regexp_instr", - "regexp_like", - "regexp_replace", - "regexp_substr", - "regr_avgx", - "regr_avgy", - "regr_count", - "regr_intercept", - "regr_r2", - "regr_slope", - "regr_sxx", - "regr_sxy", - "regr_syy", - "repeat", - "replace", - "reverse", - "right", - "rint", - "rlike", - "round", - "row_number", - "rpad", - "rtrim", - "schema_of_csv", - "schema_of_json", - "sec", - "second", - "sentences", - "sequence", - "session_window", - "sha", - "sha1", - "sha2", - "shiftLeft", - "shiftRight", - "shiftRightUnsigned", - "shiftleft", - "shiftright", - "shiftrightunsigned", - "shuffle", - "sign", - "signum", - "sin", - "sinh", - "size", - "skewness", - "slice", - "some", - "sort_array", - "soundex", - "spark_partition_id", - "split", - "split_part", - "sqrt", - "stack", - "startswith", - "std", - "stddev", - "stddev_pop", - "stddev_samp", - "str_to_map", - "struct", - "substr", - "substring", - "substring_index", - "sum", - "sumDistinct", - "sum_distinct", - "tan", - "tanh", - "timestamp_micros", - "timestamp_millis", - "timestamp_seconds", - "toDegrees", - "toRadians", - "to_binary", - "to_char", - "to_csv", - "to_date", - "to_json", - "to_number", - "to_timestamp", - "to_timestamp_ltz", - "to_timestamp_ntz", - "to_unix_timestamp", - "to_utc_timestamp", - "to_varchar", - "transform", - "transform_keys", - "transform_values", - "translate", - "trim", - "trunc", - "try_add", - "try_aes_decrypt", - "try_avg", - "try_divide", - "try_element_at", - "try_multiply", - "try_reflect", - "try_subtract", - "try_sum", - "try_to_binary", - "try_to_number", - "try_to_timestamp", - "typeof", - "ucase", - "udf", - "udtf", - "unbase64", - "unhex", - "unix_date", - "unix_micros", - "unix_millis", - "unix_seconds", - "unix_timestamp", - "unwrap_udt", - "upper", - "url_decode", - "url_encode", - "user", - "var_pop", - "var_samp", - "variance", - "version", - "weekday", - "weekofyear", - "when", - "width_bucket", - "window", - "window_time", - "xpath", - "xpath_boolean", - "xpath_double", - "xpath_float", - "xpath_int", - "xpath_long", - "xpath_number", - "xpath_short", - "xpath_string", - "xxhash64", - "year", - "years", - "zip_with", - "pandas_udf", - "PandasUDFType", -] - - def _get_jvm_function(name: str, sc: SparkContext) -> Callable: """ Retrieves JVM function identified by name from @@ -543,7 +113,7 @@ def _invoke_function_over_seq_of_columns(name: str, cols: "Iterable[ColumnOrName Invokes unary JVM function identified by name with and wraps the result with :class:`~pyspark.sql.Column`. 
""" - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function(name, _to_seq(sc, cols, _to_java_column)) @@ -563,11 +133,11 @@ def _invoke_binary_math_function(name: str, col1: Any, col2: Any) -> Column: def _options_to_str(options: Optional[Dict[str, Any]] = None) -> Dict[str, Optional[str]]: if options: - return {key: to_str(value) for (key, value) in options.items()} + return {key: _to_str(value) for (key, value) in options.items()} return {} -@try_remote_functions +@_try_remote_functions def lit(col: Any) -> Column: """ Creates a :class:`~pyspark.sql.Column` of literal value. @@ -619,14 +189,14 @@ def lit(col: Any) -> Column: ) return array(*[lit(item) for item in col]) else: - if has_numpy and isinstance(col, np.generic): + if _has_numpy and isinstance(col, np.generic): dt = _from_numpy_type(col.dtype) if dt is not None: return _invoke_function("lit", col).astype(dt).alias(str(col)) return _invoke_function("lit", col) -@try_remote_functions +@_try_remote_functions def col(col: str) -> Column: """ Returns a :class:`~pyspark.sql.Column` based on the given column name. @@ -659,7 +229,7 @@ def col(col: str) -> Column: column = col -@try_remote_functions +@_try_remote_functions def asc(col: "ColumnOrName") -> Column: """ Returns a sort expression for the target column in ascending order. @@ -738,7 +308,7 @@ def asc(col: "ColumnOrName") -> Column: return col.asc() if isinstance(col, Column) else _invoke_function("asc", col) -@try_remote_functions +@_try_remote_functions def desc(col: "ColumnOrName") -> Column: """ Returns a sort expression for the target column in descending order. @@ -817,7 +387,7 @@ def desc(col: "ColumnOrName") -> Column: return col.desc() if isinstance(col, Column) else _invoke_function("desc", col) -@try_remote_functions +@_try_remote_functions def sqrt(col: "ColumnOrName") -> Column: """ Computes the square root of the specified float value. @@ -850,7 +420,7 @@ def sqrt(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sqrt", col) -@try_remote_functions +@_try_remote_functions def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column: """ Returns the sum of `left`and `right` and the result is null on overflow. @@ -917,7 +487,7 @@ def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_add", left, right) -@try_remote_functions +@_try_remote_functions def try_avg(col: "ColumnOrName") -> Column: """ Returns the mean calculated from values of a group and the result is null on overflow. @@ -943,7 +513,7 @@ def try_avg(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_avg", col) -@try_remote_functions +@_try_remote_functions def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column: """ Returns `dividend`/`divisor`. It always performs floating point division. Its result is @@ -992,7 +562,7 @@ def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_divide", left, right) -@try_remote_functions +@_try_remote_functions def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column: """ Returns `left`*`right` and the result is null on overflow. 
The acceptable input types are the @@ -1042,7 +612,7 @@ def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_multiply", left, right) -@try_remote_functions +@_try_remote_functions def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column: """ Returns `left`-`right` and the result is null on overflow. The acceptable input types are the @@ -1109,7 +679,7 @@ def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_subtract", left, right) -@try_remote_functions +@_try_remote_functions def try_sum(col: "ColumnOrName") -> Column: """ Returns the sum calculated from values of a group and the result is null on overflow. @@ -1133,7 +703,7 @@ def try_sum(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_sum", col) -@try_remote_functions +@_try_remote_functions def abs(col: "ColumnOrName") -> Column: """ Computes the absolute value. @@ -1166,7 +736,7 @@ def abs(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("abs", col) -@try_remote_functions +@_try_remote_functions def mode(col: "ColumnOrName") -> Column: """ Returns the most frequent value in a group. @@ -1205,7 +775,7 @@ def mode(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("mode", col) -@try_remote_functions +@_try_remote_functions def max(col: "ColumnOrName") -> Column: """ Aggregate function: returns the maximum value of the expression in a group. @@ -1310,7 +880,7 @@ def max(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("max", col) -@try_remote_functions +@_try_remote_functions def min(col: "ColumnOrName") -> Column: """ Aggregate function: returns the minimum value of the expression in a group. @@ -1399,7 +969,7 @@ def min(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("min", col) -@try_remote_functions +@_try_remote_functions def max_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: """ Returns the value from the `col` parameter that is associated with the maximum value @@ -1476,7 +1046,7 @@ def max_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: return _invoke_function_over_columns("max_by", col, ord) -@try_remote_functions +@_try_remote_functions def min_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: """ Returns the value from the `col` parameter that is associated with the minimum value @@ -1553,7 +1123,7 @@ def min_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: return _invoke_function_over_columns("min_by", col, ord) -@try_remote_functions +@_try_remote_functions def count(col: "ColumnOrName") -> Column: """ Aggregate function: returns the number of items in a group. @@ -1588,7 +1158,7 @@ def count(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("count", col) -@try_remote_functions +@_try_remote_functions def sum(col: "ColumnOrName") -> Column: """ Aggregate function: returns the sum of all values in the expression. @@ -1621,7 +1191,7 @@ def sum(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sum", col) -@try_remote_functions +@_try_remote_functions def avg(col: "ColumnOrName") -> Column: """ Aggregate function: returns the average of the values in a group. @@ -1654,7 +1224,7 @@ def avg(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("avg", col) -@try_remote_functions +@_try_remote_functions def mean(col: "ColumnOrName") -> Column: """ Aggregate function: returns the average of the values in a group. 
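As a quick illustration of the try_* family documented in the hunks above (an editor's sketch, not part of the patch): these variants return NULL where their plain counterparts would raise, for example try_divide on division by zero.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(6, 3), (1, 0)], ["a", "b"])

# try_divide yields NULL for the 1/0 row instead of failing the query
df.select(F.try_divide("a", "b").alias("q")).show()
# +----+
# |   q|
# +----+
# | 2.0|
# |NULL|
# +----+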
@@ -1688,7 +1258,7 @@ def mean(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("mean", col) -@try_remote_functions +@_try_remote_functions def median(col: "ColumnOrName") -> Column: """ Returns the median of the values in a group. @@ -1727,7 +1297,7 @@ def median(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("median", col) -@try_remote_functions +@_try_remote_functions def sumDistinct(col: "ColumnOrName") -> Column: """ Aggregate function: returns the sum of distinct values in the expression. @@ -1744,7 +1314,7 @@ def sumDistinct(col: "ColumnOrName") -> Column: return sum_distinct(col) -@try_remote_functions +@_try_remote_functions def sum_distinct(col: "ColumnOrName") -> Column: """ Aggregate function: returns the sum of distinct values in the expression. @@ -1777,7 +1347,7 @@ def sum_distinct(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sum_distinct", col) -@try_remote_functions +@_try_remote_functions def product(col: "ColumnOrName") -> Column: """ Aggregate function: returns the product of the values in a group. @@ -1813,7 +1383,7 @@ def product(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("product", col) -@try_remote_functions +@_try_remote_functions def acos(col: "ColumnOrName") -> Column: """ Computes inverse cosine of the input column. @@ -1847,7 +1417,7 @@ def acos(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("acos", col) -@try_remote_functions +@_try_remote_functions def acosh(col: "ColumnOrName") -> Column: """ Computes inverse hyperbolic cosine of the input column. @@ -1881,7 +1451,7 @@ def acosh(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("acosh", col) -@try_remote_functions +@_try_remote_functions def asin(col: "ColumnOrName") -> Column: """ Computes inverse sine of the input column. @@ -1915,7 +1485,7 @@ def asin(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("asin", col) -@try_remote_functions +@_try_remote_functions def asinh(col: "ColumnOrName") -> Column: """ Computes inverse hyperbolic sine of the input column. @@ -1948,7 +1518,7 @@ def asinh(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("asinh", col) -@try_remote_functions +@_try_remote_functions def atan(col: "ColumnOrName") -> Column: """ Compute inverse tangent of the input column. @@ -1981,7 +1551,7 @@ def atan(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("atan", col) -@try_remote_functions +@_try_remote_functions def atanh(col: "ColumnOrName") -> Column: """ Computes inverse hyperbolic tangent of the input column. @@ -2015,7 +1585,7 @@ def atanh(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("atanh", col) -@try_remote_functions +@_try_remote_functions def cbrt(col: "ColumnOrName") -> Column: """ Computes the cube-root of the given value. @@ -2048,7 +1618,7 @@ def cbrt(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("cbrt", col) -@try_remote_functions +@_try_remote_functions def ceil(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Column: """ Computes the ceiling of the given value. @@ -2097,7 +1667,7 @@ def ceil(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Col return _invoke_function_over_columns("ceil", col, scale) -@try_remote_functions +@_try_remote_functions def ceiling(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Column: """ Computes the ceiling of the given value. 
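The optional scale argument in the ceil/ceiling signatures above rounds at a given decimal place (floor, shown further below, takes the same argument); a short sketch (editor's illustration):

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(3.1415,)], ["v"])

# ceil rounds up and floor rounds down at the second decimal place
df.select(F.ceil("v", 2).alias("c"), F.floor("v", 2).alias("f")).show()
# +----+----+
# |   c|   f|
# +----+----+
# |3.15|3.14|
# +----+----+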
@@ -2146,7 +1716,7 @@ def ceiling(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> return _invoke_function_over_columns("ceiling", col, scale) -@try_remote_functions +@_try_remote_functions def cos(col: "ColumnOrName") -> Column: """ Computes cosine of the input column. @@ -2176,7 +1746,7 @@ def cos(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("cos", col) -@try_remote_functions +@_try_remote_functions def cosh(col: "ColumnOrName") -> Column: """ Computes hyperbolic cosine of the input column. @@ -2205,7 +1775,7 @@ def cosh(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("cosh", col) -@try_remote_functions +@_try_remote_functions def cot(col: "ColumnOrName") -> Column: """ Computes cotangent of the input column. @@ -2235,7 +1805,7 @@ def cot(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("cot", col) -@try_remote_functions +@_try_remote_functions def csc(col: "ColumnOrName") -> Column: """ Computes cosecant of the input column. @@ -2265,7 +1835,7 @@ def csc(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("csc", col) -@try_remote_functions +@_try_remote_functions def e() -> Column: """Returns Euler's number. @@ -2283,7 +1853,7 @@ def e() -> Column: return _invoke_function("e") -@try_remote_functions +@_try_remote_functions def exp(col: "ColumnOrName") -> Column: """ Computes the exponential of the given value. @@ -2316,7 +1886,7 @@ def exp(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("exp", col) -@try_remote_functions +@_try_remote_functions def expm1(col: "ColumnOrName") -> Column: """ Computes the exponential of the given value minus one. @@ -2345,7 +1915,7 @@ def expm1(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("expm1", col) -@try_remote_functions +@_try_remote_functions def floor(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Column: """ Computes the floor of the given value. @@ -2395,7 +1965,7 @@ def floor(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Co return _invoke_function_over_columns("floor", col, scale) -@try_remote_functions +@_try_remote_functions def log(col: "ColumnOrName") -> Column: """ Computes the natural logarithm of the given value. @@ -2425,7 +1995,7 @@ def log(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("log", col) -@try_remote_functions +@_try_remote_functions def log10(col: "ColumnOrName") -> Column: """ Computes the logarithm of the given value in Base 10. @@ -2458,7 +2028,7 @@ def log10(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("log10", col) -@try_remote_functions +@_try_remote_functions def log1p(col: "ColumnOrName") -> Column: """ Computes the natural logarithm of the "given value plus one". @@ -2493,7 +2063,7 @@ def log1p(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("log1p", col) -@try_remote_functions +@_try_remote_functions def negative(col: "ColumnOrName") -> Column: """ Returns the negative value. @@ -2528,7 +2098,7 @@ def negative(col: "ColumnOrName") -> Column: negate = negative -@try_remote_functions +@_try_remote_functions def pi() -> Column: """Returns Pi. @@ -2546,7 +2116,7 @@ def pi() -> Column: return _invoke_function("pi") -@try_remote_functions +@_try_remote_functions def positive(col: "ColumnOrName") -> Column: """ Returns the value. 
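For the log family above, a small sketch (toy input assumed) of why log1p exists: it stays accurate for arguments near zero, where computing log(1 + x) directly loses the small value to double rounding:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1e-16,)], ["x"])
# 1.0 + 1e-16 rounds to exactly 1.0 in IEEE doubles, so log(1 + x) returns 0.0,
# while log1p(x) returns approximately 1e-16.
df.select(sf.log1p("x"), sf.log(df.x + 1)).show(truncate=False)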
@@ -2578,7 +2148,7 @@ def positive(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("positive", col) -@try_remote_functions +@_try_remote_functions def rint(col: "ColumnOrName") -> Column: """ Returns the double value that is closest in value to the argument and @@ -2619,7 +2189,7 @@ def rint(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("rint", col) -@try_remote_functions +@_try_remote_functions def sec(col: "ColumnOrName") -> Column: """ Computes secant of the input column. @@ -2648,7 +2218,7 @@ def sec(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sec", col) -@try_remote_functions +@_try_remote_functions def signum(col: "ColumnOrName") -> Column: """ Computes the signum of the given value. @@ -2684,7 +2254,7 @@ def signum(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("signum", col) -@try_remote_functions +@_try_remote_functions def sign(col: "ColumnOrName") -> Column: """ Computes the signum of the given value. @@ -2720,7 +2290,7 @@ def sign(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sign", col) -@try_remote_functions +@_try_remote_functions def sin(col: "ColumnOrName") -> Column: """ Computes sine of the input column. @@ -2750,7 +2320,7 @@ def sin(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sin", col) -@try_remote_functions +@_try_remote_functions def sinh(col: "ColumnOrName") -> Column: """ Computes hyperbolic sine of the input column. @@ -2780,7 +2350,7 @@ def sinh(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sinh", col) -@try_remote_functions +@_try_remote_functions def tan(col: "ColumnOrName") -> Column: """ Computes tangent of the input column. @@ -2810,7 +2380,7 @@ def tan(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("tan", col) -@try_remote_functions +@_try_remote_functions def tanh(col: "ColumnOrName") -> Column: """ Computes hyperbolic tangent of the input column. @@ -2841,7 +2411,7 @@ def tanh(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("tanh", col) -@try_remote_functions +@_try_remote_functions def toDegrees(col: "ColumnOrName") -> Column: """ .. versionadded:: 1.4.0 @@ -2856,7 +2426,7 @@ def toDegrees(col: "ColumnOrName") -> Column: return degrees(col) -@try_remote_functions +@_try_remote_functions def toRadians(col: "ColumnOrName") -> Column: """ .. versionadded:: 1.4.0 @@ -2871,7 +2441,7 @@ def toRadians(col: "ColumnOrName") -> Column: return radians(col) -@try_remote_functions +@_try_remote_functions def bitwiseNOT(col: "ColumnOrName") -> Column: """ Computes bitwise not. @@ -2888,7 +2458,7 @@ def bitwiseNOT(col: "ColumnOrName") -> Column: return bitwise_not(col) -@try_remote_functions +@_try_remote_functions def bitwise_not(col: "ColumnOrName") -> Column: """ Computes bitwise not. @@ -2927,7 +2497,7 @@ def bitwise_not(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bitwise_not", col) -@try_remote_functions +@_try_remote_functions def bit_count(col: "ColumnOrName") -> Column: """ Returns the number of bits that are set in the argument expr as an unsigned 64-bit integer, @@ -2961,7 +2531,7 @@ def bit_count(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bit_count", col) -@try_remote_functions +@_try_remote_functions def bit_get(col: "ColumnOrName", pos: "ColumnOrName") -> Column: """ Returns the value of the bit (0 or 1) at the specified position. 
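A small illustrative check (sample value invented, not from this diff) of the bit-inspection helpers above:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(5,)], ["v"])  # 5 = 0b101
# bit_count(5) is 2; bit_get/getbit index bits from the least significant position 0.
df.select(sf.bit_count("v"), sf.bit_get("v", sf.lit(0)), sf.getbit("v", sf.lit(1))).show()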
@@ -2997,7 +2567,7 @@ def bit_get(col: "ColumnOrName", pos: "ColumnOrName") -> Column: return _invoke_function_over_columns("bit_get", col, pos) -@try_remote_functions +@_try_remote_functions def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column: """ Returns the value of the bit (0 or 1) at the specified position. @@ -3035,7 +2605,7 @@ def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column: return _invoke_function_over_columns("getbit", col, pos) -@try_remote_functions +@_try_remote_functions def asc_nulls_first(col: "ColumnOrName") -> Column: """ Returns a sort expression based on the ascending order of the given @@ -3078,7 +2648,7 @@ def asc_nulls_first(col: "ColumnOrName") -> Column: ) -@try_remote_functions +@_try_remote_functions def asc_nulls_last(col: "ColumnOrName") -> Column: """ Returns a sort expression based on the ascending order of the given @@ -3119,7 +2689,7 @@ def asc_nulls_last(col: "ColumnOrName") -> Column: ) -@try_remote_functions +@_try_remote_functions def desc_nulls_first(col: "ColumnOrName") -> Column: """ Returns a sort expression based on the descending order of the given @@ -3162,7 +2732,7 @@ def desc_nulls_first(col: "ColumnOrName") -> Column: ) -@try_remote_functions +@_try_remote_functions def desc_nulls_last(col: "ColumnOrName") -> Column: """ Returns a sort expression based on the descending order of the given @@ -3205,7 +2775,7 @@ def desc_nulls_last(col: "ColumnOrName") -> Column: ) -@try_remote_functions +@_try_remote_functions def stddev(col: "ColumnOrName") -> Column: """ Aggregate function: alias for stddev_samp. @@ -3238,7 +2808,7 @@ def stddev(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("stddev", col) -@try_remote_functions +@_try_remote_functions def std(col: "ColumnOrName") -> Column: """ Aggregate function: alias for stddev_samp. @@ -3268,7 +2838,7 @@ def std(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("std", col) -@try_remote_functions +@_try_remote_functions def stddev_samp(col: "ColumnOrName") -> Column: """ Aggregate function: returns the unbiased sample standard deviation of @@ -3302,7 +2872,7 @@ def stddev_samp(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("stddev_samp", col) -@try_remote_functions +@_try_remote_functions def stddev_pop(col: "ColumnOrName") -> Column: """ Aggregate function: returns population standard deviation of @@ -3336,7 +2906,7 @@ def stddev_pop(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("stddev_pop", col) -@try_remote_functions +@_try_remote_functions def variance(col: "ColumnOrName") -> Column: """ Aggregate function: alias for var_samp @@ -3369,7 +2939,7 @@ def variance(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("variance", col) -@try_remote_functions +@_try_remote_functions def var_samp(col: "ColumnOrName") -> Column: """ Aggregate function: returns the unbiased sample variance of @@ -3403,7 +2973,7 @@ def var_samp(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("var_samp", col) -@try_remote_functions +@_try_remote_functions def var_pop(col: "ColumnOrName") -> Column: """ Aggregate function: returns the population variance of the values in a group. 
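The null-placement sort helpers above are easiest to see side by side; a minimal sketch (toy rows assumed, not part of the patch):

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("Alice",), (None,), ("Bob",)], ["name"])
# asc_nulls_last sorts ascending with NULLs after all non-null values; desc_nulls_first is the mirror image.
df.orderBy(sf.asc_nulls_last("name")).show()
df.orderBy(sf.desc_nulls_first("name")).show()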
@@ -3432,7 +3002,7 @@ def var_pop(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("var_pop", col) -@try_remote_functions +@_try_remote_functions def regr_avgx(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns the average of the independent variable for non-null pairs @@ -3463,7 +3033,7 @@ def regr_avgx(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_avgx", y, x) -@try_remote_functions +@_try_remote_functions def regr_avgy(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns the average of the dependent variable for non-null pairs @@ -3494,7 +3064,7 @@ def regr_avgy(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_avgy", y, x) -@try_remote_functions +@_try_remote_functions def regr_count(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns the number of non-null number pairs @@ -3525,7 +3095,7 @@ def regr_count(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_count", y, x) -@try_remote_functions +@_try_remote_functions def regr_intercept(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns the intercept of the univariate linear regression line @@ -3557,7 +3127,7 @@ def regr_intercept(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_intercept", y, x) -@try_remote_functions +@_try_remote_functions def regr_r2(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns the coefficient of determination for non-null pairs @@ -3588,7 +3158,7 @@ def regr_r2(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_r2", y, x) -@try_remote_functions +@_try_remote_functions def regr_slope(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns the slope of the linear regression line for non-null pairs @@ -3619,7 +3189,7 @@ def regr_slope(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_slope", y, x) -@try_remote_functions +@_try_remote_functions def regr_sxx(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns REGR_COUNT(y, x) * VAR_POP(x) for non-null pairs @@ -3650,7 +3220,7 @@ def regr_sxx(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_sxx", y, x) -@try_remote_functions +@_try_remote_functions def regr_sxy(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns REGR_COUNT(y, x) * COVAR_POP(y, x) for non-null pairs @@ -3681,7 +3251,7 @@ def regr_sxy(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_sxy", y, x) -@try_remote_functions +@_try_remote_functions def regr_syy(y: "ColumnOrName", x: "ColumnOrName") -> Column: """ Aggregate function: returns REGR_COUNT(y, x) * VAR_POP(y) for non-null pairs @@ -3712,7 +3282,7 @@ def regr_syy(y: "ColumnOrName", x: "ColumnOrName") -> Column: return _invoke_function_over_columns("regr_syy", y, x) -@try_remote_functions +@_try_remote_functions def every(col: "ColumnOrName") -> Column: """ Aggregate function: returns true if all values of `col` are true. 
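The regr_* aggregates above together recover a univariate least-squares fit; a minimal sketch with invented data lying exactly on y = 2x:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1.0, 2.0), (2.0, 4.0), (3.0, 6.0)], ["x", "y"])
# Over non-null (y, x) pairs: slope is 2.0, intercept 0.0, and r2 is 1.0 for a perfect fit.
df.select(sf.regr_slope("y", "x"), sf.regr_intercept("y", "x"), sf.regr_r2("y", "x")).show()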
@@ -3764,7 +3334,7 @@ def every(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("every", col) -@try_remote_functions +@_try_remote_functions def bool_and(col: "ColumnOrName") -> Column: """ Aggregate function: returns true if all values of `col` are true. @@ -3808,7 +3378,7 @@ def bool_and(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bool_and", col) -@try_remote_functions +@_try_remote_functions def some(col: "ColumnOrName") -> Column: """ Aggregate function: returns true if at least one value of `col` is true. @@ -3860,7 +3430,7 @@ def some(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("some", col) -@try_remote_functions +@_try_remote_functions def bool_or(col: "ColumnOrName") -> Column: """ Aggregate function: returns true if at least one value of `col` is true. @@ -3904,7 +3474,7 @@ def bool_or(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bool_or", col) -@try_remote_functions +@_try_remote_functions def bit_and(col: "ColumnOrName") -> Column: """ Aggregate function: returns the bitwise AND of all non-null input values, or null if none. @@ -3930,7 +3500,7 @@ def bit_and(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bit_and", col) -@try_remote_functions +@_try_remote_functions def bit_or(col: "ColumnOrName") -> Column: """ Aggregate function: returns the bitwise OR of all non-null input values, or null if none. @@ -3956,7 +3526,7 @@ def bit_or(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bit_or", col) -@try_remote_functions +@_try_remote_functions def bit_xor(col: "ColumnOrName") -> Column: """ Aggregate function: returns the bitwise XOR of all non-null input values, or null if none. @@ -3982,7 +3552,7 @@ def bit_xor(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bit_xor", col) -@try_remote_functions +@_try_remote_functions def skewness(col: "ColumnOrName") -> Column: """ Aggregate function: returns the skewness of the values in a group. @@ -4011,7 +3581,7 @@ def skewness(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("skewness", col) -@try_remote_functions +@_try_remote_functions def kurtosis(col: "ColumnOrName") -> Column: """ Aggregate function: returns the kurtosis of the values in a group. @@ -4044,7 +3614,7 @@ def kurtosis(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("kurtosis", col) -@try_remote_functions +@_try_remote_functions def collect_list(col: "ColumnOrName") -> Column: """ Aggregate function: returns a list of objects with duplicates. @@ -4078,7 +3648,7 @@ def collect_list(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("collect_list", col) -@try_remote_functions +@_try_remote_functions def array_agg(col: "ColumnOrName") -> Column: """ Aggregate function: returns a list of objects with duplicates. @@ -4104,7 +3674,7 @@ def array_agg(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_agg", col) -@try_remote_functions +@_try_remote_functions def collect_set(col: "ColumnOrName") -> Column: """ Aggregate function: returns a set of objects with duplicate elements eliminated. 
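A compact illustrative sketch (toy rows assumed) of the boolean and collection aggregates in the hunks above:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(True, 1), (True, 1), (False, 2)], ["flag", "v"])
# bool_and is TRUE only when every value is TRUE; bool_or when at least one is; collect_set deduplicates.
df.select(sf.bool_and("flag"), sf.bool_or("flag"), sf.collect_set("v")).show()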
@@ -4138,7 +3708,7 @@ def collect_set(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("collect_set", col) -@try_remote_functions +@_try_remote_functions def degrees(col: "ColumnOrName") -> Column: """ Converts an angle measured in radians to an approximately equivalent angle @@ -4169,7 +3739,7 @@ def degrees(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("degrees", col) -@try_remote_functions +@_try_remote_functions def radians(col: "ColumnOrName") -> Column: """ Converts an angle measured in degrees to an approximately equivalent angle @@ -4199,7 +3769,7 @@ def radians(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("radians", col) -@try_remote_functions +@_try_remote_functions def atan2(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column: """ .. versionadded:: 1.4.0 @@ -4232,7 +3802,7 @@ def atan2(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float] return _invoke_binary_math_function("atan2", col1, col2) -@try_remote_functions +@_try_remote_functions def hypot(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column: """ Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow. @@ -4263,7 +3833,7 @@ def hypot(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float] return _invoke_binary_math_function("hypot", col1, col2) -@try_remote_functions +@_try_remote_functions def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column: """ Returns the value of the first argument raised to the power of the second argument. @@ -4297,7 +3867,7 @@ def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) power = pow -@try_remote_functions +@_try_remote_functions def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", float]) -> Column: """ Returns the positive value of dividend mod divisor. @@ -4346,7 +3916,7 @@ def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", return _invoke_binary_math_function("pmod", dividend, divisor) -@try_remote_functions +@_try_remote_functions def width_bucket( v: "ColumnOrName", min: "ColumnOrName", @@ -4398,7 +3968,7 @@ def width_bucket( return _invoke_function_over_columns("width_bucket", v, min, max, numBucket) -@try_remote_functions +@_try_remote_functions def row_number() -> Column: """ Window function: returns a sequential number starting at 1 within a window partition. @@ -4430,7 +4000,7 @@ def row_number() -> Column: return _invoke_function("row_number") -@try_remote_functions +@_try_remote_functions def dense_rank() -> Column: """ Window function: returns the rank of rows within a window partition, without any gaps. @@ -4473,7 +4043,7 @@ def dense_rank() -> Column: return _invoke_function("dense_rank") -@try_remote_functions +@_try_remote_functions def rank() -> Column: """ Window function: returns the rank of rows within a window partition. @@ -4516,7 +4086,7 @@ def rank() -> Column: return _invoke_function("rank") -@try_remote_functions +@_try_remote_functions def cume_dist() -> Column: """ Window function: returns the cumulative distribution of values within a window partition, @@ -4551,7 +4121,7 @@ def cume_dist() -> Column: return _invoke_function("cume_dist") -@try_remote_functions +@_try_remote_functions def percent_rank() -> Column: """ Window function: returns the relative rank (i.e. percentile) of rows within a window partition. 
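The ranking window functions above differ only in how they treat ties; an illustrative sketch (toy frame assumed, not part of the patch):

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf
from pyspark.sql.window import Window

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("b", 1), ("c", 2)], ["k", "v"])
w = Window.orderBy("v")
# rank leaves gaps after ties (1, 1, 3); dense_rank does not (1, 1, 2).
df.select("k", "v", sf.rank().over(w), sf.dense_rank().over(w)).show()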
@@ -4586,7 +4156,7 @@ def percent_rank() -> Column: return _invoke_function("percent_rank") -@try_remote_functions +@_try_remote_functions def approxCountDistinct(col: "ColumnOrName", rsd: Optional[float] = None) -> Column: """ .. versionadded:: 1.3.0 @@ -4601,7 +4171,7 @@ def approxCountDistinct(col: "ColumnOrName", rsd: Optional[float] = None) -> Col return approx_count_distinct(col, rsd) -@try_remote_functions +@_try_remote_functions def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> Column: """ This aggregate function returns a new :class:`~pyspark.sql.Column`, which estimates @@ -4626,7 +4196,7 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C A new Column object representing the approximate unique count. See Also - ---------- + -------- :meth:`pyspark.sql.functions.count_distinct` Examples @@ -4686,7 +4256,7 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C return _invoke_function("approx_count_distinct", _to_java_column(col), rsd) -@try_remote_functions +@_try_remote_functions def broadcast(df: DataFrame) -> DataFrame: """ Marks a DataFrame as small enough for use in broadcast joins. @@ -4716,11 +4286,11 @@ def broadcast(df: DataFrame) -> DataFrame: +-----+---+ """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return DataFrame(cast(JVMView, sc._jvm).functions.broadcast(df._jdf), df.sparkSession) -@try_remote_functions +@_try_remote_functions def coalesce(*cols: "ColumnOrName") -> Column: """Returns the first column that is not null. @@ -4772,7 +4342,7 @@ def coalesce(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("coalesce", cols) -@try_remote_functions +@_try_remote_functions def corr(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """Returns a new :class:`~pyspark.sql.Column` for the Pearson Correlation Coefficient for ``col1`` and ``col2``. @@ -4805,7 +4375,7 @@ def corr(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("corr", col1, col2) -@try_remote_functions +@_try_remote_functions def covar_pop(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """Returns a new :class:`~pyspark.sql.Column` for the population covariance of ``col1`` and ``col2``. @@ -4838,7 +4408,7 @@ def covar_pop(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("covar_pop", col1, col2) -@try_remote_functions +@_try_remote_functions def covar_samp(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """Returns a new :class:`~pyspark.sql.Column` for the sample covariance of ``col1`` and ``col2``. @@ -4871,7 +4441,7 @@ def covar_samp(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("covar_samp", col1, col2) -@try_remote_functions +@_try_remote_functions def countDistinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: """Returns a new :class:`~pyspark.sql.Column` for distinct count of ``col`` or ``cols``. @@ -4886,7 +4456,7 @@ def countDistinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: return count_distinct(col, *cols) -@try_remote_functions +@_try_remote_functions def count_distinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``. 
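A hedged sketch contrasting the approximate and exact distinct counts above (data is a plain range; results may differ slightly by design):

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.range(100000)
# rsd is the maximum allowed relative standard deviation; looser values trade accuracy for less memory.
df.select(sf.approx_count_distinct("id", rsd=0.05).alias("approx"),
          sf.count_distinct("id").alias("exact")).show()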
@@ -4930,13 +4500,13 @@ def count_distinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: | 4| +----------------------------+ """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function( "count_distinct", _to_java_column(col), _to_seq(sc, cols, _to_java_column) ) -@try_remote_functions +@_try_remote_functions def first(col: "ColumnOrName", ignorenulls: bool = False) -> Column: """Aggregate function: returns the first value in a group. @@ -4990,7 +4560,7 @@ def first(col: "ColumnOrName", ignorenulls: bool = False) -> Column: return _invoke_function("first", _to_java_column(col), ignorenulls) -@try_remote_functions +@_try_remote_functions def grouping(col: "ColumnOrName") -> Column: """ Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated @@ -5026,7 +4596,7 @@ def grouping(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("grouping", col) -@try_remote_functions +@_try_remote_functions def grouping_id(*cols: "ColumnOrName") -> Column: """ Aggregate function: returns the level of grouping, equals to @@ -5074,7 +4644,7 @@ def grouping_id(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("grouping_id", cols) -@try_remote_functions +@_try_remote_functions def count_min_sketch( col: "ColumnOrName", eps: "ColumnOrName", @@ -5115,7 +4685,7 @@ def count_min_sketch( return _invoke_function_over_columns("count_min_sketch", col, eps, confidence, seed) -@try_remote_functions +@_try_remote_functions def input_file_name() -> Column: """ Creates a string column for the file name of the current Spark task. @@ -5141,7 +4711,7 @@ def input_file_name() -> Column: return _invoke_function("input_file_name") -@try_remote_functions +@_try_remote_functions def isnan(col: "ColumnOrName") -> Column: """An expression that returns true if the column is NaN. @@ -5174,7 +4744,7 @@ def isnan(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("isnan", col) -@try_remote_functions +@_try_remote_functions def isnull(col: "ColumnOrName") -> Column: """An expression that returns true if the column is null. @@ -5207,7 +4777,7 @@ def isnull(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("isnull", col) -@try_remote_functions +@_try_remote_functions def last(col: "ColumnOrName", ignorenulls: bool = False) -> Column: """Aggregate function: returns the last value in a group. @@ -5261,7 +4831,7 @@ def last(col: "ColumnOrName", ignorenulls: bool = False) -> Column: return _invoke_function("last", _to_java_column(col), ignorenulls) -@try_remote_functions +@_try_remote_functions def monotonically_increasing_id() -> Column: """A column that generates monotonically increasing 64-bit integers. @@ -5310,7 +4880,7 @@ def monotonically_increasing_id() -> Column: return _invoke_function("monotonically_increasing_id") -@try_remote_functions +@_try_remote_functions def nanvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """Returns col1 if it is not NaN, or col2 if col1 is NaN. 
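Since NaN is distinct from NULL in Spark SQL, a small sketch (invented values) of the NaN helpers above:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1.0, float("nan")), (float("nan"), 2.0)], ["a", "b"])
# isnan flags NaN specifically (a NULL is not NaN); nanvl falls back to the second column when the first is NaN.
df.select(sf.isnan("a"), sf.nanvl("a", "b")).show()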
@@ -5342,7 +4912,7 @@ def nanvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("nanvl", col1, col2) -@try_remote_functions +@_try_remote_functions def percentile( col: "ColumnOrName", percentage: Union[Column, float, List[float], Tuple[float]], @@ -5391,7 +4961,7 @@ def percentile( | 2| 19.967859769284075| +---+--------------------+ """ - sc = get_active_spark_context() + sc = _get_active_spark_context() if isinstance(percentage, (list, tuple)): # A local list @@ -5414,7 +4984,7 @@ def percentile( return _invoke_function("percentile", _to_java_column(col), percentage, frequency) -@try_remote_functions +@_try_remote_functions def percentile_approx( col: "ColumnOrName", percentage: Union[Column, float, List[float], Tuple[float]], @@ -5468,7 +5038,7 @@ def percentile_approx( |-- key: long (nullable = true) |-- median: double (nullable = true) """ - sc = get_active_spark_context() + sc = _get_active_spark_context() if isinstance(percentage, (list, tuple)): # A local list @@ -5491,7 +5061,7 @@ def percentile_approx( return _invoke_function("percentile_approx", _to_java_column(col), percentage, accuracy) -@try_remote_functions +@_try_remote_functions def approx_percentile( col: "ColumnOrName", percentage: Union[Column, float, List[float], Tuple[float]], @@ -5542,7 +5112,7 @@ def approx_percentile( |-- key: long (nullable = true) |-- approx_percentile(value, 0.5, 1000000): double (nullable = true) """ - sc = get_active_spark_context() + sc = _get_active_spark_context() if isinstance(percentage, (list, tuple)): # A local list @@ -5565,7 +5135,7 @@ def approx_percentile( return _invoke_function("approx_percentile", _to_java_column(col), percentage, accuracy) -@try_remote_functions +@_try_remote_functions def rand(seed: Optional[int] = None) -> Column: """Generates a random column with independent and identically distributed (i.i.d.) samples uniformly distributed in [0.0, 1.0). @@ -5606,7 +5176,7 @@ def rand(seed: Optional[int] = None) -> Column: return _invoke_function("rand") -@try_remote_functions +@_try_remote_functions def randn(seed: Optional[int] = None) -> Column: """Generates a column with independent and identically distributed (i.i.d.) samples from the standard normal distribution. @@ -5647,7 +5217,7 @@ def randn(seed: Optional[int] = None) -> Column: return _invoke_function("randn") -@try_remote_functions +@_try_remote_functions def round(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Column: """ Round the given value to `scale` decimal places using HALF_UP rounding mode if `scale` >= 0 @@ -5698,7 +5268,7 @@ def round(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Co return _invoke_function_over_columns("round", col, scale) -@try_remote_functions +@_try_remote_functions def bround(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Column: """ Round the given value to `scale` decimal places using HALF_EVEN rounding mode if `scale` >= 0 @@ -5749,7 +5319,7 @@ def bround(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> C return _invoke_function_over_columns("bround", col, scale) -@try_remote_functions +@_try_remote_functions def shiftLeft(col: "ColumnOrName", numBits: int) -> Column: """Shift the given value numBits left. 
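The two rounding modes documented above are easiest to see on half-way values; an illustrative sketch (sample values invented):

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(2.5,), (3.5,)], ["v"])
# round uses HALF_UP (2.5 -> 3, 3.5 -> 4); bround uses HALF_EVEN, i.e. banker's rounding (2.5 -> 2, 3.5 -> 4).
df.select("v", sf.round("v").alias("half_up"), sf.bround("v").alias("half_even")).show()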
@@ -5765,7 +5335,7 @@ def shiftLeft(col: "ColumnOrName", numBits: int) -> Column: return shiftleft(col, numBits) -@try_remote_functions +@_try_remote_functions def shiftleft(col: "ColumnOrName", numBits: int) -> Column: """Shift the given value numBits left. @@ -5794,7 +5364,7 @@ def shiftleft(col: "ColumnOrName", numBits: int) -> Column: return _invoke_function("shiftleft", _to_java_column(col), numBits) -@try_remote_functions +@_try_remote_functions def shiftRight(col: "ColumnOrName", numBits: int) -> Column: """(Signed) shift the given value numBits right. @@ -5810,7 +5380,7 @@ def shiftRight(col: "ColumnOrName", numBits: int) -> Column: return shiftright(col, numBits) -@try_remote_functions +@_try_remote_functions def shiftright(col: "ColumnOrName", numBits: int) -> Column: """(Signed) shift the given value numBits right. @@ -5839,7 +5409,7 @@ def shiftright(col: "ColumnOrName", numBits: int) -> Column: return _invoke_function("shiftright", _to_java_column(col), numBits) -@try_remote_functions +@_try_remote_functions def shiftRightUnsigned(col: "ColumnOrName", numBits: int) -> Column: """Unsigned shift the given value numBits right. @@ -5855,7 +5425,7 @@ def shiftRightUnsigned(col: "ColumnOrName", numBits: int) -> Column: return shiftrightunsigned(col, numBits) -@try_remote_functions +@_try_remote_functions def shiftrightunsigned(col: "ColumnOrName", numBits: int) -> Column: """Unsigned shift the given value numBits right. @@ -5885,7 +5455,7 @@ def shiftrightunsigned(col: "ColumnOrName", numBits: int) -> Column: return _invoke_function("shiftrightunsigned", _to_java_column(col), numBits) -@try_remote_functions +@_try_remote_functions def spark_partition_id() -> Column: """A column for partition ID. @@ -5912,7 +5482,7 @@ def spark_partition_id() -> Column: return _invoke_function("spark_partition_id") -@try_remote_functions +@_try_remote_functions def expr(str: str) -> Column: """Parses the expression string into the column that it represents @@ -5955,7 +5525,7 @@ def struct(__cols: Union[List["ColumnOrName_"], Tuple["ColumnOrName_", ...]]) -> ... -@try_remote_functions +@_try_remote_functions def struct( *cols: Union["ColumnOrName", Union[List["ColumnOrName_"], Tuple["ColumnOrName_", ...]]] ) -> Column: @@ -5989,7 +5559,7 @@ def struct( return _invoke_function_over_seq_of_columns("struct", cols) # type: ignore[arg-type] -@try_remote_functions +@_try_remote_functions def named_struct(*cols: "ColumnOrName") -> Column: """ Creates a struct with the given field names and values. @@ -6014,7 +5584,7 @@ def named_struct(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("named_struct", cols) -@try_remote_functions +@_try_remote_functions def greatest(*cols: "ColumnOrName") -> Column: """ Returns the greatest value of the list of column names, skipping null values. @@ -6049,7 +5619,7 @@ def greatest(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("greatest", cols) -@try_remote_functions +@_try_remote_functions def least(*cols: "ColumnOrName") -> Column: """ Returns the least value of the list of column names, skipping null values. @@ -6084,7 +5654,7 @@ def least(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("least", cols) -@try_remote_functions +@_try_remote_functions def when(condition: Column, value: Any) -> Column: """Evaluates a list of conditions and returns one of multiple possible result expressions. 
If :func:`pyspark.sql.Column.otherwise` is not invoked, None is returned for unmatched @@ -6149,7 +5719,7 @@ def log(arg1: float, arg2: "ColumnOrName") -> Column: ... -@try_remote_functions +@_try_remote_functions def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = None) -> Column: """Returns the first argument-based logarithm of the second argument. @@ -6202,7 +5772,7 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = Non return _invoke_function("log", arg1, _to_java_column(arg2)) -@try_remote_functions +@_try_remote_functions def ln(col: "ColumnOrName") -> Column: """Returns the natural logarithm of the argument. @@ -6231,7 +5801,7 @@ def ln(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("ln", col) -@try_remote_functions +@_try_remote_functions def log2(col: "ColumnOrName") -> Column: """Returns the base-2 logarithm of the argument. @@ -6263,7 +5833,7 @@ def log2(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("log2", col) -@try_remote_functions +@_try_remote_functions def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column: """ Convert a number in a string column from one base to another. @@ -6296,7 +5866,7 @@ def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column: return _invoke_function("conv", _to_java_column(col), fromBase, toBase) -@try_remote_functions +@_try_remote_functions def factorial(col: "ColumnOrName") -> Column: """ Computes the factorial of the given value. @@ -6328,7 +5898,7 @@ def factorial(col: "ColumnOrName") -> Column: # --------------- Window functions ------------------------ -@try_remote_functions +@_try_remote_functions def lag(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> Column: """ Window function: returns the value that is `offset` rows before the current row, and @@ -6409,7 +5979,7 @@ def lag(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> return _invoke_function("lag", _to_java_column(col), offset, default) -@try_remote_functions +@_try_remote_functions def lead(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> Column: """ Window function: returns the value that is `offset` rows after the current row, and @@ -6490,7 +6060,7 @@ def lead(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> return _invoke_function("lead", _to_java_column(col), offset, default) -@try_remote_functions +@_try_remote_functions def nth_value(col: "ColumnOrName", offset: int, ignoreNulls: Optional[bool] = False) -> Column: """ Window function: returns the value that is the `offset`\\th row of the window frame @@ -6564,7 +6134,7 @@ def nth_value(col: "ColumnOrName", offset: int, ignoreNulls: Optional[bool] = Fa return _invoke_function("nth_value", _to_java_column(col), offset, ignoreNulls) -@try_remote_functions +@_try_remote_functions def any_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column: """Returns some value of `col` for a group of rows. @@ -6601,7 +6171,7 @@ def any_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = return _invoke_function_over_columns("any_value", col, ignoreNulls) -@try_remote_functions +@_try_remote_functions def first_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column: """Returns the first value of `col` for a group of rows. It will return the first non-null value it sees when `ignoreNulls` is set to true. 
If all values are null, then null is returned. @@ -6649,7 +6219,7 @@ def first_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] return _invoke_function_over_columns("first_value", col, ignoreNulls) -@try_remote_functions +@_try_remote_functions def last_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column: """Returns the last value of `col` for a group of rows. It will return the last non-null value it sees when `ignoreNulls` is set to true. If all values are null, then null is returned. @@ -6697,7 +6267,7 @@ def last_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = return _invoke_function_over_columns("last_value", col, ignoreNulls) -@try_remote_functions +@_try_remote_functions def count_if(col: "ColumnOrName") -> Column: """Returns the number of `TRUE` values for the `col`. @@ -6730,7 +6300,7 @@ def count_if(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("count_if", col) -@try_remote_functions +@_try_remote_functions def histogram_numeric(col: "ColumnOrName", nBins: "ColumnOrName") -> Column: """Computes a histogram on numeric 'col' using nb bins. The return value is an array of (x,y) pairs representing the centers of the @@ -6774,7 +6344,7 @@ def histogram_numeric(col: "ColumnOrName", nBins: "ColumnOrName") -> Column: return _invoke_function_over_columns("histogram_numeric", col, nBins) -@try_remote_functions +@_try_remote_functions def ntile(n: int) -> Column: """ Window function: returns the ntile group id (from 1 to `n` inclusive) @@ -6835,7 +6405,7 @@ def ntile(n: int) -> Column: # ---------------------- Date/Timestamp functions ------------------------------ -@try_remote_functions +@_try_remote_functions def curdate() -> Column: """ Returns the current date at the start of query evaluation as a :class:`DateType` column. @@ -6861,7 +6431,7 @@ def curdate() -> Column: return _invoke_function("curdate") -@try_remote_functions +@_try_remote_functions def current_date() -> Column: """ Returns the current date at the start of query evaluation as a :class:`DateType` column. @@ -6890,7 +6460,7 @@ def current_date() -> Column: return _invoke_function("current_date") -@try_remote_functions +@_try_remote_functions def current_timezone() -> Column: """ Returns the current session local timezone. @@ -6916,7 +6486,7 @@ def current_timezone() -> Column: return _invoke_function("current_timezone") -@try_remote_functions +@_try_remote_functions def current_timestamp() -> Column: """ Returns the current timestamp at the start of query evaluation as a :class:`TimestampType` @@ -6945,7 +6515,7 @@ def current_timestamp() -> Column: return _invoke_function("current_timestamp") -@try_remote_functions +@_try_remote_functions def now() -> Column: """ Returns the current timestamp at the start of query evaluation. 
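Several hunks in this stretch touch window functions (lag, lead, nth_value, ntile); a compact illustrative sketch (toy frame assumed, not part of the patch):

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf
from pyspark.sql.window import Window

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("a", 1), ("a", 2), ("a", 3), ("a", 4)], ["k", "v"])
w = Window.partitionBy("k").orderBy("v")
# lag/lead read neighboring rows, with a default for rows outside the frame; ntile(2) splits the ordered rows into two buckets.
df.select("v", sf.lag("v", 1, 0).over(w), sf.lead("v", 1, -1).over(w), sf.ntile(2).over(w)).show()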
@@ -6970,7 +6540,7 @@ def now() -> Column: return _invoke_function("current_timestamp") -@try_remote_functions +@_try_remote_functions def localtimestamp() -> Column: """ Returns the current timestamp without time zone at the start of query evaluation @@ -7000,7 +6570,7 @@ def localtimestamp() -> Column: return _invoke_function("localtimestamp") -@try_remote_functions +@_try_remote_functions def date_format(date: "ColumnOrName", format: str) -> Column: """ Converts a date/timestamp/string to a value of string in the format specified by the date @@ -7041,7 +6611,7 @@ def date_format(date: "ColumnOrName", format: str) -> Column: return _invoke_function("date_format", _to_java_column(date), format) -@try_remote_functions +@_try_remote_functions def year(col: "ColumnOrName") -> Column: """ Extract the year of a given date/timestamp as integer. @@ -7070,7 +6640,7 @@ def year(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("year", col) -@try_remote_functions +@_try_remote_functions def quarter(col: "ColumnOrName") -> Column: """ Extract the quarter of a given date/timestamp as integer. @@ -7099,7 +6669,7 @@ def quarter(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("quarter", col) -@try_remote_functions +@_try_remote_functions def month(col: "ColumnOrName") -> Column: """ Extract the month of a given date/timestamp as integer. @@ -7128,7 +6698,7 @@ def month(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("month", col) -@try_remote_functions +@_try_remote_functions def dayofweek(col: "ColumnOrName") -> Column: """ Extract the day of the week of a given date/timestamp as integer. @@ -7158,7 +6728,7 @@ def dayofweek(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("dayofweek", col) -@try_remote_functions +@_try_remote_functions def dayofmonth(col: "ColumnOrName") -> Column: """ Extract the day of the month of a given date/timestamp as integer. @@ -7187,7 +6757,7 @@ def dayofmonth(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("dayofmonth", col) -@try_remote_functions +@_try_remote_functions def day(col: "ColumnOrName") -> Column: """ Extract the day of the month of a given date/timestamp as integer. @@ -7213,7 +6783,7 @@ def day(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("day", col) -@try_remote_functions +@_try_remote_functions def dayofyear(col: "ColumnOrName") -> Column: """ Extract the day of the year of a given date/timestamp as integer. @@ -7242,7 +6812,7 @@ def dayofyear(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("dayofyear", col) -@try_remote_functions +@_try_remote_functions def hour(col: "ColumnOrName") -> Column: """ Extract the hours of a given timestamp as integer. @@ -7272,7 +6842,7 @@ def hour(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("hour", col) -@try_remote_functions +@_try_remote_functions def minute(col: "ColumnOrName") -> Column: """ Extract the minutes of a given timestamp as integer. @@ -7302,7 +6872,7 @@ def minute(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("minute", col) -@try_remote_functions +@_try_remote_functions def second(col: "ColumnOrName") -> Column: """ Extract the seconds of a given date as integer. 
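To make the field extractors above concrete, a short sketch (sample timestamp invented):

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("2015-04-08 13:08:15",)], ["ts"]).select(sf.to_timestamp("ts").alias("ts"))
# Each extractor returns one integer field of the timestamp.
df.select(sf.year("ts"), sf.month("ts"), sf.dayofweek("ts"), sf.hour("ts"), sf.minute("ts")).show()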
@@ -7332,7 +6902,7 @@ def second(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("second", col) -@try_remote_functions +@_try_remote_functions def weekofyear(col: "ColumnOrName") -> Column: """ Extract the week number of a given date as integer. @@ -7363,7 +6933,7 @@ def weekofyear(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("weekofyear", col) -@try_remote_functions +@_try_remote_functions def weekday(col: "ColumnOrName") -> Column: """ Returns the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday). @@ -7393,7 +6963,7 @@ def weekday(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("weekday", col) -@try_remote_functions +@_try_remote_functions def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column: """ Extracts a part of the date/timestamp or interval source. @@ -7429,7 +6999,7 @@ def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column: return _invoke_function_over_columns("extract", field, source) -@try_remote_functions +@_try_remote_functions def date_part(field: "ColumnOrName", source: "ColumnOrName") -> Column: """ Extracts a part of the date/timestamp or interval source. @@ -7466,7 +7036,7 @@ def date_part(field: "ColumnOrName", source: "ColumnOrName") -> Column: return _invoke_function_over_columns("date_part", field, source) -@try_remote_functions +@_try_remote_functions def datepart(field: "ColumnOrName", source: "ColumnOrName") -> Column: """ Extracts a part of the date/timestamp or interval source. @@ -7503,7 +7073,7 @@ def datepart(field: "ColumnOrName", source: "ColumnOrName") -> Column: return _invoke_function_over_columns("datepart", field, source) -@try_remote_functions +@_try_remote_functions def make_date(year: "ColumnOrName", month: "ColumnOrName", day: "ColumnOrName") -> Column: """ Returns a column with a date built from the year, month and day columns. @@ -7536,7 +7106,7 @@ def make_date(year: "ColumnOrName", month: "ColumnOrName", day: "ColumnOrName") return _invoke_function_over_columns("make_date", year, month, day) -@try_remote_functions +@_try_remote_functions def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: """ Returns the date that is `days` days after `start`. If `days` is a negative value @@ -7574,7 +7144,7 @@ def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: return _invoke_function_over_columns("date_add", start, days) -@try_remote_functions +@_try_remote_functions def dateadd(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: """ Returns the date that is `days` days after `start`. If `days` is a negative value @@ -7631,7 +7201,7 @@ def dateadd(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: return _invoke_function_over_columns("dateadd", start, days) -@try_remote_functions +@_try_remote_functions def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: """ Returns the date that is `days` days before `start`. If `days` is a negative value @@ -7669,7 +7239,7 @@ def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: return _invoke_function_over_columns("date_sub", start, days) -@try_remote_functions +@_try_remote_functions def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column: """ Returns the number of days from `start` to `end`. 
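A minimal sketch (invented parts, not from this diff) combining the date constructors and arithmetic above:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(2020, 6, 26)], ["Y", "M", "D"])
# make_date assembles a DATE from its parts; date_add accepts a literal int or a column of days.
df.select(sf.date_add(sf.make_date("Y", "M", "D"), 7).alias("plus_week")).show()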
@@ -7700,7 +7270,7 @@ def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column: return _invoke_function_over_columns("datediff", end, start) -@try_remote_functions +@_try_remote_functions def date_diff(end: "ColumnOrName", start: "ColumnOrName") -> Column: """ Returns the number of days from `start` to `end`. @@ -7728,7 +7298,7 @@ def date_diff(end: "ColumnOrName", start: "ColumnOrName") -> Column: return _invoke_function_over_columns("date_diff", end, start) -@try_remote_functions +@_try_remote_functions def date_from_unix_date(days: "ColumnOrName") -> Column: """ Create date from the number of `days` since 1970-01-01. @@ -7758,7 +7328,7 @@ def date_from_unix_date(days: "ColumnOrName") -> Column: return _invoke_function_over_columns("date_from_unix_date", days) -@try_remote_functions +@_try_remote_functions def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Column: """ Returns the date that is `months` months after `start`. If `months` is a negative value @@ -7796,7 +7366,7 @@ def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Col return _invoke_function_over_columns("add_months", start, months) -@try_remote_functions +@_try_remote_functions def months_between(date1: "ColumnOrName", date2: "ColumnOrName", roundOff: bool = True) -> Column: """ Returns number of months between dates date1 and date2. @@ -7837,7 +7407,7 @@ def months_between(date1: "ColumnOrName", date2: "ColumnOrName", roundOff: bool ) -@try_remote_functions +@_try_remote_functions def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column: """Converts a :class:`~pyspark.sql.Column` into :class:`pyspark.sql.types.DateType` using the optionally specified format. Specify formats according to `datetime pattern`_. @@ -7879,7 +7449,7 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column: return _invoke_function("to_date", _to_java_column(col), format) -@try_remote_functions +@_try_remote_functions def unix_date(col: "ColumnOrName") -> Column: """Returns the number of days since 1970-01-01. @@ -7896,7 +7466,7 @@ def unix_date(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("unix_date", col) -@try_remote_functions +@_try_remote_functions def unix_micros(col: "ColumnOrName") -> Column: """Returns the number of microseconds since 1970-01-01 00:00:00 UTC. @@ -7913,7 +7483,7 @@ def unix_micros(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("unix_micros", col) -@try_remote_functions +@_try_remote_functions def unix_millis(col: "ColumnOrName") -> Column: """Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. Truncates higher levels of precision. @@ -7931,7 +7501,7 @@ def unix_millis(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("unix_millis", col) -@try_remote_functions +@_try_remote_functions def unix_seconds(col: "ColumnOrName") -> Column: """Returns the number of seconds since 1970-01-01 00:00:00 UTC. Truncates higher levels of precision. @@ -7959,7 +7529,7 @@ def to_timestamp(col: "ColumnOrName", format: str) -> Column: ... -@try_remote_functions +@_try_remote_functions def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column: """Converts a :class:`~pyspark.sql.Column` into :class:`pyspark.sql.types.TimestampType` using the optionally specified format. Specify formats according to `datetime pattern`_. 
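An illustrative sketch (sample dates invented) of to_date with an explicit pattern feeding months_between, as documented above:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("1997-02-28", "1996-10-30")], ["d1", "d2"])
# months_between yields a fractional month count here (the dates fall on different days of month);
# roundOff=False keeps the full precision instead of rounding to 8 digits.
df.select(sf.months_between(sf.to_date("d1", "yyyy-MM-dd"),
                            sf.to_date("d2", "yyyy-MM-dd"), roundOff=False)).show(truncate=False)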
@@ -8001,7 +7571,7 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column: return _invoke_function("to_timestamp", _to_java_column(col), format) -@try_remote_functions +@_try_remote_functions def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column: """ Parses the `col` with the `format` to a timestamp. The function always @@ -8032,7 +7602,7 @@ def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = Non return _invoke_function_over_columns("try_to_timestamp", col) -@try_remote_functions +@_try_remote_functions def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns a string array of values within the nodes of xml that match the XPath expression. @@ -8049,7 +7619,7 @@ def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath", xml, path) -@try_remote_functions +@_try_remote_functions def xpath_boolean(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns true if the XPath expression evaluates to true, or if a matching node is found. @@ -8065,7 +7635,7 @@ def xpath_boolean(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath_boolean", xml, path) -@try_remote_functions +@_try_remote_functions def xpath_double(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns a double value, the value zero if no match is found, @@ -8082,7 +7652,7 @@ def xpath_double(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath_double", xml, path) -@try_remote_functions +@_try_remote_functions def xpath_number(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns a double value, the value zero if no match is found, @@ -8105,7 +7675,7 @@ def xpath_number(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath_number", xml, path) -@try_remote_functions +@_try_remote_functions def xpath_float(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns a float value, the value zero if no match is found, @@ -8122,7 +7692,7 @@ def xpath_float(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath_float", xml, path) -@try_remote_functions +@_try_remote_functions def xpath_int(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns an integer value, or the value zero if no match is found, @@ -8139,7 +7709,7 @@ def xpath_int(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath_int", xml, path) -@try_remote_functions +@_try_remote_functions def xpath_long(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns a long integer value, or the value zero if no match is found, @@ -8156,7 +7726,7 @@ def xpath_long(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath_long", xml, path) -@try_remote_functions +@_try_remote_functions def xpath_short(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns a short integer value, or the value zero if no match is found, @@ -8173,7 +7743,7 @@ def xpath_short(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath_short", xml, path) -@try_remote_functions +@_try_remote_functions def xpath_string(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ Returns the text contents of the first xml node that matches the XPath expression. 
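The xpath family above takes the path as a column, so a literal path needs lit(); a hedged sketch with an invented XML fragment:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("<a><b>1</b><b>2</b></a>",)], ["xml"])
# xpath returns every match as an array of strings; xpath_int coerces its (numeric) result to an integer.
df.select(sf.xpath("xml", sf.lit("a/b/text()")),
          sf.xpath_int("xml", sf.lit("sum(a/b)"))).show(truncate=False)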
@@ -8189,7 +7759,7 @@ def xpath_string(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath_string", xml, path) -@try_remote_functions +@_try_remote_functions def trunc(date: "ColumnOrName", format: str) -> Column: """ Returns date truncated to the unit specified by the format. @@ -8224,7 +7794,7 @@ def trunc(date: "ColumnOrName", format: str) -> Column: return _invoke_function("trunc", _to_java_column(date), format) -@try_remote_functions +@_try_remote_functions def date_trunc(format: str, timestamp: "ColumnOrName") -> Column: """ Returns timestamp truncated to the unit specified by the format. @@ -8261,7 +7831,7 @@ def date_trunc(format: str, timestamp: "ColumnOrName") -> Column: return _invoke_function("date_trunc", format, _to_java_column(timestamp)) -@try_remote_functions +@_try_remote_functions def next_day(date: "ColumnOrName", dayOfWeek: str) -> Column: """ Returns the first date which is later than the value of the date column @@ -8294,7 +7864,7 @@ def next_day(date: "ColumnOrName", dayOfWeek: str) -> Column: return _invoke_function("next_day", _to_java_column(date), dayOfWeek) -@try_remote_functions +@_try_remote_functions def last_day(date: "ColumnOrName") -> Column: """ Returns the last day of the month which the given date belongs to. @@ -8323,7 +7893,7 @@ def last_day(date: "ColumnOrName") -> Column: return _invoke_function("last_day", _to_java_column(date)) -@try_remote_functions +@_try_remote_functions def from_unixtime(timestamp: "ColumnOrName", format: str = "yyyy-MM-dd HH:mm:ss") -> Column: """ Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string @@ -8368,7 +7938,7 @@ def unix_timestamp() -> Column: ... -@try_remote_functions +@_try_remote_functions def unix_timestamp( timestamp: Optional["ColumnOrName"] = None, format: str = "yyyy-MM-dd HH:mm:ss" ) -> Column: @@ -8409,7 +7979,7 @@ def unix_timestamp( return _invoke_function("unix_timestamp", _to_java_column(timestamp), format) -@try_remote_functions +@_try_remote_functions def from_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: """ This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. This function @@ -8463,7 +8033,7 @@ def from_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: return _invoke_function("from_utc_timestamp", _to_java_column(timestamp), tz) -@try_remote_functions +@_try_remote_functions def to_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: """ This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. This function @@ -8517,7 +8087,7 @@ def to_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: return _invoke_function("to_utc_timestamp", _to_java_column(timestamp), tz) -@try_remote_functions +@_try_remote_functions def timestamp_seconds(col: "ColumnOrName") -> Column: """ Converts the number of seconds from the Unix epoch (1970-01-01T00:00:00Z) @@ -8558,7 +8128,7 @@ def timestamp_seconds(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("timestamp_seconds", col) -@try_remote_functions +@_try_remote_functions def timestamp_millis(col: "ColumnOrName") -> Column: """ Creates timestamp from the number of milliseconds since UTC epoch. 
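A round-trip sketch (sample string invented) of the epoch conversions above; the session time zone is pinned so the result is deterministic:

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
spark.conf.set("spark.sql.session.timeZone", "UTC")
df = spark.createDataFrame([("2021-07-24 12:01:19",)], ["t"])
# unix_timestamp parses a string to epoch seconds; from_unixtime formats epoch seconds back to a string.
df.select(sf.from_unixtime(sf.unix_timestamp("t")).alias("round_trip")).show()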
@@ -8593,7 +8163,7 @@ def timestamp_millis(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("timestamp_millis", col) -@try_remote_functions +@_try_remote_functions def timestamp_micros(col: "ColumnOrName") -> Column: """ Creates timestamp from the number of microseconds since UTC epoch. @@ -8628,7 +8198,7 @@ def timestamp_micros(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("timestamp_micros", col) -@try_remote_functions +@_try_remote_functions def window( timeColumn: "ColumnOrName", windowDuration: str, @@ -8721,7 +8291,7 @@ def check_string_field(field, fieldName): # type: ignore[no-untyped-def] return _invoke_function("window", time_col, windowDuration) -@try_remote_functions +@_try_remote_functions def window_time( windowColumn: "ColumnOrName", ) -> Column: @@ -8772,7 +8342,7 @@ def window_time( return _invoke_function("window_time", window_col) -@try_remote_functions +@_try_remote_functions def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) -> Column: """ Generates session window given a timestamp specifying column. @@ -8837,7 +8407,7 @@ def check_field(field: Union[Column, str], fieldName: str) -> None: return _invoke_function("session_window", time_col, gap_duration) -@try_remote_functions +@_try_remote_functions def to_unix_timestamp( timestamp: "ColumnOrName", format: Optional["ColumnOrName"] = None, @@ -8868,7 +8438,7 @@ def to_unix_timestamp( return _invoke_function_over_columns("to_unix_timestamp", timestamp) -@try_remote_functions +@_try_remote_functions def to_timestamp_ltz( timestamp: "ColumnOrName", format: Optional["ColumnOrName"] = None, @@ -8904,7 +8474,7 @@ def to_timestamp_ltz( return _invoke_function_over_columns("to_timestamp_ltz", timestamp) -@try_remote_functions +@_try_remote_functions def to_timestamp_ntz( timestamp: "ColumnOrName", format: Optional["ColumnOrName"] = None, @@ -8943,7 +8513,7 @@ def to_timestamp_ntz( # ---------------------------- misc functions ---------------------------------- -@try_remote_functions +@_try_remote_functions def current_catalog() -> Column: """Returns the current catalog. @@ -8961,7 +8531,7 @@ def current_catalog() -> Column: return _invoke_function("current_catalog") -@try_remote_functions +@_try_remote_functions def current_database() -> Column: """Returns the current database. @@ -8979,7 +8549,7 @@ def current_database() -> Column: return _invoke_function("current_database") -@try_remote_functions +@_try_remote_functions def current_schema() -> Column: """Returns the current schema. @@ -8998,7 +8568,7 @@ def current_schema() -> Column: return _invoke_function("current_schema") -@try_remote_functions +@_try_remote_functions def current_user() -> Column: """Returns the current user. @@ -9016,7 +8586,7 @@ def current_user() -> Column: return _invoke_function("current_user") -@try_remote_functions +@_try_remote_functions def user() -> Column: """Returns the current user. @@ -9035,7 +8605,7 @@ def user() -> Column: return _invoke_function("user") -@try_remote_functions +@_try_remote_functions def crc32(col: "ColumnOrName") -> Column: """ Calculates the cyclic redundancy check value (CRC32) of a binary column and @@ -9064,7 +8634,7 @@ def crc32(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("crc32", col) -@try_remote_functions +@_try_remote_functions def md5(col: "ColumnOrName") -> Column: """Calculates the MD5 digest and returns the value as a 32 character hex string.
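The window() helper above is easiest to grasp through a tumbling-window aggregation; an illustrative sketch with made-up event data (not part of this patch):

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = (spark.createDataFrame([("2016-03-11 09:00:07", 1)], ["ts", "val"])
      .withColumn("ts", sf.to_timestamp("ts")))
# window() buckets rows into fixed 5-minute tumbling windows keyed by the timestamp column.
df.groupBy(sf.window("ts", "5 minutes")).agg(sf.sum("val")).show(truncate=False)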
@@ -9091,7 +8661,7 @@ def md5(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("md5", col) -@try_remote_functions +@_try_remote_functions def sha1(col: "ColumnOrName") -> Column: """Returns the hex string result of SHA-1. @@ -9118,7 +8688,7 @@ def sha1(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sha1", col) -@try_remote_functions +@_try_remote_functions def sha2(col: "ColumnOrName", numBits: int) -> Column: """Returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). The numBits indicates the desired bit length of the result, which must have a @@ -9156,7 +8726,7 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column: return _invoke_function("sha2", _to_java_column(col), numBits) -@try_remote_functions +@_try_remote_functions def hash(*cols: "ColumnOrName") -> Column: """Calculates the hash code of given columns, and returns the result as an int column. @@ -9200,7 +8770,7 @@ def hash(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("hash", cols) -@try_remote_functions +@_try_remote_functions def xxhash64(*cols: "ColumnOrName") -> Column: """Calculates the hash code of given columns using the 64-bit variant of the xxHash algorithm, and returns the result as a long column. The hash computation uses an initial seed of 42. @@ -9245,7 +8815,7 @@ def xxhash64(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("xxhash64", cols) -@try_remote_functions +@_try_remote_functions def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None) -> Column: """ Returns `null` if the input column is `true`; throws an exception @@ -9296,7 +8866,7 @@ def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None return _invoke_function("assert_true", _to_java_column(col), errMsg) -@try_remote_functions +@_try_remote_functions def raise_error(errMsg: Union[Column, str]) -> Column: """ Throws an exception with the provided error message. @@ -9339,7 +8909,7 @@ def raise_error(errMsg: Union[Column, str]) -> Column: # ---------------------- String/Binary functions ------------------------------ -@try_remote_functions +@_try_remote_functions def upper(col: "ColumnOrName") -> Column: """ Converts a string expression to upper case. @@ -9374,7 +8944,7 @@ def upper(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("upper", col) -@try_remote_functions +@_try_remote_functions def lower(col: "ColumnOrName") -> Column: """ Converts a string expression to lower case. @@ -9409,7 +8979,7 @@ def lower(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("lower", col) -@try_remote_functions +@_try_remote_functions def ascii(col: "ColumnOrName") -> Column: """ Computes the numeric value of the first character of the string column. @@ -9444,7 +9014,7 @@ def ascii(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("ascii", col) -@try_remote_functions +@_try_remote_functions def base64(col: "ColumnOrName") -> Column: """ Computes the BASE64 encoding of a binary column and returns it as a string column. @@ -9479,7 +9049,7 @@ def base64(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("base64", col) -@try_remote_functions +@_try_remote_functions def unbase64(col: "ColumnOrName") -> Column: """ Decodes a BASE64 encoded string column and returns it as a binary column. 
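sha2() is the only digest above that takes an extra plain-Python argument (numBits), which is why it dispatches through _invoke_function with _to_java_column instead of _invoke_function_over_columns. Usage sketch, assuming the `spark` session from before:

from pyspark.sql import functions as F

df = spark.createDataFrame([("Alice",)], ["name"])
df.select(
    F.sha2("name", 256).alias("sha256"),  # numBits must be 224, 256, 384, 512, or 0 (= 256)
    F.xxhash64("name").alias("xx"),       # 64-bit xxHash with a fixed seed of 42
).show(truncate=False)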
@@ -9516,7 +9086,7 @@ def unbase64(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("unbase64", col) -@try_remote_functions +@_try_remote_functions def ltrim(col: "ColumnOrName") -> Column: """ Trim the spaces from left end for the specified string value. @@ -9551,7 +9121,7 @@ def ltrim(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("ltrim", col) -@try_remote_functions +@_try_remote_functions def rtrim(col: "ColumnOrName") -> Column: """ Trim the spaces from right end for the specified string value. @@ -9586,7 +9156,7 @@ def rtrim(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("rtrim", col) -@try_remote_functions +@_try_remote_functions def trim(col: "ColumnOrName") -> Column: """ Trim the spaces from both ends for the specified string column. @@ -9621,7 +9191,7 @@ def trim(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("trim", col) -@try_remote_functions +@_try_remote_functions def concat_ws(sep: str, *cols: "ColumnOrName") -> Column: """ Concatenates multiple input string columns together into a single string column, @@ -9650,11 +9220,11 @@ def concat_ws(sep: str, *cols: "ColumnOrName") -> Column: >>> df.select(concat_ws('-', df.s, df.d).alias('s')).collect() [Row(s='abcd-123')] """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function("concat_ws", sep, _to_seq(sc, cols, _to_java_column)) -@try_remote_functions +@_try_remote_functions def decode(col: "ColumnOrName", charset: str) -> Column: """ Computes the first argument into a string from a binary using the provided character set @@ -9690,7 +9260,7 @@ def decode(col: "ColumnOrName", charset: str) -> Column: return _invoke_function("decode", _to_java_column(col), charset) -@try_remote_functions +@_try_remote_functions def encode(col: "ColumnOrName", charset: str) -> Column: """ Computes the first argument into a binary from a string using the provided character set @@ -9726,7 +9296,7 @@ def encode(col: "ColumnOrName", charset: str) -> Column: return _invoke_function("encode", _to_java_column(col), charset) -@try_remote_functions +@_try_remote_functions def format_number(col: "ColumnOrName", d: int) -> Column: """ Formats the number X to a format like '#,--#,--#.--', rounded to d decimal places @@ -9755,7 +9325,7 @@ def format_number(col: "ColumnOrName", d: int) -> Column: return _invoke_function("format_number", _to_java_column(col), d) -@try_remote_functions +@_try_remote_functions def format_string(format: str, *cols: "ColumnOrName") -> Column: """ Formats the arguments in printf-style and returns the result as a string column. @@ -9783,11 +9353,11 @@ def format_string(format: str, *cols: "ColumnOrName") -> Column: >>> df.select(format_string('%d %s', df.a, df.b).alias('v')).collect() [Row(v='5 hello')] """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function("format_string", format, _to_seq(sc, cols, _to_java_column)) -@try_remote_functions +@_try_remote_functions def instr(str: "ColumnOrName", substr: str) -> Column: """ Locate the position of the first occurrence of substr column in the given string. 
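concat_ws() and format_string() show the varargs pattern this patch touches throughout: the wrapper fetches the active SparkContext through the now-private helper and converts *cols into a JVM Seq of Columns with _to_seq before invoking the SQL function by name. From the caller's side nothing changes:

from pyspark.sql import functions as F

df = spark.createDataFrame([("abcd", "123")], ["s", "d"])
df.select(F.concat_ws("-", "s", "d").alias("joined")).show()  # abcd-123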
@@ -9824,7 +9394,7 @@ def instr(str: "ColumnOrName", substr: str) -> Column: return _invoke_function("instr", _to_java_column(str), substr) -@try_remote_functions +@_try_remote_functions def overlay( src: "ColumnOrName", replace: "ColumnOrName", @@ -9884,7 +9454,7 @@ def overlay( return _invoke_function("overlay", _to_java_column(src), _to_java_column(replace), pos, len) -@try_remote_functions +@_try_remote_functions def sentences( string: "ColumnOrName", language: Optional["ColumnOrName"] = None, @@ -9938,7 +9508,7 @@ def sentences( return _invoke_function_over_columns("sentences", string, language, country) -@try_remote_functions +@_try_remote_functions def substring(str: "ColumnOrName", pos: int, len: int) -> Column: """ Substring starts at `pos` and is of length `len` when str is String type or @@ -9977,7 +9547,7 @@ def substring(str: "ColumnOrName", pos: int, len: int) -> Column: return _invoke_function("substring", _to_java_column(str), pos, len) -@try_remote_functions +@_try_remote_functions def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column: """ Returns the substring from string str before count occurrences of the delimiter delim. @@ -10015,7 +9585,7 @@ def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column: return _invoke_function("substring_index", _to_java_column(str), delim, count) -@try_remote_functions +@_try_remote_functions def levenshtein( left: "ColumnOrName", right: "ColumnOrName", threshold: Optional[int] = None ) -> Column: @@ -10060,7 +9630,7 @@ def levenshtein( ) -@try_remote_functions +@_try_remote_functions def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column: """ Locate the position of the first occurrence of substr in a string column, after position pos. @@ -10098,7 +9668,7 @@ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column: return _invoke_function("locate", substr, _to_java_column(str), pos) -@try_remote_functions +@_try_remote_functions def lpad(col: "ColumnOrName", len: int, pad: str) -> Column: """ Left-pad the string column to width `len` with `pad`. @@ -10131,7 +9701,7 @@ def lpad(col: "ColumnOrName", len: int, pad: str) -> Column: return _invoke_function("lpad", _to_java_column(col), len, pad) -@try_remote_functions +@_try_remote_functions def rpad(col: "ColumnOrName", len: int, pad: str) -> Column: """ Right-pad the string column to width `len` with `pad`. @@ -10164,7 +9734,7 @@ def rpad(col: "ColumnOrName", len: int, pad: str) -> Column: return _invoke_function("rpad", _to_java_column(col), len, pad) -@try_remote_functions +@_try_remote_functions def repeat(col: "ColumnOrName", n: Union["ColumnOrName", int]) -> Column: """ Repeats a string column n times, and returns it as a new string column. @@ -10225,7 +9795,7 @@ def repeat(col: "ColumnOrName", n: Union["ColumnOrName", int]) -> Column: return _invoke_function_over_columns("repeat", col, n) -@try_remote_functions +@_try_remote_functions def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: """ Splits str around matches of the given pattern. @@ -10270,7 +9840,7 @@ def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: return _invoke_function("split", _to_java_column(str), pattern, limit) -@try_remote_functions +@_try_remote_functions def rlike(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: r"""Returns true if `str` matches the Java regex `regexp`, or false otherwise. 
@@ -10301,7 +9871,7 @@ def rlike(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: return _invoke_function_over_columns("rlike", str, regexp) -@try_remote_functions +@_try_remote_functions def regexp(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: r"""Returns true if `str` matches the Java regex `regexp`, or false otherwise. @@ -10354,7 +9924,7 @@ def regexp(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: return _invoke_function_over_columns("regexp", str, regexp) -@try_remote_functions +@_try_remote_functions def regexp_like(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: r"""Returns true if `str` matches the Java regex `regexp`, or false otherwise. @@ -10407,7 +9977,7 @@ def regexp_like(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: return _invoke_function_over_columns("regexp_like", str, regexp) -@try_remote_functions +@_try_remote_functions def regexp_count(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: r"""Returns a count of the number of times that the Java regex pattern `regexp` is matched in the string `str`. @@ -10439,7 +10009,7 @@ def regexp_count(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: return _invoke_function_over_columns("regexp_count", str, regexp) -@try_remote_functions +@_try_remote_functions def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column: r"""Extract a specific group matched by the Java regex `regexp`, from the specified string column. If the regex did not match, or the specified group did not match, an empty string is returned. @@ -10478,7 +10048,7 @@ def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column: return _invoke_function("regexp_extract", _to_java_column(str), pattern, idx) -@try_remote_functions +@_try_remote_functions def regexp_extract_all( str: "ColumnOrName", regexp: "ColumnOrName", idx: Optional[Union[int, Column]] = None ) -> Column: @@ -10520,7 +10090,7 @@ def regexp_extract_all( return _invoke_function_over_columns("regexp_extract_all", str, regexp, idx) -@try_remote_functions +@_try_remote_functions def regexp_replace( string: "ColumnOrName", pattern: Union[str, Column], replacement: Union[str, Column] ) -> Column: @@ -10564,7 +10134,7 @@ def regexp_replace( return _invoke_function("regexp_replace", _to_java_column(string), pattern_col, replacement_col) -@try_remote_functions +@_try_remote_functions def regexp_substr(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: r"""Returns the substring that matches the Java regex `regexp` within the string `str`. If the regular expression is not found, the result is null. @@ -10596,7 +10166,7 @@ def regexp_substr(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: return _invoke_function_over_columns("regexp_substr", str, regexp) -@try_remote_functions +@_try_remote_functions def regexp_instr( str: "ColumnOrName", regexp: "ColumnOrName", idx: Optional[Union[int, Column]] = None ) -> Column: @@ -10638,7 +10208,7 @@ def regexp_instr( return _invoke_function_over_columns("regexp_instr", str, regexp, idx) -@try_remote_functions +@_try_remote_functions def initcap(col: "ColumnOrName") -> Column: """Translate the first letter of each word to upper case in the sentence. 
@@ -10665,7 +10235,7 @@ def initcap(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("initcap", col) -@try_remote_functions +@_try_remote_functions def soundex(col: "ColumnOrName") -> Column: """ Returns the SoundEx encoding for a string @@ -10694,7 +10264,7 @@ def soundex(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("soundex", col) -@try_remote_functions +@_try_remote_functions def bin(col: "ColumnOrName") -> Column: """Returns the string representation of the binary value of the given column. @@ -10722,7 +10292,7 @@ def bin(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bin", col) -@try_remote_functions +@_try_remote_functions def hex(col: "ColumnOrName") -> Column: """Computes hex value of the given column, which could be :class:`pyspark.sql.types.StringType`, :class:`pyspark.sql.types.BinaryType`, :class:`pyspark.sql.types.IntegerType` or @@ -10751,7 +10321,7 @@ def hex(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("hex", col) -@try_remote_functions +@_try_remote_functions def unhex(col: "ColumnOrName") -> Column: """Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the byte representation of number. @@ -10779,7 +10349,7 @@ def unhex(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("unhex", col) -@try_remote_functions +@_try_remote_functions def length(col: "ColumnOrName") -> Column: """Computes the character length of string data or number of bytes of binary data. The length of character data includes the trailing spaces. The length of binary data @@ -10808,7 +10378,7 @@ def length(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("length", col) -@try_remote_functions +@_try_remote_functions def octet_length(col: "ColumnOrName") -> Column: """ Calculates the byte length for the specified string column. @@ -10838,7 +10408,7 @@ def octet_length(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("octet_length", col) -@try_remote_functions +@_try_remote_functions def bit_length(col: "ColumnOrName") -> Column: """ Calculates the bit length for the specified string column. @@ -10868,7 +10438,7 @@ def bit_length(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bit_length", col) -@try_remote_functions +@_try_remote_functions def translate(srcCol: "ColumnOrName", matching: str, replace: str) -> Column: """A function translate any character in the `srcCol` by a character in `matching`. The characters in `replace` is corresponding to the characters in `matching`. @@ -10904,7 +10474,7 @@ def translate(srcCol: "ColumnOrName", matching: str, replace: str) -> Column: return _invoke_function("translate", _to_java_column(srcCol), matching, replace) -@try_remote_functions +@_try_remote_functions def to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column: """ Converts the input `col` to a binary value based on the supplied `format`. @@ -10938,7 +10508,7 @@ def to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> C return _invoke_function_over_columns("to_binary", col) -@try_remote_functions +@_try_remote_functions def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column: """ Convert `col` to a string based on the `format`. 
@@ -10984,7 +10554,7 @@ def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column: return _invoke_function_over_columns("to_char", col, format) -@try_remote_functions +@_try_remote_functions def to_varchar(col: "ColumnOrName", format: "ColumnOrName") -> Column: """ Convert `col` to a string based on the `format`. @@ -11030,7 +10600,7 @@ def to_varchar(col: "ColumnOrName", format: "ColumnOrName") -> Column: return _invoke_function_over_columns("to_varchar", col, format) -@try_remote_functions +@_try_remote_functions def to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column: """ Convert string 'col' to a number based on the string format 'format'. @@ -11071,7 +10641,7 @@ def to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column: return _invoke_function_over_columns("to_number", col, format) -@try_remote_functions +@_try_remote_functions def replace( src: "ColumnOrName", search: "ColumnOrName", replace: Optional["ColumnOrName"] = None ) -> Column: @@ -11105,7 +10675,7 @@ def replace( return _invoke_function_over_columns("replace", src, search) -@try_remote_functions +@_try_remote_functions def split_part(src: "ColumnOrName", delimiter: "ColumnOrName", partNum: "ColumnOrName") -> Column: """ Splits `str` by delimiter and return requested part of the split (1-based). @@ -11134,7 +10704,7 @@ def split_part(src: "ColumnOrName", delimiter: "ColumnOrName", partNum: "ColumnO return _invoke_function_over_columns("split_part", src, delimiter, partNum) -@try_remote_functions +@_try_remote_functions def substr( str: "ColumnOrName", pos: "ColumnOrName", len: Optional["ColumnOrName"] = None ) -> Column: @@ -11181,7 +10751,7 @@ def substr( return _invoke_function_over_columns("substr", str, pos) -@try_remote_functions +@_try_remote_functions def parse_url( url: "ColumnOrName", partToExtract: "ColumnOrName", key: Optional["ColumnOrName"] = None ) -> Column: @@ -11217,7 +10787,7 @@ def parse_url( return _invoke_function_over_columns("parse_url", url, partToExtract) -@try_remote_functions +@_try_remote_functions def printf(format: "ColumnOrName", *cols: "ColumnOrName") -> Column: """ Formats the arguments in printf-style and returns the result as a string column. @@ -11243,11 +10813,11 @@ def printf(format: "ColumnOrName", *cols: "ColumnOrName") -> Column: | aa123cc| +---------------+ """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function("printf", _to_java_column(format), _to_seq(sc, cols, _to_java_column)) -@try_remote_functions +@_try_remote_functions def url_decode(str: "ColumnOrName") -> Column: """ Decodes a `str` in 'application/x-www-form-urlencoded' format @@ -11269,7 +10839,7 @@ def url_decode(str: "ColumnOrName") -> Column: return _invoke_function_over_columns("url_decode", str) -@try_remote_functions +@_try_remote_functions def url_encode(str: "ColumnOrName") -> Column: """ Translates a string into 'application/x-www-form-urlencoded' format @@ -11291,7 +10861,7 @@ def url_encode(str: "ColumnOrName") -> Column: return _invoke_function_over_columns("url_encode", str) -@try_remote_functions +@_try_remote_functions def position( substr: "ColumnOrName", str: "ColumnOrName", start: Optional["ColumnOrName"] = None ) -> Column: @@ -11337,7 +10907,7 @@ def position( return _invoke_function_over_columns("position", substr, str) -@try_remote_functions +@_try_remote_functions def endswith(str: "ColumnOrName", suffix: "ColumnOrName") -> Column: """ Returns a boolean. The value is True if str ends with suffix. 
@@ -11375,7 +10945,7 @@ def endswith(str: "ColumnOrName", suffix: "ColumnOrName") -> Column: return _invoke_function_over_columns("endswith", str, suffix) -@try_remote_functions +@_try_remote_functions def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column: """ Returns a boolean. The value is True if str starts with prefix. @@ -11413,7 +10983,7 @@ def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column: return _invoke_function_over_columns("startswith", str, prefix) -@try_remote_functions +@_try_remote_functions def char(col: "ColumnOrName") -> Column: """ Returns the ASCII character having the binary equivalent to `col`. If col is larger than 256 the @@ -11439,7 +11009,7 @@ def char(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("char", col) -@try_remote_functions +@_try_remote_functions def btrim(str: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: """ Remove the leading and trailing `trim` characters from `str`. @@ -11469,7 +11039,7 @@ def btrim(str: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: return _invoke_function_over_columns("btrim", str) -@try_remote_functions +@_try_remote_functions def char_length(str: "ColumnOrName") -> Column: """ Returns the character length of string data or number of bytes of binary data. @@ -11496,7 +11066,7 @@ def char_length(str: "ColumnOrName") -> Column: return _invoke_function_over_columns("char_length", str) -@try_remote_functions +@_try_remote_functions def character_length(str: "ColumnOrName") -> Column: """ Returns the character length of string data or number of bytes of binary data. @@ -11523,7 +11093,7 @@ def character_length(str: "ColumnOrName") -> Column: return _invoke_function_over_columns("character_length", str) -@try_remote_functions +@_try_remote_functions def try_to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column: """ This is a special version of `to_binary` that performs the same operation, but returns a NULL @@ -11554,7 +11124,7 @@ def try_to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) return _invoke_function_over_columns("try_to_binary", col) -@try_remote_functions +@_try_remote_functions def try_to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column: """ Convert string 'col' to a number based on the string format `format`. Returns NULL if the @@ -11579,7 +11149,7 @@ def try_to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_to_number", col, format) -@try_remote_functions +@_try_remote_functions def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column: """ Returns a boolean. The value is True if right is found inside left. @@ -11617,7 +11187,7 @@ def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column: return _invoke_function_over_columns("contains", left, right) -@try_remote_functions +@_try_remote_functions def elt(*inputs: "ColumnOrName") -> Column: """ Returns the `n`-th input, e.g., returns `input2` when `n` is 2. 
@@ -11638,11 +11208,11 @@ def elt(*inputs: "ColumnOrName") -> Column: >>> df.select(elt(df.a, df.b, df.c).alias('r')).collect() [Row(r='scala')] """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function("elt", _to_seq(sc, inputs, _to_java_column)) -@try_remote_functions +@_try_remote_functions def find_in_set(str: "ColumnOrName", str_array: "ColumnOrName") -> Column: """ Returns the index (1-based) of the given string (`str`) in the comma-delimited @@ -11667,7 +11237,7 @@ def find_in_set(str: "ColumnOrName", str_array: "ColumnOrName") -> Column: return _invoke_function_over_columns("find_in_set", str, str_array) -@try_remote_functions +@_try_remote_functions def like( str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None ) -> Column: @@ -11717,7 +11287,7 @@ def like( return _invoke_function_over_columns("like", str, pattern) -@try_remote_functions +@_try_remote_functions def ilike( str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None ) -> Column: @@ -11767,7 +11337,7 @@ def ilike( return _invoke_function_over_columns("ilike", str, pattern) -@try_remote_functions +@_try_remote_functions def lcase(str: "ColumnOrName") -> Column: """ Returns `str` with all characters changed to lowercase. @@ -11792,7 +11362,7 @@ def lcase(str: "ColumnOrName") -> Column: return _invoke_function_over_columns("lcase", str) -@try_remote_functions +@_try_remote_functions def ucase(str: "ColumnOrName") -> Column: """ Returns `str` with all characters changed to uppercase. @@ -11817,7 +11387,7 @@ def ucase(str: "ColumnOrName") -> Column: return _invoke_function_over_columns("ucase", str) -@try_remote_functions +@_try_remote_functions def left(str: "ColumnOrName", len: "ColumnOrName") -> Column: """ Returns the leftmost `len`(`len` can be string type) characters from the string `str`, @@ -11841,7 +11411,7 @@ def left(str: "ColumnOrName", len: "ColumnOrName") -> Column: return _invoke_function_over_columns("left", str, len) -@try_remote_functions +@_try_remote_functions def right(str: "ColumnOrName", len: "ColumnOrName") -> Column: """ Returns the rightmost `len`(`len` can be string type) characters from the string `str`, @@ -11865,7 +11435,7 @@ def right(str: "ColumnOrName", len: "ColumnOrName") -> Column: return _invoke_function_over_columns("right", str, len) -@try_remote_functions +@_try_remote_functions def mask( col: "ColumnOrName", upperChar: Optional["ColumnOrName"] = None, @@ -11933,7 +11503,7 @@ def create_map(__cols: Union[List["ColumnOrName_"], Tuple["ColumnOrName_", ...]] ... -@try_remote_functions +@_try_remote_functions def create_map( *cols: Union["ColumnOrName", Union[List["ColumnOrName_"], Tuple["ColumnOrName_", ...]]] ) -> Column: @@ -11963,7 +11533,7 @@ def create_map( return _invoke_function_over_seq_of_columns("map", cols) # type: ignore[arg-type] -@try_remote_functions +@_try_remote_functions def map_from_arrays(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """Creates a new map from two arrays. @@ -12013,7 +11583,7 @@ def array(__cols: Union[List["ColumnOrName_"], Tuple["ColumnOrName_", ...]]) -> ... 
-@try_remote_functions +@_try_remote_functions def array( *cols: Union["ColumnOrName", Union[List["ColumnOrName_"], Tuple["ColumnOrName_", ...]]] ) -> Column: @@ -12052,7 +11622,7 @@ def array( return _invoke_function_over_seq_of_columns("array", cols) # type: ignore[arg-type] -@try_remote_functions +@_try_remote_functions def array_contains(col: "ColumnOrName", value: Any) -> Column: """ Collection function: returns null if the array is null, true if the array contains the @@ -12087,7 +11657,7 @@ def array_contains(col: "ColumnOrName", value: Any) -> Column: return _invoke_function("array_contains", _to_java_column(col), value) -@try_remote_functions +@_try_remote_functions def arrays_overlap(a1: "ColumnOrName", a2: "ColumnOrName") -> Column: """ Collection function: returns true if the arrays contain any common non-null element; if not, @@ -12113,7 +11683,7 @@ def arrays_overlap(a1: "ColumnOrName", a2: "ColumnOrName") -> Column: return _invoke_function_over_columns("arrays_overlap", a1, a2) -@try_remote_functions +@_try_remote_functions def slice( x: "ColumnOrName", start: Union["ColumnOrName", int], length: Union["ColumnOrName", int] ) -> Column: @@ -12152,7 +11722,7 @@ def slice( return _invoke_function_over_columns("slice", x, start, length) -@try_remote_functions +@_try_remote_functions def array_join( col: "ColumnOrName", delimiter: str, null_replacement: Optional[str] = None ) -> Column: @@ -12187,14 +11757,14 @@ def array_join( >>> df.select(array_join(df.data, ",", "NULL").alias("joined")).collect() [Row(joined='a,b,c'), Row(joined='a,NULL')] """ - get_active_spark_context() + _get_active_spark_context() if null_replacement is None: return _invoke_function("array_join", _to_java_column(col), delimiter) else: return _invoke_function("array_join", _to_java_column(col), delimiter, null_replacement) -@try_remote_functions +@_try_remote_functions def concat(*cols: "ColumnOrName") -> Column: """ Concatenates multiple input columns together into a single column. @@ -12238,7 +11808,7 @@ def concat(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("concat", cols) -@try_remote_functions +@_try_remote_functions def array_position(col: "ColumnOrName", value: Any) -> Column: """ Collection function: Locates the position of the first occurrence of the given value @@ -12275,7 +11845,7 @@ def array_position(col: "ColumnOrName", value: Any) -> Column: return _invoke_function("array_position", _to_java_column(col), value) -@try_remote_functions +@_try_remote_functions def element_at(col: "ColumnOrName", extraction: Any) -> Column: """ Collection function: Returns element of array at given index in `extraction` if col is array. @@ -12323,7 +11893,7 @@ def element_at(col: "ColumnOrName", extraction: Any) -> Column: return _invoke_function_over_columns("element_at", col, lit(extraction)) -@try_remote_functions +@_try_remote_functions def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column: """ (array, index) - Returns element of array at given (1-based) index. If Index is 0, Spark will @@ -12357,7 +11927,7 @@ def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_element_at", col, extraction) -@try_remote_functions +@_try_remote_functions def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column: """ Collection function: Returns element of array at given (0-based) index. 
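element_at() is 1-based and can raise on out-of-range indices under ANSI mode, while try_element_at() returns NULL instead; note that element_at() wraps a plain Python value with lit() for you, whereas try_element_at() expects a column. Sketch:

from pyspark.sql import functions as F

df = spark.createDataFrame([(["a", "b", "c"],)], ["data"])
df.select(
    F.element_at("data", 1).alias("first"),           # 'a'
    F.try_element_at("data", F.lit(9)).alias("oob"),  # NULL rather than an error
).show()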
@@ -12430,7 +12000,7 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column: return _invoke_function_over_columns("get", col, index) -@try_remote_functions +@_try_remote_functions def array_prepend(col: "ColumnOrName", value: Any) -> Column: """ Collection function: Returns an array containing element as @@ -12460,7 +12030,7 @@ def array_prepend(col: "ColumnOrName", value: Any) -> Column: return _invoke_function_over_columns("array_prepend", col, lit(value)) -@try_remote_functions +@_try_remote_functions def array_remove(col: "ColumnOrName", element: Any) -> Column: """ Collection function: Remove all elements that equal to element from the given array. @@ -12491,7 +12061,7 @@ def array_remove(col: "ColumnOrName", element: Any) -> Column: return _invoke_function("array_remove", _to_java_column(col), element) -@try_remote_functions +@_try_remote_functions def array_distinct(col: "ColumnOrName") -> Column: """ Collection function: removes duplicate values from the array. @@ -12520,7 +12090,7 @@ def array_distinct(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_distinct", col) -@try_remote_functions +@_try_remote_functions def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: Any) -> Column: """ Collection function: adds an item into a given array at a specified array index. @@ -12565,7 +12135,7 @@ def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: An return _invoke_function_over_columns("array_insert", arr, pos, lit(value)) -@try_remote_functions +@_try_remote_functions def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Collection function: returns an array of the elements in the intersection of col1 and col2, @@ -12598,7 +12168,7 @@ def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_intersect", col1, col2) -@try_remote_functions +@_try_remote_functions def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Collection function: returns an array of the elements in the union of col1 and col2, @@ -12631,7 +12201,7 @@ def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_union", col1, col2) -@try_remote_functions +@_try_remote_functions def array_except(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Collection function: returns an array of the elements in col1 but not in col2, @@ -12664,7 +12234,7 @@ def array_except(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_except", col1, col2) -@try_remote_functions +@_try_remote_functions def array_compact(col: "ColumnOrName") -> Column: """ Collection function: removes null values from the array. @@ -12694,7 +12264,7 @@ def array_compact(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_compact", col) -@try_remote_functions +@_try_remote_functions def array_append(col: "ColumnOrName", value: Any) -> Column: """ Collection function: returns an array of the elements in col1 along @@ -12730,7 +12300,7 @@ def array_append(col: "ColumnOrName", value: Any) -> Column: return _invoke_function_over_columns("array_append", col, lit(value)) -@try_remote_functions +@_try_remote_functions def explode(col: "ColumnOrName") -> Column: """ Returns a new row for each element in the given array or map. 
@@ -12775,7 +12345,7 @@ def explode(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("explode", col) -@try_remote_functions +@_try_remote_functions def posexplode(col: "ColumnOrName") -> Column: """ Returns a new row for each element with position in the given array or map. @@ -12814,7 +12384,7 @@ def posexplode(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("posexplode", col) -@try_remote_functions +@_try_remote_functions def inline(col: "ColumnOrName") -> Column: """ Explodes an array of structs into a table. @@ -12854,7 +12424,7 @@ def inline(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("inline", col) -@try_remote_functions +@_try_remote_functions def explode_outer(col: "ColumnOrName") -> Column: """ Returns a new row for each element in the given array or map. @@ -12905,7 +12475,7 @@ def explode_outer(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("explode_outer", col) -@try_remote_functions +@_try_remote_functions def posexplode_outer(col: "ColumnOrName") -> Column: """ Returns a new row for each element with position in the given array or map. @@ -12955,7 +12525,7 @@ def posexplode_outer(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("posexplode_outer", col) -@try_remote_functions +@_try_remote_functions def inline_outer(col: "ColumnOrName") -> Column: """ Explodes an array of structs into a table. @@ -13001,7 +12571,7 @@ def inline_outer(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("inline_outer", col) -@try_remote_functions +@_try_remote_functions def get_json_object(col: "ColumnOrName", path: str) -> Column: """ Extracts json object from a json string based on json `path` specified, and returns json string @@ -13035,7 +12605,7 @@ def get_json_object(col: "ColumnOrName", path: str) -> Column: return _invoke_function("get_json_object", _to_java_column(col), path) -@try_remote_functions +@_try_remote_functions def json_tuple(col: "ColumnOrName", *fields: str) -> Column: """Creates a new row for a json column according to the given field names. @@ -13063,11 +12633,11 @@ def json_tuple(col: "ColumnOrName", *fields: str) -> Column: >>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect() [Row(key='1', c0='value1', c1='value2'), Row(key='2', c0='value12', c1=None)] """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function("json_tuple", _to_java_column(col), _to_seq(sc, fields)) -@try_remote_functions +@_try_remote_functions def from_json( col: "ColumnOrName", schema: Union[ArrayType, StructType, Column, str], @@ -13136,7 +12706,7 @@ def from_json( return _invoke_function("from_json", _to_java_column(col), schema, _options_to_str(options)) -@try_remote_functions +@_try_remote_functions def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: """ Converts a column containing a :class:`StructType`, :class:`ArrayType` or a :class:`MapType` @@ -13194,7 +12764,7 @@ def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Co return _invoke_function("to_json", _to_java_column(col), _options_to_str(options)) -@try_remote_functions +@_try_remote_functions def schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: """ Parses a JSON string and infers its schema in DDL format. 
@@ -13245,7 +12815,7 @@ def schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = Non return _invoke_function("schema_of_json", col, _options_to_str(options)) -@try_remote_functions +@_try_remote_functions def json_array_length(col: "ColumnOrName") -> Column: """ Returns the number of elements in the outermost JSON array. `NULL` is returned in case of @@ -13272,7 +12842,7 @@ def json_array_length(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("json_array_length", col) -@try_remote_functions +@_try_remote_functions def json_object_keys(col: "ColumnOrName") -> Column: """ Returns all the keys of the outermost JSON object as an array. If a valid JSON object is @@ -13300,7 +12870,7 @@ def json_object_keys(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("json_object_keys", col) -@try_remote_functions +@_try_remote_functions def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: """ Parses a CSV string and infers its schema in DDL format. @@ -13347,7 +12917,7 @@ def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) return _invoke_function("schema_of_csv", col, _options_to_str(options)) -@try_remote_functions +@_try_remote_functions def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: """ Converts a column containing a :class:`StructType` into a CSV string. @@ -13386,7 +12956,7 @@ def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Col return _invoke_function("to_csv", _to_java_column(col), _options_to_str(options)) -@try_remote_functions +@_try_remote_functions def size(col: "ColumnOrName") -> Column: """ Collection function: returns the length of the array or map stored in the column. @@ -13415,7 +12985,7 @@ def size(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("size", col) -@try_remote_functions +@_try_remote_functions def array_min(col: "ColumnOrName") -> Column: """ Collection function: returns the minimum value of the array. @@ -13444,7 +13014,7 @@ def array_min(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_min", col) -@try_remote_functions +@_try_remote_functions def array_max(col: "ColumnOrName") -> Column: """ Collection function: returns the maximum value of the array. @@ -13473,7 +13043,7 @@ def array_max(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_max", col) -@try_remote_functions +@_try_remote_functions def array_size(col: "ColumnOrName") -> Column: """ Returns the total number of elements in the array. The function returns null for null input. @@ -13499,7 +13069,7 @@ def array_size(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("array_size", col) -@try_remote_functions +@_try_remote_functions def cardinality(col: "ColumnOrName") -> Column: """ Collection function: returns the length of the array or map stored in the column. 
@@ -13533,7 +13103,7 @@ def cardinality(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("cardinality", col) -@try_remote_functions +@_try_remote_functions def sort_array(col: "ColumnOrName", asc: bool = True) -> Column: """ Collection function: sorts the input array in ascending or descending order according @@ -13570,7 +13140,7 @@ def sort_array(col: "ColumnOrName", asc: bool = True) -> Column: return _invoke_function("sort_array", _to_java_column(col), asc) -@try_remote_functions +@_try_remote_functions def array_sort( col: "ColumnOrName", comparator: Optional[Callable[[Column, Column], Column]] = None ) -> Column: @@ -13620,7 +13190,7 @@ def array_sort( return _invoke_higher_order_function("ArraySort", [col], [comparator]) -@try_remote_functions +@_try_remote_functions def shuffle(col: "ColumnOrName") -> Column: """ Collection function: Generates a random permutation of the given array. @@ -13653,7 +13223,7 @@ def shuffle(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("shuffle", col) -@try_remote_functions +@_try_remote_functions def reverse(col: "ColumnOrName") -> Column: """ Collection function: returns a reversed string or an array with reverse order of elements. @@ -13685,7 +13255,7 @@ def reverse(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("reverse", col) -@try_remote_functions +@_try_remote_functions def flatten(col: "ColumnOrName") -> Column: """ Collection function: creates a single array from an array of arrays. @@ -13728,7 +13298,7 @@ def flatten(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("flatten", col) -@try_remote_functions +@_try_remote_functions def map_contains_key(col: "ColumnOrName", value: Any) -> Column: """ Returns true if the map contains the key. @@ -13770,7 +13340,7 @@ def map_contains_key(col: "ColumnOrName", value: Any) -> Column: return _invoke_function("map_contains_key", _to_java_column(col), value) -@try_remote_functions +@_try_remote_functions def map_keys(col: "ColumnOrName") -> Column: """ Collection function: Returns an unordered array containing the keys of the map. @@ -13804,7 +13374,7 @@ def map_keys(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("map_keys", col) -@try_remote_functions +@_try_remote_functions def map_values(col: "ColumnOrName") -> Column: """ Collection function: Returns an unordered array containing the values of the map. @@ -13838,7 +13408,7 @@ def map_values(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("map_values", col) -@try_remote_functions +@_try_remote_functions def map_entries(col: "ColumnOrName") -> Column: """ Collection function: Returns an unordered array of all entries in the given map. @@ -13879,7 +13449,7 @@ def map_entries(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("map_entries", col) -@try_remote_functions +@_try_remote_functions def map_from_entries(col: "ColumnOrName") -> Column: """ Collection function: Converts an array of entries (key value struct types) to a map @@ -13914,7 +13484,7 @@ def map_from_entries(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("map_from_entries", col) -@try_remote_functions +@_try_remote_functions def array_repeat(col: "ColumnOrName", count: Union["ColumnOrName", int]) -> Column: """ Collection function: creates an array containing a column repeated count times. 
@@ -13947,7 +13517,7 @@ def array_repeat(col: "ColumnOrName", count: Union["ColumnOrName", int]) -> Colu return _invoke_function_over_columns("array_repeat", col, count) -@try_remote_functions +@_try_remote_functions def arrays_zip(*cols: "ColumnOrName") -> Column: """ Collection function: Returns a merged array of structs in which the N-th struct contains all @@ -14001,7 +13571,7 @@ def map_concat(__cols: Union[List["ColumnOrName_"], Tuple["ColumnOrName_", ...]] ... -@try_remote_functions +@_try_remote_functions def map_concat( *cols: Union["ColumnOrName", Union[List["ColumnOrName_"], Tuple["ColumnOrName_", ...]]] ) -> Column: @@ -14038,7 +13608,7 @@ def map_concat( return _invoke_function_over_seq_of_columns("map_concat", cols) # type: ignore[arg-type] -@try_remote_functions +@_try_remote_functions def sequence( start: "ColumnOrName", stop: "ColumnOrName", step: Optional["ColumnOrName"] = None ) -> Column: @@ -14081,7 +13651,7 @@ def sequence( return _invoke_function_over_columns("sequence", start, stop, step) -@try_remote_functions +@_try_remote_functions def from_csv( col: "ColumnOrName", schema: Union[Column, str], @@ -14130,7 +13700,7 @@ def from_csv( [Row(csv=Row(s='abc'))] """ - get_active_spark_context() + _get_active_spark_context() if isinstance(schema, str): schema = _create_column_from_literal(schema) elif isinstance(schema, Column): @@ -14156,7 +13726,7 @@ def _unresolved_named_lambda_variable(*name_parts: Any) -> Column: ---------- name_parts : str """ - sc = get_active_spark_context() + sc = _get_active_spark_context() name_parts_seq = _to_seq(sc, name_parts) expressions = cast(JVMView, sc._jvm).org.apache.spark.sql.catalyst.expressions return Column( @@ -14206,7 +13776,7 @@ def _create_lambda(f: Callable) -> Callable: """ parameters = _get_lambda_parameters(f) - sc = get_active_spark_context() + sc = _get_active_spark_context() expressions = cast(JVMView, sc._jvm).org.apache.spark.sql.catalyst.expressions argnames = ["x", "y", "z"] @@ -14247,7 +13817,7 @@ def _invoke_higher_order_function( :return: a Column """ - sc = get_active_spark_context() + sc = _get_active_spark_context() expressions = cast(JVMView, sc._jvm).org.apache.spark.sql.catalyst.expressions expr = getattr(expressions, name) @@ -14267,7 +13837,7 @@ def transform(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Col ... -@try_remote_functions +@_try_remote_functions def transform( col: "ColumnOrName", f: Union[Callable[[Column], Column], Callable[[Column, Column], Column]], @@ -14325,7 +13895,7 @@ def transform( return _invoke_higher_order_function("ArrayTransform", [col], [f]) -@try_remote_functions +@_try_remote_functions def exists(col: "ColumnOrName", f: Callable[[Column], Column]) -> Column: """ Returns whether a predicate holds for one or more elements in the array. @@ -14366,7 +13936,7 @@ def exists(col: "ColumnOrName", f: Callable[[Column], Column]) -> Column: return _invoke_higher_order_function("ArrayExists", [col], [f]) -@try_remote_functions +@_try_remote_functions def forall(col: "ColumnOrName", f: Callable[[Column], Column]) -> Column: """ Returns whether a predicate holds for every element in the array. @@ -14421,7 +13991,7 @@ def filter(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column ... 
-@try_remote_functions +@_try_remote_functions def filter( col: "ColumnOrName", f: Union[Callable[[Column], Column], Callable[[Column, Column], Column]], @@ -14478,7 +14048,7 @@ def filter( return _invoke_higher_order_function("ArrayFilter", [col], [f]) -@try_remote_functions +@_try_remote_functions def aggregate( col: "ColumnOrName", initialValue: "ColumnOrName", @@ -14554,7 +14124,7 @@ def aggregate( return _invoke_higher_order_function("ArrayAggregate", [col, initialValue], [merge]) -@try_remote_functions +@_try_remote_functions def reduce( col: "ColumnOrName", initialValue: "ColumnOrName", @@ -14627,7 +14197,7 @@ def reduce( return _invoke_higher_order_function("ArrayAggregate", [col, initialValue], [merge]) -@try_remote_functions +@_try_remote_functions def zip_with( left: "ColumnOrName", right: "ColumnOrName", @@ -14682,7 +14252,7 @@ def zip_with( return _invoke_higher_order_function("ZipWith", [left, right], [f]) -@try_remote_functions +@_try_remote_functions def transform_keys(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column: """ Applies a function to every key-value pair in a map and returns @@ -14722,7 +14292,7 @@ def transform_keys(col: "ColumnOrName", f: Callable[[Column, Column], Column]) - return _invoke_higher_order_function("TransformKeys", [col], [f]) -@try_remote_functions +@_try_remote_functions def transform_values(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column: """ Applies a function to every key-value pair in a map and returns @@ -14762,7 +14332,7 @@ def transform_values(col: "ColumnOrName", f: Callable[[Column, Column], Column]) return _invoke_higher_order_function("TransformValues", [col], [f]) -@try_remote_functions +@_try_remote_functions def map_filter(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column: """ Returns a map whose key-value pairs satisfy a predicate. 
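aggregate() (and reduce(), its SQL-standard alias, which maps to the same ArrayAggregate expression) folds an array into a single value; an optional finish lambda can post-process the accumulator. Sketch:

from pyspark.sql import functions as F

df = spark.createDataFrame([([20, 4, 2, 3],)], ["values"])
df.select(
    F.aggregate("values", F.lit(0), lambda acc, x: acc + x).alias("total")  # 29
).show()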
@@ -14800,7 +14370,7 @@ def map_filter(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Co return _invoke_higher_order_function("MapFilter", [col], [f]) -@try_remote_functions +@_try_remote_functions def map_zip_with( col1: "ColumnOrName", col2: "ColumnOrName", @@ -14848,7 +14418,7 @@ def map_zip_with( return _invoke_higher_order_function("MapZipWith", [col1, col2], [f]) -@try_remote_functions +@_try_remote_functions def str_to_map( text: "ColumnOrName", pairDelim: Optional["ColumnOrName"] = None, @@ -14893,7 +14463,7 @@ def str_to_map( # ---------------------- Partition transform functions -------------------------------- -@try_remote_functions +@_try_remote_functions def years(col: "ColumnOrName") -> Column: """ Partition transform function: A transform for timestamps and dates @@ -14930,7 +14500,7 @@ def years(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("years", col) -@try_remote_functions +@_try_remote_functions def months(col: "ColumnOrName") -> Column: """ Partition transform function: A transform for timestamps and dates @@ -14967,7 +14537,7 @@ def months(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("months", col) -@try_remote_functions +@_try_remote_functions def days(col: "ColumnOrName") -> Column: """ Partition transform function: A transform for timestamps and dates @@ -15004,7 +14574,7 @@ def days(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("days", col) -@try_remote_functions +@_try_remote_functions def hours(col: "ColumnOrName") -> Column: """ Partition transform function: A transform for timestamps @@ -15041,7 +14611,7 @@ def hours(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("hours", col) -@try_remote_functions +@_try_remote_functions def convert_timezone( sourceTz: Optional[Column], targetTz: Column, sourceTs: "ColumnOrName" ) -> Column: @@ -15092,7 +14662,7 @@ def convert_timezone( return _invoke_function_over_columns("convert_timezone", sourceTz, targetTz, sourceTs) -@try_remote_functions +@_try_remote_functions def make_dt_interval( days: Optional["ColumnOrName"] = None, hours: Optional["ColumnOrName"] = None, @@ -15167,7 +14737,7 @@ def make_dt_interval( return _invoke_function_over_columns("make_dt_interval", _days, _hours, _mins, _secs) -@try_remote_functions +@_try_remote_functions def make_interval( years: Optional["ColumnOrName"] = None, months: Optional["ColumnOrName"] = None, @@ -15274,7 +14844,7 @@ def make_interval( ) -@try_remote_functions +@_try_remote_functions def make_timestamp( years: "ColumnOrName", months: "ColumnOrName", @@ -15346,7 +14916,7 @@ def make_timestamp( ) -@try_remote_functions +@_try_remote_functions def make_timestamp_ltz( years: "ColumnOrName", months: "ColumnOrName", @@ -15418,7 +14988,7 @@ def make_timestamp_ltz( ) -@try_remote_functions +@_try_remote_functions def make_timestamp_ntz( years: "ColumnOrName", months: "ColumnOrName", @@ -15473,7 +15043,7 @@ def make_timestamp_ntz( ) -@try_remote_functions +@_try_remote_functions def make_ym_interval( years: Optional["ColumnOrName"] = None, months: Optional["ColumnOrName"] = None, @@ -15507,7 +15077,7 @@ def make_ym_interval( return _invoke_function_over_columns("make_ym_interval", _years, _months) -@try_remote_functions +@_try_remote_functions def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> Column: """ Partition transform function: A transform for any type that partitions @@ -15547,7 +15117,7 @@ def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> 
Column: message_parameters={"arg_name": "numBuckets", "arg_type": type(numBuckets).__name__}, ) - get_active_spark_context() + _get_active_spark_context() numBuckets = ( _create_column_from_literal(numBuckets) if isinstance(numBuckets, int) @@ -15556,7 +15126,7 @@ def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> Column: return _invoke_function("bucket", numBuckets, _to_java_column(col)) -@try_remote_functions +@_try_remote_functions def call_udf(udfName: str, *cols: "ColumnOrName") -> Column: """ Call an user-defined function. @@ -15599,11 +15169,11 @@ def call_udf(udfName: str, *cols: "ColumnOrName") -> Column: | cc| +-----------+ """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function("call_udf", udfName, _to_seq(sc, cols, _to_java_column)) -@try_remote_functions +@_try_remote_functions def call_function(funcName: str, *cols: "ColumnOrName") -> Column: """ Call a SQL function. @@ -15668,11 +15238,11 @@ def call_function(funcName: str, *cols: "ColumnOrName") -> Column: | 102.0| +------------------------------------+ """ - sc = get_active_spark_context() + sc = _get_active_spark_context() return _invoke_function("call_function", funcName, _to_seq(sc, cols, _to_java_column)) -@try_remote_functions +@_try_remote_functions def unwrap_udt(col: "ColumnOrName") -> Column: """ Unwrap UDT data type column into its underlying type. @@ -15686,7 +15256,7 @@ def unwrap_udt(col: "ColumnOrName") -> Column: return _invoke_function("unwrap_udt", _to_java_column(col)) -@try_remote_functions +@_try_remote_functions def hll_sketch_agg(col: "ColumnOrName", lgConfigK: Optional[Union[int, Column]] = None) -> Column: """ Aggregate function: returns the updatable binary representation of the Datasketches @@ -15740,7 +15310,7 @@ def hll_sketch_agg(col: "ColumnOrName", lgConfigK: Optional[Union[int, Column]] return _invoke_function_over_columns("hll_sketch_agg", col, _lgConfigK) -@try_remote_functions +@_try_remote_functions def hll_union_agg( col: "ColumnOrName", allowDifferentLgConfigK: Optional[Union[bool, Column]] = None ) -> Column: @@ -15808,7 +15378,7 @@ def hll_union_agg( return _invoke_function_over_columns("hll_union_agg", col, _allowDifferentLgConfigK) -@try_remote_functions +@_try_remote_functions def hll_sketch_estimate(col: "ColumnOrName") -> Column: """ Returns the estimated number of unique values given the binary representation @@ -15839,7 +15409,7 @@ def hll_sketch_estimate(col: "ColumnOrName") -> Column: return _invoke_function("hll_sketch_estimate", _to_java_column(col)) -@try_remote_functions +@_try_remote_functions def hll_union( col1: "ColumnOrName", col2: "ColumnOrName", allowDifferentLgConfigK: Optional[bool] = None ) -> Column: @@ -15885,7 +15455,7 @@ def hll_union( # ---------------------- Predicates functions ------------------------------ -@try_remote_functions +@_try_remote_functions def ifnull(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Returns `col2` if `col1` is null, or `col1` otherwise. @@ -15912,7 +15482,7 @@ def ifnull(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("ifnull", col1, col2) -@try_remote_functions +@_try_remote_functions def isnotnull(col: "ColumnOrName") -> Column: """ Returns true if `col` is not null, or false otherwise. 
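The Datasketches helpers above pair up: hll_sketch_agg() aggregates values into a binary HLL sketch, hll_sketch_estimate() turns a sketch back into an approximate distinct count, and hll_union()/hll_union_agg() merge sketches first. A sketch following the same shape as the doctests:

from pyspark.sql import functions as F

df = spark.createDataFrame([1, 2, 2, 3], "INT")
df.agg(
    F.hll_sketch_estimate(F.hll_sketch_agg("value")).alias("distinct_cnt")  # 3
).show()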
@@ -15932,7 +15502,7 @@ def isnotnull(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("isnotnull", col) -@try_remote_functions +@_try_remote_functions def equal_null(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Returns same result as the EQUAL(=) operator for non-null operands, @@ -15954,7 +15524,7 @@ def equal_null(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("equal_null", col1, col2) -@try_remote_functions +@_try_remote_functions def nullif(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Returns null if `col1` equals to `col2`, or `col1` otherwise. @@ -15975,7 +15545,7 @@ def nullif(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("nullif", col1, col2) -@try_remote_functions +@_try_remote_functions def nvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: """ Returns `col2` if `col1` is null, or `col1` otherwise. @@ -15996,7 +15566,7 @@ def nvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("nvl", col1, col2) -@try_remote_functions +@_try_remote_functions def nvl2(col1: "ColumnOrName", col2: "ColumnOrName", col3: "ColumnOrName") -> Column: """ Returns `col2` if `col1` is not null, or `col3` otherwise. @@ -16018,7 +15588,7 @@ def nvl2(col1: "ColumnOrName", col2: "ColumnOrName", col3: "ColumnOrName") -> Co return _invoke_function_over_columns("nvl2", col1, col2, col3) -@try_remote_functions +@_try_remote_functions def aes_encrypt( input: "ColumnOrName", key: "ColumnOrName", @@ -16110,7 +15680,7 @@ def aes_encrypt( return _invoke_function_over_columns("aes_encrypt", input, key, _mode, _padding, _iv, _aad) -@try_remote_functions +@_try_remote_functions def aes_decrypt( input: "ColumnOrName", key: "ColumnOrName", @@ -16184,7 +15754,7 @@ def aes_decrypt( return _invoke_function_over_columns("aes_decrypt", input, key, _mode, _padding, _aad) -@try_remote_functions +@_try_remote_functions def try_aes_decrypt( input: "ColumnOrName", key: "ColumnOrName", @@ -16260,7 +15830,7 @@ def try_aes_decrypt( return _invoke_function_over_columns("try_aes_decrypt", input, key, _mode, _padding, _aad) -@try_remote_functions +@_try_remote_functions def sha(col: "ColumnOrName") -> Column: """ Returns a sha1 hash value as a hex string of the `col`. @@ -16284,7 +15854,7 @@ def sha(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sha", col) -@try_remote_functions +@_try_remote_functions def input_file_block_length() -> Column: """ Returns the length of the block being read, or -1 if not available. @@ -16300,7 +15870,7 @@ def input_file_block_length() -> Column: return _invoke_function_over_columns("input_file_block_length") -@try_remote_functions +@_try_remote_functions def input_file_block_start() -> Column: """ Returns the start offset of the block being read, or -1 if not available. @@ -16316,7 +15886,7 @@ def input_file_block_start() -> Column: return _invoke_function_over_columns("input_file_block_start") -@try_remote_functions +@_try_remote_functions def reflect(*cols: "ColumnOrName") -> Column: """ Calls a method with reflection. @@ -16341,7 +15911,7 @@ def reflect(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("reflect", cols) -@try_remote_functions +@_try_remote_functions def java_method(*cols: "ColumnOrName") -> Column: """ Calls a method with reflection. 
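The aes_* wrappers fill unspecified arguments with literal defaults (GCM mode, DEFAULT padding, empty IV/AAD) before dispatching, so a round trip only needs a key of 16, 24, or 32 bytes; try_aes_decrypt() returns NULL instead of failing on a bad key or corrupted input. Sketch:

from pyspark.sql import functions as F

df = spark.createDataFrame([("Spark", "abcdefghijklmnop")], ["input", "key"])
df.select(
    F.try_aes_decrypt(F.aes_encrypt("input", "key"), "key").cast("STRING").alias("r")  # 'Spark'
).show()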
@@ -16374,7 +15944,7 @@ def java_method(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("java_method", cols) -@try_remote_functions +@_try_remote_functions def try_reflect(*cols: "ColumnOrName") -> Column: """ This is a special version of `reflect` that performs the same operation, but returns a NULL @@ -16402,7 +15972,7 @@ def try_reflect(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("try_reflect", cols) -@try_remote_functions +@_try_remote_functions def version() -> Column: """ Returns the Spark version. The string contains 2 fields, the first being a release version @@ -16423,7 +15993,7 @@ def version() -> Column: return _invoke_function_over_columns("version") -@try_remote_functions +@_try_remote_functions def typeof(col: "ColumnOrName") -> Column: """ Return DDL-formatted type string for the data type of the input. @@ -16443,7 +16013,7 @@ def typeof(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("typeof", col) -@try_remote_functions +@_try_remote_functions def stack(*cols: "ColumnOrName") -> Column: """ Separates `col1`, ..., `colk` into `n` rows. Uses column names col0, col1, etc. by default @@ -16471,7 +16041,7 @@ def stack(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("stack", cols) -@try_remote_functions +@_try_remote_functions def bitmap_bit_position(col: "ColumnOrName") -> Column: """ Returns the bit position for the given input column. @@ -16492,7 +16062,7 @@ def bitmap_bit_position(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bitmap_bit_position", col) -@try_remote_functions +@_try_remote_functions def bitmap_bucket_number(col: "ColumnOrName") -> Column: """ Returns the bucket number for the given input column. @@ -16513,7 +16083,7 @@ def bitmap_bucket_number(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bitmap_bucket_number", col) -@try_remote_functions +@_try_remote_functions def bitmap_construct_agg(col: "ColumnOrName") -> Column: """ Returns a bitmap with the positions of the bits set from all the values from the input column. @@ -16537,7 +16107,7 @@ def bitmap_construct_agg(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bitmap_construct_agg", col) -@try_remote_functions +@_try_remote_functions def bitmap_count(col: "ColumnOrName") -> Column: """ Returns the number of set bits in the input bitmap. @@ -16558,7 +16128,7 @@ def bitmap_count(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("bitmap_count", col) -@try_remote_functions +@_try_remote_functions def bitmap_or_agg(col: "ColumnOrName") -> Column: """ Returns a bitmap that is the bitwise OR of all of the bitmaps from the input column. @@ -16613,7 +16183,7 @@ def udf( ... 
-@try_remote_functions +@_try_remote_functions def udf( f: Optional[Union[Callable[..., Any], "DataTypeOrString"]] = None, returnType: "DataTypeOrString" = StringType(), @@ -16739,7 +16309,7 @@ def udf( return _create_py_udf(f=f, returnType=returnType, useArrow=useArrow) -@try_remote_functions +@_try_remote_functions def udtf( cls: Optional[Type] = None, *, diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py index f73126f242f68..46b66537299e7 100644 --- a/python/pyspark/sql/tests/connect/test_connect_function.py +++ b/python/pyspark/sql/tests/connect/test_connect_function.py @@ -2373,11 +2373,7 @@ def test_function_parity(self): cf_fn = {name for (name, value) in getmembers(CF, isfunction) if name[0] != "_"} # Functions in vanilla PySpark we do not expect to be available in Spark Connect - sf_excluded_fn = { - "get_active_spark_context", # internal helper function - "try_remote_functions", # internal helper function - "to_str", # internal helper function - } + sf_excluded_fn = set() self.assertEqual( sf_fn - cf_fn, diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 5a8e36d287c08..e643a6c07fa4d 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -93,39 +93,6 @@ def test_function_parity(self): expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected" ) - def test_public_function(self): - inspected_list = {name for (name, value) in getmembers(F, isfunction) if name[0] != "_"} - - public_list = set(F.__all__) - - # check alias: both function 'pow' and its alias 'power' should be included - self.assertTrue("pow" in inspected_list) - self.assertTrue("power" in inspected_list) - self.assertTrue("pow" in public_list) - self.assertTrue("power" in public_list) - - inspected_execuded_list = { - "get_active_spark_context", # internal helper function - "try_remote_functions", # internal helper function - "to_str", # internal helper function - } - - self.assertEqual( - inspected_list - public_list, - inspected_execuded_list, - "Inspected functions NOT exposed!", - ) - - public_execuded_list = { - "PandasUDFType", # type, not a function - } - - self.assertEqual( - public_list - inspected_list, - public_execuded_list, - "Non-existent functions exposed!", - ) - def test_explode(self): d = [ Row(a=1, intlist=[1, 2, 3], mapfield={"a": "b"}), diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableChange.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableChange.java index 609cfab2d568e..ebecb6f507e6a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableChange.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableChange.java @@ -696,9 +696,8 @@ public String[] fieldNames() { /** * Returns the column default value SQL string (Spark SQL dialect). The default value literal * is not provided as updating column default values does not need to back-fill existing data. - * Null means dropping the column default value. + * Empty string means dropping the column default value. 
*/ - @Nullable public String newDefaultValue() { return newDefaultValue; } @Override diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala index 74d848298698b..73e44fe91ba75 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala @@ -368,6 +368,11 @@ case class PercentileCont(left: Expression, right: Expression, reverse: Boolean val direction = if (reverse) " DESC" else "" s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY ${left.sql}$direction)" } + + override def checkInputDataTypes(): TypeCheckResult = { + percentile.checkInputDataTypes() + } + override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): PercentileCont = this.copy(left = newLeft, right = newRight) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala index eb9d45f06ec79..b02c4fac12dec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.analysis.{FieldName, FieldPosition} +import org.apache.spark.sql.catalyst.analysis.{FieldName, FieldPosition, ResolvedFieldName} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.catalyst.util.{ResolveDefaultColumns, TypeUtils} import org.apache.spark.sql.connector.catalog.{TableCatalog, TableChange} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types.DataType @@ -228,6 +228,13 @@ case class AlterColumn( TableChange.updateColumnPosition(colName, newPosition.position) } val defaultValueChange = setDefaultExpression.map { newDefaultExpression => + if (newDefaultExpression.nonEmpty) { + // SPARK-45075: We call 'ResolveDefaultColumns.analyze' here to make sure that the default + // value parses successfully, and return an error otherwise + val newDataType = dataType.getOrElse(column.asInstanceOf[ResolvedFieldName].field.dataType) + ResolveDefaultColumns.analyze(column.name.last, newDataType, newDefaultExpression, + "ALTER TABLE ALTER COLUMN") + } TableChange.updateColumnDefaultValue(colName, newDefaultExpression) } typeChange.toSeq ++ nullabilityChange ++ commentChange ++ positionChange ++ defaultValueChange diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/columnar/ColumnDictionary.java b/sql/core/src/main/java/org/apache/spark/sql/execution/columnar/ColumnDictionary.java index 419dda874d3d9..29271fc5c0a29 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/columnar/ColumnDictionary.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/columnar/ColumnDictionary.java @@ -22,6 +22,8 @@ public final class ColumnDictionary implements Dictionary { private int[] intDictionary; private long[] longDictionary; + private float[] floatDictionary; + private double[] doubleDictionary; public ColumnDictionary(int[] dictionary) { 
this.intDictionary = dictionary; @@ -31,6 +33,14 @@ public ColumnDictionary(long[] dictionary) { this.longDictionary = dictionary; } + public ColumnDictionary(float[] dictionary) { + this.floatDictionary = dictionary; + } + + public ColumnDictionary(double[] dictionary) { + this.doubleDictionary = dictionary; + } + @Override public int decodeToInt(int id) { return intDictionary[id]; @@ -42,14 +52,10 @@ public long decodeToLong(int id) { } @Override - public float decodeToFloat(int id) { - throw new UnsupportedOperationException("Dictionary encoding does not support float"); - } + public float decodeToFloat(int id) { return floatDictionary[id]; } @Override - public double decodeToDouble(int id) { - throw new UnsupportedOperationException("Dictionary encoding does not support double"); - } + public double decodeToDouble(int id) { return doubleDictionary[id]; } @Override public byte[] decodeToBinary(int id) { diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index bc2636caefd08..9cb1b1f0b5e3c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -213,9 +213,14 @@ public byte getByte(int rowId) { @Override public byte[] getBytes(int rowId, int count) { - assert(dictionary == null); byte[] array = new byte[count]; - Platform.copyMemory(null, data + rowId, array, Platform.BYTE_ARRAY_OFFSET, count); + if (dictionary == null) { + Platform.copyMemory(null, data + rowId, array, Platform.BYTE_ARRAY_OFFSET, count); + } else { + for (int i = 0; i < count; i++) { + array[i] = getByte(rowId + i); + } + } return array; } @@ -269,9 +274,14 @@ public short getShort(int rowId) { @Override public short[] getShorts(int rowId, int count) { - assert(dictionary == null); short[] array = new short[count]; - Platform.copyMemory(null, data + rowId * 2L, array, Platform.SHORT_ARRAY_OFFSET, count * 2L); + if (dictionary == null) { + Platform.copyMemory(null, data + rowId * 2L, array, Platform.SHORT_ARRAY_OFFSET, count * 2L); + } else { + for (int i = 0; i < count; i++) { + array[i] = getShort(rowId + i); + } + } return array; } @@ -330,9 +340,14 @@ public int getInt(int rowId) { @Override public int[] getInts(int rowId, int count) { - assert(dictionary == null); int[] array = new int[count]; - Platform.copyMemory(null, data + rowId * 4L, array, Platform.INT_ARRAY_OFFSET, count * 4L); + if (dictionary == null) { + Platform.copyMemory(null, data + rowId * 4L, array, Platform.INT_ARRAY_OFFSET, count * 4L); + } else { + for (int i = 0; i < count; i++) { + array[i] = getInt(rowId + i); + } + } return array; } @@ -403,9 +418,14 @@ public long getLong(int rowId) { @Override public long[] getLongs(int rowId, int count) { - assert(dictionary == null); long[] array = new long[count]; - Platform.copyMemory(null, data + rowId * 8L, array, Platform.LONG_ARRAY_OFFSET, count * 8L); + if (dictionary == null) { + Platform.copyMemory(null, data + rowId * 8L, array, Platform.LONG_ARRAY_OFFSET, count * 8L); + } else { + for (int i = 0; i < count; i++) { + array[i] = getLong(rowId + i); + } + } return array; } @@ -462,9 +482,14 @@ public float getFloat(int rowId) { @Override public float[] getFloats(int rowId, int count) { - assert(dictionary == null); float[] array = new float[count]; - Platform.copyMemory(null, data + rowId * 4L, array, 
Platform.FLOAT_ARRAY_OFFSET, count * 4L); + if (dictionary == null) { + Platform.copyMemory(null, data + rowId * 4L, array, Platform.FLOAT_ARRAY_OFFSET, count * 4L); + } else { + for (int i = 0; i < count; i++) { + array[i] = getFloat(rowId + i); + } + } return array; } @@ -522,9 +547,15 @@ public double getDouble(int rowId) { @Override public double[] getDoubles(int rowId, int count) { - assert(dictionary == null); double[] array = new double[count]; - Platform.copyMemory(null, data + rowId * 8L, array, Platform.DOUBLE_ARRAY_OFFSET, count * 8L); + if (dictionary == null) { + Platform.copyMemory(null, data + rowId * 8L, array, Platform.DOUBLE_ARRAY_OFFSET, + count * 8L); + } else { + for (int i = 0; i < count; i++) { + array[i] = getDouble(rowId + i); + } + } return array; } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index 56a96907f0f08..be590bb9ac726 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -211,9 +211,14 @@ public byte getByte(int rowId) { @Override public byte[] getBytes(int rowId, int count) { - assert(dictionary == null); byte[] array = new byte[count]; - System.arraycopy(byteData, rowId, array, 0, count); + if (dictionary == null) { + System.arraycopy(byteData, rowId, array, 0, count); + } else { + for (int i = 0; i < count; i++) { + array[i] = getByte(rowId + i); + } + } return array; } @@ -266,9 +271,14 @@ public short getShort(int rowId) { @Override public short[] getShorts(int rowId, int count) { - assert(dictionary == null); short[] array = new short[count]; - System.arraycopy(shortData, rowId, array, 0, count); + if (dictionary == null) { + System.arraycopy(shortData, rowId, array, 0, count); + } else { + for (int i = 0; i < count; i++) { + array[i] = getShort(rowId + i); + } + } return array; } @@ -322,9 +332,14 @@ public int getInt(int rowId) { @Override public int[] getInts(int rowId, int count) { - assert(dictionary == null); int[] array = new int[count]; - System.arraycopy(intData, rowId, array, 0, count); + if (dictionary == null) { + System.arraycopy(intData, rowId, array, 0, count); + } else { + for (int i = 0; i < count; i++) { + array[i] = getInt(rowId + i); + } + } return array; } @@ -389,9 +404,14 @@ public long getLong(int rowId) { @Override public long[] getLongs(int rowId, int count) { - assert(dictionary == null); long[] array = new long[count]; - System.arraycopy(longData, rowId, array, 0, count); + if (dictionary == null) { + System.arraycopy(longData, rowId, array, 0, count); + } else { + for (int i = 0; i < count; i++) { + array[i] = getLong(rowId + i); + } + } return array; } @@ -441,9 +461,14 @@ public float getFloat(int rowId) { @Override public float[] getFloats(int rowId, int count) { - assert(dictionary == null); float[] array = new float[count]; - System.arraycopy(floatData, rowId, array, 0, count); + if (dictionary == null) { + System.arraycopy(floatData, rowId, array, 0, count); + } else { + for (int i = 0; i < count; i++) { + array[i] = getFloat(rowId + i); + } + } return array; } @@ -495,9 +520,14 @@ public double getDouble(int rowId) { @Override public double[] getDoubles(int rowId, int count) { - assert(dictionary == null); double[] array = new double[count]; - System.arraycopy(doubleData, rowId, array, 0, count); + if (dictionary == null) { 
+      System.arraycopy(doubleData, rowId, array, 0, count);
+    } else {
+      for (int i = 0; i < count; i++) {
+        array[i] = getDouble(rowId + i);
+      }
+    }
     return array;
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index c1f674d2c0c69..5935695818e3b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -6267,8 +6267,8 @@ object functions {
    * }}}
    *
    * @param column the input array column
-   * @param f (col, index) => transformed_col, the lambda function to filter the input column
-   *          given the index. Indices start at 0.
+   * @param f (col, index) => transformed_col, the lambda function to transform the input
+   *          column given the index. Indices start at 0.
    *
    * @group collection_funcs
    * @since 3.0.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 2f5e813dcb618..d775f5fce2f46 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -702,6 +702,7 @@ object JdbcDialects {
   registerDialect(OracleDialect)
   registerDialect(TeradataDialect)
   registerDialect(H2Dialect)
+  registerDialect(SnowflakeDialect)

  /**
   * Fetch the JdbcDialect class corresponding to a given database url.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/SnowflakeDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/SnowflakeDialect.scala
new file mode 100644
index 0000000000000..d8a8fe6ba4a90
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/SnowflakeDialect.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc
+
+import java.util.Locale
+
+import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils
+import org.apache.spark.sql.types.{BooleanType, DataType}
+
+private case object SnowflakeDialect extends JdbcDialect {
+  override def canHandle(url: String): Boolean =
+    url.toLowerCase(Locale.ROOT).startsWith("jdbc:snowflake")
+
+  override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
+    case BooleanType =>
+      // By default, BOOLEAN is mapped to BIT(1),
+      // but Snowflake does not have a BIT type. It uses BOOLEAN instead.
+ Some(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) + case _ => JdbcUtils.getCommonJDBCType(dt) + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out index 36845ce63466e..16fb510d5d459 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out @@ -868,6 +868,31 @@ Aggregate [percentile_disc(a#x, cast(0.0 as double), false, 0, 0, true) AS p0#x, +- LocalRelation [a#x] +-- !query +SELECT + percentile_cont(b) WITHIN GROUP (ORDER BY a DESC) as p0 +FROM values (12, 0.25), (13, 0.25), (22, 0.25) as v(a, b) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"b\"", + "inputName" : "percentage", + "inputType" : "\"DOUBLE\"", + "sqlExpr" : "\"percentile_cont(a, b)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 10, + "stopIndex" : 58, + "fragment" : "percentile_cont(b) WITHIN GROUP (ORDER BY a DESC)" + } ] +} + + -- !query SET spark.sql.legacy.percentileDiscCalculation = false -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql b/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql index 87c5d4be90ce1..eae8a71be7e56 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/percentiles.sql @@ -374,4 +374,8 @@ SELECT percentile_disc(1.0) WITHIN GROUP (ORDER BY a) as p10 FROM VALUES (0), (1), (2), (3), (4) AS v(a); +SELECT + percentile_cont(b) WITHIN GROUP (ORDER BY a DESC) as p0 +FROM values (12, 0.25), (13, 0.25), (22, 0.25) as v(a, b); + SET spark.sql.legacy.percentileDiscCalculation = false; diff --git a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out index 0d6ab54286110..a0a4dc35f3fd8 100644 --- a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out @@ -840,6 +840,33 @@ struct +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"b\"", + "inputName" : "percentage", + "inputType" : "\"DOUBLE\"", + "sqlExpr" : "\"percentile_cont(a, b)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 10, + "stopIndex" : 58, + "fragment" : "percentile_cont(b) WITHIN GROUP (ORDER BY a DESC)" + } ] +} + + -- !query SET spark.sql.legacy.percentileDiscCalculation = false -- !query schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala index a18c767570f01..ca60e3212e686 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala @@ -363,6 +363,35 @@ trait AlterTableTests extends SharedSparkSession with QueryErrorsBase { } } + test("SPARK-45075: ALTER COLUMN with invalid default value") { + withSQLConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS.key -> s"$v2Format, ") { + withTable("t") { + sql(s"create table t(i 
boolean) using $v2Format") + // The default value fails to analyze. + checkError( + exception = intercept[AnalysisException] { + sql("alter table t add column s bigint default badvalue") + }, + errorClass = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION", + parameters = Map( + "statement" -> "ALTER TABLE", + "colName" -> "`s`", + "defaultValue" -> "badvalue")) + + sql("alter table t add column s bigint default 3L") + checkError( + exception = intercept[AnalysisException] { + sql("alter table t alter column s set default badvalue") + }, + errorClass = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION", + parameters = Map( + "statement" -> "ALTER TABLE ALTER COLUMN", + "colName" -> "`s`", + "defaultValue" -> "badvalue")) + } + } + } + test("AlterTable: add complex column") { val t = fullTableName("table_name") withTable(t) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 16e0e6b43922d..2e7b26126d24f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -1108,6 +1108,16 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS checkAnswer(sql("select * from tbl"), expected) } } + + test("SPARK-44805: cast of struct with two arrays") { + withTable("tbl") { + sql("create table tbl (value struct,f2:array>) using parquet") + sql("insert into tbl values (named_struct('f1', array(1, 2, 3), 'f2', array(1, 1, 2)))") + val df = sql("select cast(value as struct,f2:array>) AS value from tbl") + val expected = Row(Row(Array(1.0d, 2.0d, 3.0d), Array(1, 1, 2))) :: Nil + checkAnswer(df, expected) + } + } } class ParquetV1QuerySuite extends ParquetQuerySuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index b2b2729e90e16..42125c59bb773 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.vectorized import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.execution.columnar.ColumnAccessor +import org.apache.spark.sql.execution.columnar.{ColumnAccessor, ColumnDictionary} import org.apache.spark.sql.execution.columnar.compression.ColumnBuilderHelper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -397,6 +397,84 @@ class ColumnVectorSuite extends SparkFunSuite with SQLHelper { assert(testVector.getStruct(1).get(1, DoubleType) === 5.67) } + testVectors("SPARK-44805: getInts with dictionary", 3, IntegerType) { testVector => + val dict = new ColumnDictionary(Array[Int](7, 8, 9)) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 0) + testVector.getDictionaryIds.putInt(1, 1) + testVector.getDictionaryIds.putInt(2, 2) + + assert(testVector.getInts(0, 3)(0) == 7) + assert(testVector.getInts(0, 3)(1) == 8) + assert(testVector.getInts(0, 3)(2) == 9) + } + + testVectors("SPARK-44805: getShorts with 
dictionary", 3, ShortType) { testVector => + val dict = new ColumnDictionary(Array[Int](7, 8, 9)) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 0) + testVector.getDictionaryIds.putInt(1, 1) + testVector.getDictionaryIds.putInt(2, 2) + + assert(testVector.getShorts(0, 3)(0) == 7) + assert(testVector.getShorts(0, 3)(1) == 8) + assert(testVector.getShorts(0, 3)(2) == 9) + } + + testVectors("SPARK-44805: getBytes with dictionary", 3, ByteType) { testVector => + val dict = new ColumnDictionary(Array[Int](7, 8, 9)) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 0) + testVector.getDictionaryIds.putInt(1, 1) + testVector.getDictionaryIds.putInt(2, 2) + + assert(testVector.getBytes(0, 3)(0) == 7) + assert(testVector.getBytes(0, 3)(1) == 8) + assert(testVector.getBytes(0, 3)(2) == 9) + } + + testVectors("SPARK-44805: getLongs with dictionary", 3, LongType) { testVector => + val dict = new ColumnDictionary(Array[Long](2147483648L, 2147483649L, 2147483650L)) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 0) + testVector.getDictionaryIds.putInt(1, 1) + testVector.getDictionaryIds.putInt(2, 2) + + assert(testVector.getLongs(0, 3)(0) == 2147483648L) + assert(testVector.getLongs(0, 3)(1) == 2147483649L) + assert(testVector.getLongs(0, 3)(2) == 2147483650L) + } + + testVectors("SPARK-44805: getFloats with dictionary", 3, FloatType) { testVector => + val dict = new ColumnDictionary(Array[Float](0.1f, 0.2f, 0.3f)) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 0) + testVector.getDictionaryIds.putInt(1, 1) + testVector.getDictionaryIds.putInt(2, 2) + + assert(testVector.getFloats(0, 3)(0) == 0.1f) + assert(testVector.getFloats(0, 3)(1) == 0.2f) + assert(testVector.getFloats(0, 3)(2) == 0.3f) + } + + testVectors("SPARK-44805: getDoubles with dictionary", 3, DoubleType) { testVector => + val dict = new ColumnDictionary(Array[Double](1342.17727d, 1342.17728d, 1342.17729d)) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 0) + testVector.getDictionaryIds.putInt(1, 1) + testVector.getDictionaryIds.putInt(2, 2) + + assert(testVector.getDoubles(0, 3)(0) == 1342.17727d) + assert(testVector.getDoubles(0, 3)(1) == 1342.17728d) + assert(testVector.getDoubles(0, 3)(2) == 1342.17729d) + } + test("[SPARK-22092] off-heap column vector reallocation corrupts array data") { withVector(new OffHeapColumnVector(8, arrayType)) { testVector => val data = testVector.arrayData() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 93b6652d516cc..6f60e0f739489 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -2057,4 +2057,9 @@ class JDBCSuite extends QueryTest with SharedSparkSession { val df = sql("SELECT * FROM composite_name WHERE `last name` = 'smith'") assert(df.collect.toSet === Set(Row("smith", 1))) } + + test("SPARK-44866: SnowflakeDialect BOOLEAN type mapping") { + val snowflakeDialect = JdbcDialects.get("jdbc:snowflake://account.snowflakecomputing.com") + assert(snowflakeDialect.getJDBCType(BooleanType).map(_.databaseTypeDefinition).get == "BOOLEAN") + } }