From 317e114e11669899618c7c06bbc0091b36618f36 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Sat, 29 Nov 2014 00:31:06 -0800 Subject: [PATCH 01/82] [SPARK-3398] [SPARK-4325] [EC2] Use EC2 status checks. This PR re-introduces [0e648bc](https://github.com/apache/spark/commit/0e648bc2bedcbeb55fce5efac04f6dbad9f063b4) from PR #2339, which somehow never made it into the codebase. Additionally, it removes a now-unnecessary linear backoff on the SSH checks since we are blocking on EC2 status checks before testing SSH. Author: Nicholas Chammas Closes #3195 from nchammas/remove-ec2-ssh-backoff and squashes the following commits: efb29e1 [Nicholas Chammas] Revert "Remove linear backoff." ef3ca99 [Nicholas Chammas] reuse conn adb4eaa [Nicholas Chammas] Remove linear backoff. 55caa24 [Nicholas Chammas] Check EC2 status checks before SSH. --- ec2/spark_ec2.py | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 742c7765e728e..b83decadc2988 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -33,6 +33,7 @@ import time import urllib2 import warnings +from datetime import datetime from optparse import OptionParser from sys import stderr import boto @@ -589,7 +590,9 @@ def setup_spark_cluster(master, opts): def is_ssh_available(host, opts): - "Checks if SSH is available on the host." + """ + Check if SSH is available on a host. + """ try: with open(os.devnull, 'w') as devnull: ret = subprocess.check_call( @@ -604,6 +607,9 @@ def is_ssh_available(host, opts): def is_cluster_ssh_available(cluster_instances, opts): + """ + Check if SSH is available on all the instances in a cluster. 
+ """ for i in cluster_instances: if not is_ssh_available(host=i.ip_address, opts=opts): return False @@ -611,8 +617,10 @@ def is_cluster_ssh_available(cluster_instances, opts): return True -def wait_for_cluster_state(cluster_instances, cluster_state, opts): +def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state): """ + Wait for all the instances in the cluster to reach a designated state. + cluster_instances: a list of boto.ec2.instance.Instance cluster_state: a string representing the desired state of all the instances in the cluster value can be 'ssh-ready' or a valid value from boto.ec2.instance.InstanceState such as @@ -620,20 +628,27 @@ def wait_for_cluster_state(cluster_instances, cluster_state, opts): (would be nice to replace this with a proper enum: http://stackoverflow.com/a/1695250) """ sys.stdout.write( - "Waiting for all instances in cluster to enter '{s}' state.".format(s=cluster_state) + "Waiting for cluster to enter '{s}' state.".format(s=cluster_state) ) sys.stdout.flush() + start_time = datetime.now() + num_attempts = 0 + conn = ec2.connect_to_region(opts.region) while True: - time.sleep(3 * num_attempts) + time.sleep(5 * num_attempts) # seconds for i in cluster_instances: - s = i.update() # capture output to suppress print to screen in newer versions of boto + i.update() + + statuses = conn.get_all_instance_status(instance_ids=[i.id for i in cluster_instances]) if cluster_state == 'ssh-ready': if all(i.state == 'running' for i in cluster_instances) and \ + all(s.system_status.status == 'ok' for s in statuses) and \ + all(s.instance_status.status == 'ok' for s in statuses) and \ is_cluster_ssh_available(cluster_instances, opts): break else: @@ -647,6 +662,12 @@ def wait_for_cluster_state(cluster_instances, cluster_state, opts): sys.stdout.write("\n") + end_time = datetime.now() + print "Cluster is now in '{s}' state. 
Waited {t} seconds.".format( + s=cluster_state, + t=(end_time - start_time).seconds + ) + # Get number of local disks available for a given EC2 instance type. def get_num_disks(instance_type): @@ -895,7 +916,7 @@ def real_main(): # See: https://docs.python.org/3.5/whatsnew/2.7.html warnings.warn( "This option is deprecated and has no effect. " - "spark-ec2 automatically waits as long as necessary for clusters to startup.", + "spark-ec2 automatically waits as long as necessary for clusters to start up.", DeprecationWarning ) @@ -922,9 +943,10 @@ def real_main(): else: (master_nodes, slave_nodes) = launch_cluster(conn, opts, cluster_name) wait_for_cluster_state( + conn=conn, + opts=opts, cluster_instances=(master_nodes + slave_nodes), - cluster_state='ssh-ready', - opts=opts + cluster_state='ssh-ready' ) setup_cluster(conn, master_nodes, slave_nodes, opts, True) @@ -951,9 +973,10 @@ def real_main(): print "Deleting security groups (this will take some time)..." group_names = [cluster_name + "-master", cluster_name + "-slaves"] wait_for_cluster_state( + conn=conn, + opts=opts, cluster_instances=(master_nodes + slave_nodes), - cluster_state='terminated', - opts=opts + cluster_state='terminated' ) attempt = 1 while attempt <= 3: @@ -1055,9 +1078,10 @@ def real_main(): if inst.state not in ["shutting-down", "terminated"]: inst.start() wait_for_cluster_state( + conn=conn, + opts=opts, cluster_instances=(master_nodes + slave_nodes), - cluster_state='ssh-ready', - opts=opts + cluster_state='ssh-ready' ) setup_cluster(conn, master_nodes, slave_nodes, opts, False) From 95290bf4c4c786c95ba1ffdfe73bc61f6a0ee6b1 Mon Sep 17 00:00:00 2001 From: Stephen Haberman Date: Sat, 29 Nov 2014 20:12:05 -0500 Subject: [PATCH 02/82] Include the key name when failing on an invalid value. Admittedly a really small tweak. 
Author: Stephen Haberman Closes #3514 from stephenh/include-key-name-in-npe and squashes the following commits: 937740a [Stephen Haberman] Include the key name when failing on an invalid value. --- core/src/main/scala/org/apache/spark/SparkConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 4c6c86c7bad78..c14764f773982 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -61,7 +61,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { throw new NullPointerException("null key") } if (value == null) { - throw new NullPointerException("null value") + throw new NullPointerException("null value for " + key) } settings(key) = value this From 938dc141ee4448c20441fa9dfa3a9897a11ed4b6 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 29 Nov 2014 20:14:14 -0500 Subject: [PATCH 03/82] [SPARK-4057] Use -agentlib instead of -Xdebug in sbt-launch-lib.bash for debugging In sbt-launch-lib.bash, the -Xdebug option is used for debugging. We should use the -agentlib option for Java 6+.
Author: Kousuke Saruta Closes #2904 from sarutak/SPARK-4057 and squashes the following commits: 39b5320 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-4057 26b4af8 [Kousuke Saruta] Improved java option for debugging --- sbt/sbt-launch-lib.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbt/sbt-launch-lib.bash b/sbt/sbt-launch-lib.bash index 84a6f7a207186..fa7222d55a6db 100755 --- a/sbt/sbt-launch-lib.bash +++ b/sbt/sbt-launch-lib.bash @@ -104,7 +104,7 @@ addResidual () { residual_args=( "${residual_args[@]}" "$1" ) } addDebugger () { - addJava "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1" + addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1" } # a ham-fisted attempt to move some memory settings in concert From c06222427f866fe216d819bbf4eba7b1c834835c Mon Sep 17 00:00:00 2001 From: zsxwing Date: Sat, 29 Nov 2014 20:23:08 -0500 Subject: [PATCH 04/82] [SPARK-4505][Core] Add a ClassTag parameter to CompactBuffer[T] Added a ClassTag parameter to CompactBuffer so that CompactBuffer[T] can create primitive arrays for primitive types. It will reduce the memory usage for primitive types significantly at the cost of only a minor performance loss.
Here is my test code: ```Scala // Call org.apache.spark.util.SizeEstimator.estimate def estimateSize(obj: AnyRef): Long = { val c = Class.forName("org.apache.spark.util.SizeEstimator$") val f = c.getField("MODULE$") val o = f.get(c) val m = c.getMethod("estimate", classOf[Object]) m.setAccessible(true) m.invoke(o, obj).asInstanceOf[Long] } sc.parallelize(1 to 10000).groupBy(_ => 1).foreach { case (k, v) => println(v.getClass() + " size: " + estimateSize(v)) } ``` Using the previous CompactBuffer outputed ``` class org.apache.spark.util.collection.CompactBuffer size: 313358 ``` Using the new CompactBuffer outputed ``` class org.apache.spark.util.collection.CompactBuffer size: 65712 ``` In this case, the new `CompactBuffer` only used 20% memory of the previous one. It's really helpful for `groupByKey` when using a primitive value. Author: zsxwing Closes #3378 from zsxwing/SPARK-4505 and squashes the following commits: 4abdbba [zsxwing] Add a ClassTag parameter to reduce the memory usage of CompactBuffer[T] when T is a primitive type --- .../spark/util/collection/CompactBuffer.scala | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala index d44e15e3c97ea..4d43d8d5cc8d8 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala @@ -17,6 +17,8 @@ package org.apache.spark.util.collection +import scala.reflect.ClassTag + /** * An append-only buffer similar to ArrayBuffer, but more memory-efficient for small buffers. * ArrayBuffer always allocates an Object array to store the data, with 16 entries by default, @@ -25,7 +27,7 @@ package org.apache.spark.util.collection * entries than that. 
This makes it more efficient for operations like groupBy where we expect * some keys to have very few elements. */ -private[spark] class CompactBuffer[T] extends Seq[T] with Serializable { +private[spark] class CompactBuffer[T: ClassTag] extends Seq[T] with Serializable { // First two elements private var element0: T = _ private var element1: T = _ @@ -34,7 +36,7 @@ private[spark] class CompactBuffer[T] extends Seq[T] with Serializable { private var curSize = 0 // Array for extra elements - private var otherElements: Array[AnyRef] = null + private var otherElements: Array[T] = null def apply(position: Int): T = { if (position < 0 || position >= curSize) { @@ -45,7 +47,7 @@ private[spark] class CompactBuffer[T] extends Seq[T] with Serializable { } else if (position == 1) { element1 } else { - otherElements(position - 2).asInstanceOf[T] + otherElements(position - 2) } } @@ -58,7 +60,7 @@ private[spark] class CompactBuffer[T] extends Seq[T] with Serializable { } else if (position == 1) { element1 = value } else { - otherElements(position - 2) = value.asInstanceOf[AnyRef] + otherElements(position - 2) = value } } @@ -72,7 +74,7 @@ private[spark] class CompactBuffer[T] extends Seq[T] with Serializable { curSize = 2 } else { growToSize(curSize + 1) - otherElements(newIndex - 2) = value.asInstanceOf[AnyRef] + otherElements(newIndex - 2) = value } this } @@ -139,7 +141,7 @@ private[spark] class CompactBuffer[T] extends Seq[T] with Serializable { newArrayLen = Int.MaxValue - 2 } } - val newArray = new Array[AnyRef](newArrayLen) + val newArray = new Array[T](newArrayLen) if (otherElements != null) { System.arraycopy(otherElements, 0, newArray, 0, otherElements.length) } @@ -150,9 +152,9 @@ private[spark] class CompactBuffer[T] extends Seq[T] with Serializable { } private[spark] object CompactBuffer { - def apply[T](): CompactBuffer[T] = new CompactBuffer[T] + def apply[T: ClassTag](): CompactBuffer[T] = new CompactBuffer[T] - def apply[T](value: T): CompactBuffer[T] = { + 
def apply[T: ClassTag](value: T): CompactBuffer[T] = { val buf = new CompactBuffer[T] buf += value } From 4316a7b0103977a2a1547e28cd04c842d9839f1e Mon Sep 17 00:00:00 2001 From: Takayuki Hasegawa Date: Sat, 29 Nov 2014 23:12:10 -0500 Subject: [PATCH 05/82] SPARK-4507: PR merge script should support closing multiple JIRA tickets This will fix SPARK-4507. For pull requests that reference multiple JIRAs in their titles, it would be helpful if the PR merge script offered to close all of them. Author: Takayuki Hasegawa Closes #3428 from hase1031/SPARK-4507 and squashes the following commits: bf6d64b [Takayuki Hasegawa] SPARK-4507: try to resolve issue when no JIRAs in title 401224c [Takayuki Hasegawa] SPARK-4507: moved codes as before ce89021 [Takayuki Hasegawa] SPARK-4507: PR merge script should support closing multiple JIRA tickets --- dev/merge_spark_pr.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 02ac20984add9..dfa924d2aa0ba 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -214,15 +214,10 @@ def fix_version_from_branch(branch, versions): return filter(lambda x: x.name.startswith(branch_ver), versions)[-1] -def resolve_jira(title, merge_branches, comment): +def resolve_jira_issue(merge_branches, comment, default_jira_id=""): asf_jira = jira.client.JIRA({'server': JIRA_API_BASE}, basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) - default_jira_id = "" - search = re.findall("SPARK-[0-9]{4,5}", title) - if len(search) > 0: - default_jira_id = search[0] - jira_id = raw_input("Enter a JIRA id [%s]: " % default_jira_id) if jira_id == "": jira_id = default_jira_id @@ -280,6 +275,15 @@ def get_version_json(version_str): print "Succesfully resolved %s with fixVersions=%s!" 
% (jira_id, fix_versions) +def resolve_jira_issues(title, merge_branches, comment): + jira_ids = re.findall("SPARK-[0-9]{4,5}", title) + + if len(jira_ids) == 0: + resolve_jira_issue(merge_branches, comment) + for jira_id in jira_ids: + resolve_jira_issue(merge_branches, comment, jira_id) + + branches = get_json("%s/branches" % GITHUB_API_BASE) branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]) # Assumes branch names can be sorted lexicographically @@ -338,7 +342,7 @@ def get_version_json(version_str): if JIRA_USERNAME and JIRA_PASSWORD: continue_maybe("Would you like to update an associated JIRA?") jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num) - resolve_jira(title, merged_refs, jira_comment) + resolve_jira_issues(title, merged_refs, jira_comment) else: print "JIRA_USERNAME and JIRA_PASSWORD not set" print "Exiting without trying to close the associated JIRA." From 0fcd24cc542040ff3555290eec7b021062e7e6ac Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sun, 30 Nov 2014 00:10:31 -0500 Subject: [PATCH 06/82] [DOCS][BUILD] Add instruction to use change-version-to-2.11.sh in 'Building for Scala 2.11'. To build with Scala 2.11, we have to execute `change-version-to-2.11.sh` before Maven execute, otherwise inter-module dependencies are broken. Author: Takuya UESHIN Closes #3361 from ueshin/docs/building-spark_2.11 and squashes the following commits: 1d29126 [Takuya UESHIN] Add instruction to use change-version-to-2.11.sh in 'Building for Scala 2.11'. 
--- docs/building-spark.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/building-spark.md b/docs/building-spark.md index 40a47410e683a..6cca2da8e86d2 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -118,6 +118,7 @@ mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -Phive-0.12.0 -Phive-thrif # Building for Scala 2.11 To produce a Spark package compiled with Scala 2.11, use the `-Dscala-2.11` property: + dev/change-version-to-2.11.sh mvn -Pyarn -Phadoop-2.4 -Dscala-2.11 -DskipTests clean package Scala 2.11 support in Spark is experimental and does not support a few features. From 048ecca625bd812397c9fd41886c474abfc3c4ae Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sun, 30 Nov 2014 11:40:08 -0800 Subject: [PATCH 07/82] SPARK-2143 [WEB UI] Add Spark version to UI footer This PR adds the Spark version number to the UI footer; this is how it looks: ![screen shot 2014-11-21 at 22 58 40](https://cloud.githubusercontent.com/assets/822522/5157738/f4822094-7316-11e4-98f1-333a535fdcfa.png) Author: Sean Owen Closes #3410 from srowen/SPARK-2143 and squashes the following commits: e9b3a7a [Sean Owen] Add Spark version to footer --- core/src/main/scala/org/apache/spark/ui/UIUtils.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 09079bbd43f6f..315327c3c6b7c 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -212,6 +212,11 @@ private[spark] object UIUtils extends Logging { {content} + } @@ -238,6 +243,11 @@ private[spark] object UIUtils extends Logging { {content} + } From aea7a99761b00b514df2b58ca836ba0de6742a3d Mon Sep 17 00:00:00 2001 From: carlmartin Date: Sun, 30 Nov 2014 16:19:41 -0800 Subject: [PATCH 08/82] [SPARK-4623]Add the some error infomation if using spark-sql in yarn-cluster mode If using spark-sql in yarn-cluster mode, 
print an error message, just as the Spark shell does in yarn-cluster mode. Author: carlmartin Author: huangzhaowei Closes #3479 from SaintBacchus/sparkSqlShell and squashes the following commits: 35829a9 [carlmartin] improve the description of comment e6c1eb7 [carlmartin] add a comment in bin/spark-sql to remind user who wants to change the class f1c5c8d [carlmartin] Merge branch 'master' into sparkSqlShell 8e112c5 [huangzhaowei] singular form ec957bc [carlmartin] Add the some error infomation if using spark-sql in yarn-cluster mode 7bcecc2 [carlmartin] Merge branch 'master' of https://github.com/apache/spark into codereview 4fad75a [carlmartin] Add the Error infomation using spark-sql in yarn-cluster mode --- bin/spark-sql | 2 ++ .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/bin/spark-sql b/bin/spark-sql index 63d00437d508d..3b6cc420fea81 100755 --- a/bin/spark-sql +++ b/bin/spark-sql @@ -23,6 +23,8 @@ # Enter posix mode for bash set -o posix +# NOTE: This exact class name is matched downstream by SparkSubmit. +# Any changes need to be reflected there.
CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver" # Figure out where Spark is installed diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 00f291823e984..0c7d247519447 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -142,6 +142,8 @@ object SparkSubmit { printErrorAndExit("Cluster deploy mode is currently not supported for python applications.") case (_, CLUSTER) if isShell(args.primaryResource) => printErrorAndExit("Cluster deploy mode is not applicable to Spark shells.") + case (_, CLUSTER) if isSqlShell(args.mainClass) => + printErrorAndExit("Cluster deploy mode is not applicable to Spark SQL shell.") case _ => } @@ -393,6 +395,13 @@ object SparkSubmit { primaryResource == SPARK_SHELL || primaryResource == PYSPARK_SHELL } + /** + * Return whether the given main class represents a sql shell. + */ + private[spark] def isSqlShell(mainClass: String): Boolean = { + mainClass == "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver" + } + /** * Return whether the given primary resource requires running python. 
*/ From a217ec5fd5cd7addc69e538d6ec6dd64956cc8ed Mon Sep 17 00:00:00 2001 From: lewuathe Date: Sun, 30 Nov 2014 17:18:50 -0800 Subject: [PATCH 09/82] [SPARK-4656][Doc] Typo in Programming Guide markdown Grammatical error in Programming Guide document Author: lewuathe Closes #3412 from Lewuathe/typo-programming-guide and squashes the following commits: a3e2f00 [lewuathe] Typo in Programming Guide markdown --- docs/programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/programming-guide.md b/docs/programming-guide.md index c60de6e970531..7a16ee8742dc0 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -1177,7 +1177,7 @@ Accumulators are variables that are only "added" to through an associative opera therefore be efficiently supported in parallel. They can be used to implement counters (as in MapReduce) or sums. Spark natively supports accumulators of numeric types, and programmers can add support for new types. If accumulators are created with a name, they will be -displayed in Spark's UI. This can can be useful for understanding the progress of +displayed in Spark's UI. This can be useful for understanding the progress of running stages (NOTE: this is not yet supported in Python). An accumulator is created from an initial value `v` by calling `SparkContext.accumulator(v)`. 
Tasks From 2a4d389f70b2066b1ac32b081bef44e61fefb03c Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Sun, 30 Nov 2014 19:04:07 -0800 Subject: [PATCH 10/82] [DOC] Fixes formatting typo in SQL programming guide [Review on Reviewable](https://reviewable.io/reviews/apache/spark/3498) Author: Cheng Lian Closes #3498 from liancheng/fix-sql-doc-typo and squashes the following commits: 865ecd7 [Cheng Lian] Fixes formatting typo in SQL programming guide --- docs/sql-programming-guide.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 5500da83b2b66..24a68bb083334 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -900,7 +900,6 @@ export HIVE_SERVER2_THRIFT_BIND_HOST= ./sbin/start-thriftserver.sh \ --master \ ... -``` {% endhighlight %} or system properties: @@ -911,7 +910,6 @@ or system properties: --hiveconf hive.server2.thrift.bind.host= \ --master ... -``` {% endhighlight %} Now you can use beeline to test the Thrift JDBC/ODBC server: From 06dc1b15e425d4dbb0e516e5788b1a4bb39a2a60 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sun, 30 Nov 2014 20:51:13 -0800 Subject: [PATCH 11/82] MAINTENANCE: Automated closing of pull requests. 
This commit exists to close the following pull requests on Github: Closes #2915 (close requested by 'JoshRosen') Closes #3140 (close requested by 'JoshRosen') Closes #3366 (close requested by 'JoshRosen') From 5e7a6dcb8faded33eb0feb302ae3fa3ed4f900fd Mon Sep 17 00:00:00 2001 From: Prabeesh K Date: Sun, 30 Nov 2014 20:51:53 -0800 Subject: [PATCH 12/82] [SPARK-4632] version update Author: Prabeesh K Closes #3495 from prabeesh/master and squashes the following commits: ab03d50 [Prabeesh K] Update pom.xml 8c6437e [Prabeesh K] Revert e10b40a [Prabeesh K] version update dbac9eb [Prabeesh K] Revert ec0b1c3 [Prabeesh K] [SPARK-4632] version update a835505 [Prabeesh K] [SPARK-4632] version update 831391b [Prabeesh K] [SPARK-4632] version update --- external/mqtt/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 703806735b3ff..9025915f4447e 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -43,8 +43,8 @@ org.eclipse.paho - mqtt-client - 0.4.0 + org.eclipse.paho.client.mqttv3 + 1.0.1 org.scalatest From 97eb6d7f511d56fca734ff73fcbd9694403943e0 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 1 Dec 2014 00:29:28 -0800 Subject: [PATCH 13/82] Fix wrong file name pattern in .gitignore In .gitignore, there is an entry for spark-*-bin.tar.gz but considering make-distribution.sh, the name pattern should be spark-*-bin-*.tgz. This change is really small so I don't open issue in JIRA. If it's needed, please let me know. 
Author: Kousuke Saruta Closes #3529 from sarutak/fix-wrong-tgz-pattern and squashes the following commits: de3c70a [Kousuke Saruta] Fixed wrong file name pattern in .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 34939e3a97aaa..c67cffa1c4375 100644 --- a/.gitignore +++ b/.gitignore @@ -49,7 +49,7 @@ dependency-reduced-pom.xml checkpoint derby.log dist/ -spark-*-bin.tar.gz +spark-*-bin-*.tgz unit-tests.log /lib/ rat-results.txt From 6384f42ab2e5c2b3e767ab4a428cda20a8ddcbe1 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 1 Dec 2014 16:31:04 +0800 Subject: [PATCH 14/82] SPARK-2192 [BUILD] Examples Data Not in Binary Distribution Simply, add data/ to distributions. This adds about 291KB (compressed) to the tarball, FYI. Author: Sean Owen Closes #3480 from srowen/SPARK-2192 and squashes the following commits: 47688f1 [Sean Owen] Add data/ to distributions --- make-distribution.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/make-distribution.sh b/make-distribution.sh index 7c0fb8992a155..45c99e42e5a5b 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -201,6 +201,9 @@ if [ -e "$FWDIR"/CHANGES.txt ]; then cp "$FWDIR/CHANGES.txt" "$DISTDIR" fi +# Copy data files +cp -r "$FWDIR/data" "$DISTDIR" + # Copy other things mkdir "$DISTDIR"/conf cp "$FWDIR"/conf/*.template "$DISTDIR"/conf From 1d238f221c3e13c525b3af0c78eda95059ce9fc6 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Mon, 1 Dec 2014 00:32:54 -0800 Subject: [PATCH 15/82] [SPARK-4664][Core] Throw an exception when spark.akka.frameSize > 2047 If `spark.akka.frameSize` > 2047, it will overflow and become negative. Should have some assertion in `maxFrameSizeBytes` to warn people. 
Author: zsxwing Closes #3527 from zsxwing/SPARK-4664 and squashes the following commits: 0089c7a [zsxwing] Throw an exception when spark.akka.frameSize > 2047 --- .../src/main/scala/org/apache/spark/util/AkkaUtils.scala | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala index 10010bdfa1a51..8c2457f56bffe 100644 --- a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala @@ -134,9 +134,16 @@ private[spark] object AkkaUtils extends Logging { Duration.create(conf.getLong("spark.akka.lookupTimeout", 30), "seconds") } + private val AKKA_MAX_FRAME_SIZE_IN_MB = Int.MaxValue / 1024 / 1024 + /** Returns the configured max frame size for Akka messages in bytes. */ def maxFrameSizeBytes(conf: SparkConf): Int = { - conf.getInt("spark.akka.frameSize", 10) * 1024 * 1024 + val frameSizeInMB = conf.getInt("spark.akka.frameSize", 10) + if (frameSizeInMB > AKKA_MAX_FRAME_SIZE_IN_MB) { + throw new IllegalArgumentException("spark.akka.frameSize should not be greater than " + + AKKA_MAX_FRAME_SIZE_IN_MB + "MB") + } + frameSizeInMB * 1024 * 1024 } /** Space reserved for extra data in an Akka message besides serialized task or task result. 
*/ From 30a86acdefd5428af6d6264f59a037e0eefd74b4 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Mon, 1 Dec 2014 00:35:01 -0800 Subject: [PATCH 16/82] [SPARK-4661][Core] Minor code and docs cleanup Author: zsxwing Closes #3521 from zsxwing/SPARK-4661 and squashes the following commits: 03cbe3f [zsxwing] Minor code and docs cleanup --- core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala | 2 +- core/src/test/scala/org/apache/spark/ShuffleSuite.scala | 1 - .../org/apache/spark/sql/hive/execution/HiveTableScan.scala | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala index 56ac7a69be0d3..ed79032893d33 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala @@ -63,7 +63,7 @@ private[spark] class PipedRDD[T: ClassTag]( /** * A FilenameFilter that accepts anything that isn't equal to the name passed in. 
- * @param name of file or directory to leave out + * @param filterName of file or directory to leave out */ class NotEqualsFileNameFilter(filterName: String) extends FilenameFilter { def accept(dir: File, name: String): Boolean = { diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 85e5f9ab444b3..5d20b4dc1561a 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -270,7 +270,6 @@ object ShuffleSuite { def mergeCombineException(x: Int, y: Int): Int = { throw new SparkException("Exception for map-side combine.") - x + y } class NonJavaSerializableClass(val value: Int) extends Comparable[NonJavaSerializableClass] { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala index d39413a44a6cb..8bbcd6fec1f3b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.hive._ * :: DeveloperApi :: * The Hive table scan operator. Column and partition pruning are both handled. * - * @param attributes Attributes to be fetched from the Hive table. + * @param requestedAttributes Attributes to be fetched from the Hive table. * @param relation The Hive table be be scanned. * @param partitionPruningPred An optional partition pruning predicate for partitioned table. 
*/ From 2b233f5fc4beb2c6ed4bc142e923e96f8bad3ec4 Mon Sep 17 00:00:00 2001 From: Madhu Siddalingaiah Date: Mon, 1 Dec 2014 08:45:34 -0800 Subject: [PATCH 17/82] Documentation: add description for repartitionAndSortWithinPartitions Author: Madhu Siddalingaiah Closes #3390 from msiddalingaiah/master and squashes the following commits: cbccbfe [Madhu Siddalingaiah] Documentation: replace with (again) 332f7a2 [Madhu Siddalingaiah] Documentation: replace with cd2b05a [Madhu Siddalingaiah] Merge remote-tracking branch 'upstream/master' 0fc12d7 [Madhu Siddalingaiah] Documentation: add description for repartitionAndSortWithinPartitions --- docs/programming-guide.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/programming-guide.md b/docs/programming-guide.md index 7a16ee8742dc0..5e0d5c15d7069 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -934,6 +934,12 @@ for details. Reshuffle the data in the RDD randomly to create either more or fewer partitions and balance it across them. This always shuffles all data over the network. + + repartitionAndSortWithinPartitions(partitioner) + Repartition the RDD according to the given partitioner and, within each resulting partition, + sort records by their keys. This is more efficient than calling repartition and then sorting within + each partition because it can push the sorting down into the shuffle machinery. + ### Actions From 5db8dcaf494e0dffed4fc22f19b0334d95ab6bfb Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Mon, 1 Dec 2014 13:09:51 -0800 Subject: [PATCH 18/82] [SPARK-4258][SQL][DOC] Documents spark.sql.parquet.filterPushdown Documents `spark.sql.parquet.filterPushdown`, explains why it's turned off by default and when it's safe to be turned on. 
[Review on Reviewable](https://reviewable.io/reviews/apache/spark/3440) Author: Cheng Lian Closes #3440 from liancheng/parquet-filter-pushdown-doc and squashes the following commits: 2104311 [Cheng Lian] Documents spark.sql.parquet.filterPushdown --- docs/sql-programming-guide.md | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 24a68bb083334..96a3209c52eb1 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -146,7 +146,7 @@ describes the various methods for loading data into a SchemaRDD. Spark SQL supports two different methods for converting existing RDDs into SchemaRDDs. The first method uses reflection to infer the schema of an RDD that contains specific types of objects. This -reflection based approach leads to more concise code and works well when you already know the schema +reflection based approach leads to more concise code and works well when you already know the schema while writing your Spark application. The second method for creating SchemaRDDs is through a programmatic interface that allows you to @@ -566,7 +566,7 @@ for teenName in teenNames.collect(): ### Configuration -Configuration of Parquet can be done using the `setConf` method on SQLContext or by running +Configuration of Parquet can be done using the `setConf` method on SQLContext or by running `SET key=value` commands using SQL. @@ -575,8 +575,8 @@ Configuration of Parquet can be done using the `setConf` method on SQLContext or @@ -591,10 +591,20 @@ Configuration of Parquet can be done using the `setConf` method on SQLContext or + + + + + @@ -945,7 +955,7 @@ options. 
## Migration Guide for Shark User -### Scheduling +### Scheduling To set a [Fair Scheduler](job-scheduling.html#fair-scheduler-pools) pool for a JDBC client session, users can set the `spark.sql.thriftserver.scheduler.pool` variable: From bafee67ebad01f7aea2cd393a70b57eb8345eeb0 Mon Sep 17 00:00:00 2001 From: Jacky Li Date: Mon, 1 Dec 2014 13:12:30 -0800 Subject: [PATCH 19/82] [SQL] add @group tab in limit() and count() group tab is missing for scaladoc Author: Jacky Li Closes #3458 from jackylk/patch-7 and squashes the following commits: 0121a70 [Jacky Li] add @group tab in limit() and count() --- sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala index 452baab8eb889..c6d4dabf83bc4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala @@ -225,6 +225,8 @@ class SchemaRDD( * {{{ * schemaRDD.limit(10) * }}} + * + * @group Query */ def limit(limitNum: Int): SchemaRDD = new SchemaRDD(sqlContext, Limit(Literal(limitNum), logicalPlan)) @@ -355,6 +357,8 @@ class SchemaRDD( * Return the number of elements in the RDD. Unlike the base RDD implementation of count, this * implementation leverages the query optimizer to compute the count on the SchemaRDD, which * supports features such as filter pushdown. + * + * @group Query */ @Experimental override def count(): Long = aggregate(Count(Literal(1))).collect().head.getLong(0) From b57365a1ec89e31470f424ff37d5ebc7c90a39d8 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 1 Dec 2014 13:17:56 -0800 Subject: [PATCH 20/82] [SPARK-4358][SQL] Let BigDecimal do checking type compatibility Remove hardcoding max and min values for types. Let BigDecimal do checking type compatibility. 
Author: Liang-Chi Hsieh Closes #3208 from viirya/more_numericLit and squashes the following commits: e9834b4 [Liang-Chi Hsieh] Remove byte and short types for number literal. 1bd1825 [Liang-Chi Hsieh] Fix Indentation and make the modification clearer. cf1a997 [Liang-Chi Hsieh] Modified for comment to add a rule of analysis that adds a cast. 91fe489 [Liang-Chi Hsieh] add Byte and Short. 1bdc69d [Liang-Chi Hsieh] Let BigDecimal do checking type compatibility. --- .../org/apache/spark/sql/catalyst/SqlParser.scala | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index dc1d349f10f1b..a9ff10f2d5533 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -340,18 +340,13 @@ class SqlParser extends AbstractSparkSQLParser { | floatLit ^^ { f => Literal(f.toDouble) } ) - private val longMax = BigDecimal(s"${Long.MaxValue}") - private val longMin = BigDecimal(s"${Long.MinValue}") - private val intMax = BigDecimal(s"${Int.MaxValue}") - private val intMin = BigDecimal(s"${Int.MinValue}") - private def toNarrowestIntegerType(value: String) = { val bigIntValue = BigDecimal(value) bigIntValue match { - case v if v < longMin || v > longMax => v - case v if v < intMin || v > intMax => v.toLong - case v => v.toInt + case v if bigIntValue.isValidInt => v.toIntExact + case v if bigIntValue.isValidLong => v.toLongExact + case v => v } } From 6a9ff19dc06745144d5b311d4f87073c81d53a8f Mon Sep 17 00:00:00 2001 From: ravipesala Date: Mon, 1 Dec 2014 13:26:44 -0800 Subject: [PATCH 21/82] [SPARK-4650][SQL] Supporting multi column support in countDistinct function like count(distinct c1,c2..) in Spark SQL Supporting multi column support in countDistinct function like count(distinct c1,c2..) 
in Spark SQL Author: ravipesala Author: Michael Armbrust Closes #3511 from ravipesala/countdistinct and squashes the following commits: cc4dbb1 [ravipesala] style 070e12a [ravipesala] Supporting multi column support in count(distinct c1,c2..) in Spark SQL --- .../scala/org/apache/spark/sql/catalyst/SqlParser.scala | 3 ++- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index a9ff10f2d5533..a2bcd73b6074f 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -277,7 +277,8 @@ class SqlParser extends AbstractSparkSQLParser { | SUM ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => SumDistinct(exp) } | COUNT ~ "(" ~> "*" <~ ")" ^^ { case _ => Count(Literal(1)) } | COUNT ~ "(" ~> expression <~ ")" ^^ { case exp => Count(exp) } - | COUNT ~> "(" ~> DISTINCT ~> expression <~ ")" ^^ { case exp => CountDistinct(exp :: Nil) } + | COUNT ~> "(" ~> DISTINCT ~> repsep(expression, ",") <~ ")" ^^ + { case exps => CountDistinct(exps) } | APPROXIMATE ~ COUNT ~ "(" ~ DISTINCT ~> expression <~ ")" ^^ { case exp => ApproxCountDistinct(exp) } | APPROXIMATE ~> "(" ~> floatLit ~ ")" ~ COUNT ~ "(" ~ DISTINCT ~ expression <~ ")" ^^ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 84ee3051eb682..f83e647014193 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -992,4 +992,11 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { "nulldata2 on nulldata1.value <=> nulldata2.value"), (1 to 2).map(i => Seq(i))) } + + test("Multi-column 
COUNT(DISTINCT ...)") { + val data = TestData(1,"val_1") :: TestData(2,"val_2") :: Nil + val rdd = sparkContext.parallelize((0 to 1).map(i => data(i))) + rdd.registerTempTable("distinctData") + checkAnswer(sql("SELECT COUNT(DISTINCT key,value) FROM distinctData"), 2) + } } From bc353819cc86c3b0ad75caf81b47744bfc2aeeb3 Mon Sep 17 00:00:00 2001 From: ravipesala Date: Mon, 1 Dec 2014 13:31:27 -0800 Subject: [PATCH 22/82] [SPARK-4658][SQL] Code documentation issue in DDL of datasource API Author: ravipesala Closes #3516 from ravipesala/ddl_doc and squashes the following commits: d101fdf [ravipesala] Style issues fixed d2238cd [ravipesala] Corrected documentation --- .../main/scala/org/apache/spark/sql/parquet/newParquet.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/sources/ddl.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index bea12e6dd674e..6404fec435178 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -39,8 +39,8 @@ import scala.collection.JavaConversions._ /** * Allows creation of parquet based tables using the syntax - * `CREATE TABLE ... USING org.apache.spark.sql.parquet`. Currently the only option required - * is `path`, which should be the location of a collection of, optionally partitioned, + * `CREATE TEMPORARY TABLE ... USING org.apache.spark.sql.parquet`. Currently the only option + * required is `path`, which should be the location of a collection of, optionally partitioned, * parquet files. 
*/ class DefaultSource extends RelationProvider { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 9168ca2fc6fec..ca510cb0b07e3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -67,7 +67,7 @@ private[sql] class DDLParser extends StandardTokenParsers with PackratParsers wi protected lazy val ddl: Parser[LogicalPlan] = createTable /** - * CREATE FOREIGN TEMPORARY TABLE avroTable + * CREATE TEMPORARY TABLE avroTable * USING org.apache.spark.sql.avro * OPTIONS (path "../hive/src/test/resources/data/files/episodes.avro") */ From 7b79957879db4dfcc7c3601cb40ac4fd576259a5 Mon Sep 17 00:00:00 2001 From: wangfei Date: Mon, 1 Dec 2014 14:02:02 -0800 Subject: [PATCH 23/82] [SQL] Minor fix for doc and comment Author: wangfei Closes #3533 from scwf/sql-doc1 and squashes the following commits: 962910b [wangfei] doc and comment fix --- docs/sql-programming-guide.md | 3 ++- .../org/apache/spark/examples/sql/hive/HiveFromSpark.scala | 7 ++++--- .../scala/org/apache/spark/sql/parquet/newParquet.scala | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 96a3209c52eb1..c38ca556530e6 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1002,7 +1002,7 @@ Several caching related features are not supported yet: ## Compatibility with Apache Hive Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently Spark -SQL is based on Hive 0.12.0. +SQL is based on Hive 0.12.0 and 0.13.1. 
#### Deploying in Existing Hive Warehouses @@ -1041,6 +1041,7 @@ Spark SQL supports the vast majority of Hive features, such as: * Sampling * Explain * Partitioned tables +* View * All Hive DDL Functions, including: * `CREATE TABLE` * `CREATE TABLE AS SELECT` diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala index 227acc117502d..138923c4d7f2f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala @@ -29,9 +29,10 @@ object HiveFromSpark { val sc = new SparkContext(sparkConf) val path = s"${System.getenv("SPARK_HOME")}/examples/src/main/resources/kv1.txt" - // A local hive context creates an instance of the Hive Metastore in process, storing - // the warehouse data in the current directory. This location can be overridden by - // specifying a second parameter to the constructor. + // A hive context adds support for finding tables in the MetaStore and writing queries + // using HiveQL. Users who do not have an existing Hive deployment can still create a + // HiveContext. When not configured by the hive-site.xml, the context automatically + // creates metastore_db and warehouse in the current directory. 
val hiveContext = new HiveContext(sc) import hiveContext._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 6404fec435178..9b89c3bfb3307 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -49,7 +49,7 @@ class DefaultSource extends RelationProvider { sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = { val path = - parameters.getOrElse("path", sys.error("'path' must be specifed for parquet tables.")) + parameters.getOrElse("path", sys.error("'path' must be specified for parquet tables.")) ParquetRelation2(path)(sqlContext) } From 5edbcbfb61703398a24ce5162a74aba04e365b0c Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 1 Dec 2014 14:03:57 -0800 Subject: [PATCH 24/82] [SQL][DOC] Date type in SQL programming guide Author: Daoyuan Wang Closes #3535 from adrian-wang/datedoc and squashes the following commits: 18ff1ed [Daoyuan Wang] [DOC] Date type --- docs/sql-programming-guide.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index c38ca556530e6..85d446b9da0e7 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -1057,6 +1057,7 @@ Spark SQL supports the vast majority of Hive features, such as: * `STRING` * `BINARY` * `TIMESTAMP` + * `DATE` * `ARRAY<>` * `MAP<>` * `STRUCT<>` @@ -1157,6 +1158,7 @@ evaluated by the SQL execution engine. A full list of the functions supported c * Datetime type - `TimestampType`: Represents values comprising values of fields year, month, day, hour, minute, and second. + - `DateType`: Represents values comprising values of fields year, month, day. 
* Complex types - `ArrayType(elementType, containsNull)`: Represents values comprising a sequence of elements with the type of `elementType`. `containsNull` is used to indicate if @@ -1264,6 +1266,13 @@ import org.apache.spark.sql._ TimestampType + + + + + @@ -1390,6 +1399,13 @@ please use factory methods provided in DataType.TimestampType + + + + + @@ -1537,6 +1553,13 @@ from pyspark.sql import * TimestampType() + + + + + From 4df60a8cbc58f2877787245c2a83b2de85579c82 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 1 Dec 2014 16:08:51 -0800 Subject: [PATCH 25/82] [SPARK-4529] [SQL] support view with column alias Support view definition like CREATE VIEW view3(valoo) TBLPROPERTIES ("fear" = "factor") AS SELECT upper(value) FROM src WHERE key=86; [valoo as the alias of upper(value)]. This is missing part of SPARK-4239, for a fully view support. Author: Daoyuan Wang Closes #3396 from adrian-wang/viewcolumn and squashes the following commits: 4d001d0 [Daoyuan Wang] support view with column alias --- .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 2 +- .../src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index b9283f668a9b5..f4c42bbc5b03d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -379,7 +379,7 @@ private[hive] object HiveQl { protected def nameExpressions(exprs: Seq[Expression]): Seq[NamedExpression] = { exprs.zipWithIndex.map { case (ne: NamedExpression, _) => ne - case (e, i) => Alias(e, s"c_$i")() + case (e, i) => Alias(e, s"_c$i")() } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index b255a2ebb9778..fecf8faaf4cda 100644 --- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -279,7 +279,7 @@ private[hive] case class HiveGenericUdtf( } override protected def makeOutput() = { - // Use column names when given, otherwise c_1, c_2, ... c_n. + // Use column names when given, otherwise _c1, _c2, ... _cn. if (aliasNames.size == outputDataTypes.size) { aliasNames.zip(outputDataTypes).map { case (attrName, attrDataType) => @@ -288,7 +288,7 @@ private[hive] case class HiveGenericUdtf( } else { outputDataTypes.zipWithIndex.map { case (attrDataType, i) => - AttributeReference(s"c_$i", attrDataType, nullable = true)() + AttributeReference(s"_c$i", attrDataType, nullable = true)() } } } From d3e02dddf06c82e4baa8708050be291d87d4f367 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Mon, 1 Dec 2014 16:39:54 -0800 Subject: [PATCH 26/82] [SPARK-4268][SQL] Use #::: to get benefit from Stream in SqlLexical.allCaseVersions In addition, using `s.isEmpty` to eliminate the string comparison. 
Author: zsxwing Closes #3132 from zsxwing/SPARK-4268 and squashes the following commits: 358e235 [zsxwing] Improvement of allCaseVersions --- .../scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala index b198ed9936d95..f1a1ca6616a21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala @@ -97,10 +97,10 @@ class SqlLexical(val keywords: Seq[String]) extends StdLexical { /** Generate all variations of upper and lower case of a given string */ def allCaseVersions(s: String, prefix: String = ""): Stream[String] = { - if (s == "") { + if (s.isEmpty) { Stream(prefix) } else { - allCaseVersions(s.tail, prefix + s.head.toLower) ++ + allCaseVersions(s.tail, prefix + s.head.toLower) #::: allCaseVersions(s.tail, prefix + s.head.toUpper) } } From b0a46d899541ec17db090aac6f9ea1b287ee9331 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 1 Dec 2014 17:27:14 -0800 Subject: [PATCH 27/82] MAINTENANCE: Automated closing of pull requests. This commit exists to close the following pull requests on Github: Closes #1612 (close requested by 'marmbrus') Closes #2723 (close requested by 'marmbrus') Closes #1737 (close requested by 'marmbrus') Closes #2252 (close requested by 'marmbrus') Closes #2029 (close requested by 'marmbrus') Closes #2386 (close requested by 'marmbrus') Closes #2997 (close requested by 'marmbrus') From 64f3175bf976f5a28e691cedc7a4b333709e0c58 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Tue, 2 Dec 2014 11:40:43 +0800 Subject: [PATCH 28/82] [SPARK-4611][MLlib] Implement the efficient vector norm The vector norm in breeze is implemented by `activeIterator` which is known to be very slow. 
In this PR, an efficient vector norm is implemented, and with this API, `Normalizer` and `k-means` have big performance improvement. Here is the benchmark against mnist8m dataset. a) `Normalizer` Before DenseVector: 68.25secs SparseVector: 17.01secs With this PR DenseVector: 12.71secs SparseVector: 2.73secs b) `k-means` Before DenseVector: 83.46secs SparseVector: 61.60secs With this PR DenseVector: 70.04secs SparseVector: 59.05secs Author: DB Tsai Closes #3462 from dbtsai/norm and squashes the following commits: 63c7165 [DB Tsai] typo 0c3637f [DB Tsai] add import org.apache.spark.SparkContext._ back 6fa616c [DB Tsai] address feedback 9b7cb56 [DB Tsai] move norm to static method 0b632e6 [DB Tsai] kmeans dbed124 [DB Tsai] style c1a877c [DB Tsai] first commit --- .../spark/mllib/clustering/KMeans.scala | 6 +-- .../spark/mllib/feature/Normalizer.scala | 4 +- .../apache/spark/mllib/linalg/Vectors.scala | 51 +++++++++++++++++++ .../spark/mllib/linalg/VectorsSuite.scala | 24 +++++++++ 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 34ea0de706f08..0f8dee58d8464 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.clustering import scala.collection.mutable.ArrayBuffer -import breeze.linalg.{DenseVector => BDV, Vector => BV, norm => breezeNorm} +import breeze.linalg.{DenseVector => BDV, Vector => BV} import org.apache.spark.annotation.Experimental import org.apache.spark.Logging @@ -125,7 +125,7 @@ class KMeans private ( } // Compute squared norms and cache them. 
- val norms = data.map(v => breezeNorm(v.toBreeze, 2.0)) + val norms = data.map(Vectors.norm(_, 2.0)) norms.persist() val breezeData = data.map(_.toBreeze).zip(norms).map { case (v, norm) => new BreezeVectorWithNorm(v, norm) @@ -425,7 +425,7 @@ object KMeans { private[clustering] class BreezeVectorWithNorm(val vector: BV[Double], val norm: Double) extends Serializable { - def this(vector: BV[Double]) = this(vector, breezeNorm(vector, 2.0)) + def this(vector: BV[Double]) = this(vector, Vectors.norm(Vectors.fromBreeze(vector), 2.0)) def this(array: Array[Double]) = this(new BDV[Double](array)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala index a9c2e23717896..1ced26a9b70a2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala @@ -17,8 +17,6 @@ package org.apache.spark.mllib.feature -import breeze.linalg.{norm => brzNorm} - import org.apache.spark.annotation.Experimental import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} @@ -47,7 +45,7 @@ class Normalizer(p: Double) extends VectorTransformer { * @return normalized vector. If the norm of the input is zero, it will return the input vector. */ override def transform(vector: Vector): Vector = { - val norm = brzNorm(vector.toBreeze, p) + val norm = Vectors.norm(vector, p) if (norm != 0.0) { // For dense vector, we've to allocate new memory for new output vector. 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index c6d5fe5bc678c..47d1a76fa361d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -261,6 +261,57 @@ object Vectors { sys.error("Unsupported Breeze vector type: " + v.getClass.getName) } } + + /** + * Returns the p-norm of this vector. + * @param vector input vector. + * @param p norm. + * @return norm in L^p^ space. + */ + private[spark] def norm(vector: Vector, p: Double): Double = { + require(p >= 1.0) + val values = vector match { + case dv: DenseVector => dv.values + case sv: SparseVector => sv.values + case v => throw new IllegalArgumentException("Do not support vector type " + v.getClass) + } + val size = values.size + + if (p == 1) { + var sum = 0.0 + var i = 0 + while (i < size) { + sum += math.abs(values(i)) + i += 1 + } + sum + } else if (p == 2) { + var sum = 0.0 + var i = 0 + while (i < size) { + sum += values(i) * values(i) + i += 1 + } + math.sqrt(sum) + } else if (p == Double.PositiveInfinity) { + var max = 0.0 + var i = 0 + while (i < size) { + val value = math.abs(values(i)) + if (value > max) max = value + i += 1 + } + max + } else { + var sum = 0.0 + var i = 0 + while (i < size) { + sum += math.pow(math.abs(values(i)), p) + i += 1 + } + math.pow(sum, 1.0 / p) + } + } } /** diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index 9492f604af4d5..f99f01450992a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -21,6 +21,7 @@ import breeze.linalg.{DenseMatrix => BDM} import org.scalatest.FunSuite import org.apache.spark.SparkException +import org.apache.spark.mllib.util.TestingUtils._ class 
VectorsSuite extends FunSuite { @@ -197,4 +198,27 @@ class VectorsSuite extends FunSuite { assert(svMap.get(2) === Some(3.1)) assert(svMap.get(3) === Some(0.0)) } + + test("vector p-norm") { + val dv = Vectors.dense(0.0, -1.2, 3.1, 0.0, -4.5, 1.9) + val sv = Vectors.sparse(6, Seq((1, -1.2), (2, 3.1), (3, 0.0), (4, -4.5), (5, 1.9))) + + assert(Vectors.norm(dv, 1.0) ~== dv.toArray.foldLeft(0.0)((a, v) => + a + math.abs(v)) relTol 1E-8) + assert(Vectors.norm(sv, 1.0) ~== sv.toArray.foldLeft(0.0)((a, v) => + a + math.abs(v)) relTol 1E-8) + + assert(Vectors.norm(dv, 2.0) ~== math.sqrt(dv.toArray.foldLeft(0.0)((a, v) => + a + v * v)) relTol 1E-8) + assert(Vectors.norm(sv, 2.0) ~== math.sqrt(sv.toArray.foldLeft(0.0)((a, v) => + a + v * v)) relTol 1E-8) + + assert(Vectors.norm(dv, Double.PositiveInfinity) ~== dv.toArray.map(math.abs).max relTol 1E-8) + assert(Vectors.norm(sv, Double.PositiveInfinity) ~== sv.toArray.map(math.abs).max relTol 1E-8) + + assert(Vectors.norm(dv, 3.7) ~== math.pow(dv.toArray.foldLeft(0.0)((a, v) => + a + math.pow(math.abs(v), 3.7)), 1.0 / 3.7) relTol 1E-8) + assert(Vectors.norm(sv, 3.7) ~== math.pow(sv.toArray.foldLeft(0.0)((a, v) => + a + math.pow(math.abs(v), 3.7)), 1.0 / 3.7) relTol 1E-8) + } } From 6dfe38a03a619282815b4032243a20414eea712e Mon Sep 17 00:00:00 2001 From: zsxwing Date: Tue, 2 Dec 2014 00:18:41 -0800 Subject: [PATCH 29/82] [SPARK-4397][Core] Cleanup 'import SparkContext._' in core This PR cleans up `import SparkContext._` in core for SPARK-4397(#3262) to prove it really works well. 
Author: zsxwing Closes #3530 from zsxwing/SPARK-4397-cleanup and squashes the following commits: 04e2273 [zsxwing] Cleanup 'import SparkContext._' in core --- .../main/scala/org/apache/spark/api/java/JavaRDDLike.scala | 1 - .../main/scala/org/apache/spark/api/python/PythonRDD.scala | 1 - core/src/main/scala/org/apache/spark/package.scala | 4 ++-- .../main/scala/org/apache/spark/rdd/AsyncRDDActions.scala | 1 - .../scala/org/apache/spark/rdd/DoubleRDDFunctions.scala | 1 - .../scala/org/apache/spark/rdd/OrderedRDDFunctions.scala | 7 +++---- .../main/scala/org/apache/spark/rdd/PairRDDFunctions.scala | 2 -- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 6 +++--- .../org/apache/spark/rdd/SequenceFileRDDFunctions.scala | 1 - .../scala/org/apache/spark/ui/UIWorkloadGenerator.scala | 1 - .../apache/spark/util/random/StratifiedSamplingUtils.scala | 1 - .../src/test/scala/org/apache/spark/AccumulatorSuite.scala | 1 - core/src/test/scala/org/apache/spark/CheckpointSuite.scala | 1 - .../test/scala/org/apache/spark/ContextCleanerSuite.scala | 1 - .../src/test/scala/org/apache/spark/DistributedSuite.scala | 1 - .../org/apache/spark/ExternalShuffleServiceSuite.scala | 1 - core/src/test/scala/org/apache/spark/FailureSuite.scala | 1 - core/src/test/scala/org/apache/spark/FileServerSuite.scala | 1 - .../test/scala/org/apache/spark/FutureActionSuite.scala | 1 - .../scala/org/apache/spark/ImplicitOrderingSuite.scala | 1 - .../test/scala/org/apache/spark/JobCancellationSuite.scala | 1 - .../test/scala/org/apache/spark/PartitioningSuite.scala | 1 - core/src/test/scala/org/apache/spark/ShuffleSuite.scala | 1 - .../test/scala/org/apache/spark/StatusTrackerSuite.scala | 1 - .../scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala | 1 - .../test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala | 1 - .../scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala | 1 - core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala | 2 -- .../src/test/scala/org/apache/spark/rdd/SortingSuite.scala | 
1 - .../org/apache/spark/scheduler/DAGSchedulerSuite.scala | 1 - .../org/apache/spark/scheduler/ReplayListenerSuite.scala | 1 - .../org/apache/spark/scheduler/SparkListenerSuite.scala | 1 - .../spark/serializer/KryoSerializerDistributedSuite.scala | 1 - .../test/scala/org/apache/spark/ui/UISeleniumSuite.scala | 1 - .../spark/util/collection/ExternalAppendOnlyMapSuite.scala | 1 - .../apache/spark/util/collection/ExternalSorterSuite.scala | 1 - 36 files changed, 8 insertions(+), 44 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 5a8e5bb1f721a..ac42294d56def 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -28,7 +28,6 @@ import com.google.common.base.Optional import org.apache.hadoop.io.compress.CompressionCodec import org.apache.spark._ -import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaPairRDD._ import org.apache.spark.api.java.JavaSparkContext.fakeClassTag diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index e0bc00e1eb249..bad40e6529f74 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -34,7 +34,6 @@ import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.{InputFormat, OutputFormat, JobConf} import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat, OutputFormat => NewOutputFormat} import org.apache.spark._ -import org.apache.spark.SparkContext._ import org.apache.spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD diff --git 
a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index 436dbed1730bc..5ad73c3d27f47 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -27,8 +27,8 @@ package org.apache * contains operations available only on RDDs of Doubles; and * [[org.apache.spark.rdd.SequenceFileRDDFunctions]] contains operations available on RDDs that can * be saved as SequenceFiles. These operations are automatically available on any RDD of the right - * type (e.g. RDD[(Int, Int)] through implicit conversions when you - * `import org.apache.spark.SparkContext._`. + * type (e.g. RDD[(Int, Int)] through implicit conversions except `saveAsSequenceFile`. You need to + * `import org.apache.spark.SparkContext._` to make `saveAsSequenceFile` work. * * Java programmers should reference the [[org.apache.spark.api.java]] package * for Spark programming APIs in Java. diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala index 9f9f10b7ebc3a..646df283ac069 100644 --- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala @@ -27,7 +27,6 @@ import org.apache.spark.{ComplexFutureAction, FutureAction, Logging} /** * A set of asynchronous RDD actions available through an implicit conversion. - * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. 
*/ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Logging { diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index e0494ee39657c..e66f83bb34e30 100644 --- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -27,7 +27,6 @@ import org.apache.spark.util.StatCounter /** * Extra functions available on RDDs of Doubles through an implicit conversion. - * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { /** Add up the elements in this RDD. */ diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala index d0dbfef35d03c..144f679a59460 100644 --- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -24,10 +24,9 @@ import org.apache.spark.annotation.DeveloperApi /** * Extra functions available on RDDs of (key, value) pairs where the key is sortable through - * an implicit conversion. Import `org.apache.spark.SparkContext._` at the top of your program to - * use these functions. They will work with any key type `K` that has an implicit `Ordering[K]` in - * scope. Ordering objects already exist for all of the standard primitive types. Users can also - * define their own orderings for custom types, or to override the default ordering. The implicit + * an implicit conversion. They will work with any key type `K` that has an implicit `Ordering[K]` + * in scope. Ordering objects already exist for all of the standard primitive types. Users can also + * define their own orderings for custom types, or to override the default ordering. 
The implicit * ordering that is in the closest scope will be used. * * {{{ diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 8c2c959e73bb6..e78e57678852f 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -37,7 +37,6 @@ RecordWriter => NewRecordWriter} import org.apache.spark._ import org.apache.spark.Partitioner.defaultPartitioner -import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.executor.{DataWriteMethod, OutputMetrics} @@ -50,7 +49,6 @@ import org.apache.spark.util.random.StratifiedSamplingUtils /** * Extra functions available on RDDs of (key, value) pairs through an implicit conversion. - * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ class PairRDDFunctions[K, V](self: RDD[(K, V)]) (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K] = null) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 3add4a76192ca..8dfd952298f30 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -34,7 +34,6 @@ import org.apache.hadoop.mapred.TextOutputFormat import org.apache.spark._ import org.apache.spark.Partitioner._ -import org.apache.spark.SparkContext._ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaRDD import org.apache.spark.broadcast.Broadcast @@ -58,8 +57,9 @@ import org.apache.spark.util.random.{BernoulliSampler, PoissonSampler, Bernoulli * Doubles; and * [[org.apache.spark.rdd.SequenceFileRDDFunctions]] contains operations available on RDDs that * can be saved as SequenceFiles. 
- * These operations are automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] - * through implicit conversions when you `import org.apache.spark.SparkContext._`. + * All operations are automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] + * through implicit conversions except `saveAsSequenceFile`. You need to + * `import org.apache.spark.SparkContext._` to make `saveAsSequenceFile` work. * * Internally, each RDD is characterized by five main properties: * diff --git a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 9a1efc83cbe6a..2b48916951430 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -24,7 +24,6 @@ import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.SequenceFileOutputFormat import org.apache.spark.Logging -import org.apache.spark.SparkContext._ /** * Extra functions available on RDDs of (key, value) pairs to create a Hadoop SequenceFile, diff --git a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala index 18d2b5075aa08..b4677447c8872 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala @@ -20,7 +20,6 @@ package org.apache.spark.ui import scala.util.Random import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.SparkContext._ import org.apache.spark.scheduler.SchedulingMode /** diff --git a/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala b/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala index 4fa357edd6f07..2ae308dacf1ae 100644 --- a/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala +++ 
b/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala @@ -25,7 +25,6 @@ import scala.reflect.ClassTag import org.apache.commons.math3.distribution.PoissonDistribution import org.apache.spark.Logging -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD /** diff --git a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala index 52d1d5277658e..f087fc550dde3 100644 --- a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala @@ -22,7 +22,6 @@ import scala.collection.mutable import org.scalatest.FunSuite import org.scalatest.Matchers -import org.apache.spark.SparkContext._ class AccumulatorSuite extends FunSuite with Matchers with LocalSparkContext { diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index a41914a1a9d0c..3b10b3a042317 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -23,7 +23,6 @@ import scala.reflect.ClassTag import org.scalatest.FunSuite -import org.apache.spark.SparkContext._ import org.apache.spark.rdd._ import org.apache.spark.storage.{BlockId, StorageLevel, TestBlockId} import org.apache.spark.util.Utils diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index 2e3fc5ef0e336..ae2ae7ed0d3aa 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -28,7 +28,6 @@ import org.scalatest.concurrent.{PatienceConfiguration, Eventually} import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.storage._ import 
org.apache.spark.shuffle.hash.HashShuffleManager diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index 429199f2075c6..998f3008ec0ea 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -23,7 +23,6 @@ import org.scalatest.concurrent.Timeouts._ import org.scalatest.Matchers import org.scalatest.time.{Millis, Span} -import org.apache.spark.SparkContext._ import org.apache.spark.storage.{RDDBlockId, StorageLevel} class NotSerializableClass diff --git a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala index 55799f55146cb..cc3592ee43a35 100644 --- a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala @@ -21,7 +21,6 @@ import java.util.concurrent.atomic.AtomicInteger import org.scalatest.BeforeAndAfterAll -import org.apache.spark.SparkContext._ import org.apache.spark.network.TransportContext import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.server.TransportServer diff --git a/core/src/test/scala/org/apache/spark/FailureSuite.scala b/core/src/test/scala/org/apache/spark/FailureSuite.scala index 2229e6acc425d..1212d0b43207d 100644 --- a/core/src/test/scala/org/apache/spark/FailureSuite.scala +++ b/core/src/test/scala/org/apache/spark/FailureSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark import org.scalatest.FunSuite -import org.apache.spark.SparkContext._ import org.apache.spark.util.NonSerializable import java.io.NotSerializableException diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala index 379c2a6ea4b55..49426545c767e 100644 --- 
a/core/src/test/scala/org/apache/spark/FileServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala @@ -23,7 +23,6 @@ import java.util.jar.{JarEntry, JarOutputStream} import com.google.common.io.ByteStreams import org.scalatest.FunSuite -import org.apache.spark.SparkContext._ import org.apache.spark.util.Utils class FileServerSuite extends FunSuite with LocalSparkContext { diff --git a/core/src/test/scala/org/apache/spark/FutureActionSuite.scala b/core/src/test/scala/org/apache/spark/FutureActionSuite.scala index db9c25fc457a4..f5cdb01ec9504 100644 --- a/core/src/test/scala/org/apache/spark/FutureActionSuite.scala +++ b/core/src/test/scala/org/apache/spark/FutureActionSuite.scala @@ -22,7 +22,6 @@ import scala.concurrent.duration.Duration import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} -import org.apache.spark.SparkContext._ class FutureActionSuite extends FunSuite with BeforeAndAfter with Matchers with LocalSparkContext { diff --git a/core/src/test/scala/org/apache/spark/ImplicitOrderingSuite.scala b/core/src/test/scala/org/apache/spark/ImplicitOrderingSuite.scala index 8e4a9e2c9f56c..d895230ecf330 100644 --- a/core/src/test/scala/org/apache/spark/ImplicitOrderingSuite.scala +++ b/core/src/test/scala/org/apache/spark/ImplicitOrderingSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark import org.scalatest.FunSuite import org.apache.spark.rdd.RDD -import org.apache.spark.SparkContext._ class ImplicitOrderingSuite extends FunSuite with LocalSparkContext { // Tests that PairRDDFunctions grabs an implicit Ordering in various cases where it should. 
diff --git a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala index a57430e829ced..41ed2bce55ce1 100644 --- a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala +++ b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala @@ -27,7 +27,6 @@ import scala.concurrent.future import org.scalatest.{BeforeAndAfter, FunSuite} import org.scalatest.Matchers -import org.apache.spark.SparkContext._ import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} /** diff --git a/core/src/test/scala/org/apache/spark/PartitioningSuite.scala b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala index 646ede30ae6ff..b7532314ada01 100644 --- a/core/src/test/scala/org/apache/spark/PartitioningSuite.scala +++ b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala @@ -22,7 +22,6 @@ import scala.math.abs import org.scalatest.{FunSuite, PrivateMethodTester} -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.util.StatCounter diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 5d20b4dc1561a..5a133c0490444 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark import org.scalatest.FunSuite import org.scalatest.Matchers -import org.apache.spark.SparkContext._ import org.apache.spark.ShuffleSuite.NonJavaSerializableClass import org.apache.spark.rdd.{CoGroupedRDD, OrderedRDDFunctions, RDD, ShuffledRDD, SubtractedRDD} import org.apache.spark.serializer.KryoSerializer diff --git a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala index 8577e4ac7e33e..41d6ea29d5b06 100644 --- a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala @@ -25,7 +25,6 @@ import org.scalatest.{Matchers, FunSuite} import org.scalatest.concurrent.Eventually._ import org.apache.spark.JobExecutionStatus._ -import org.apache.spark.SparkContext._ class StatusTrackerSuite extends FunSuite with Matchers with LocalSparkContext { diff --git a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala index 3b833f2e41867..f2b0ea1063a72 100644 --- a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala @@ -27,7 +27,6 @@ import org.scalatest.{BeforeAndAfterAll, FunSuite} import org.scalatest.concurrent.Timeouts import org.scalatest.time.SpanSugar._ -import org.apache.spark.SparkContext._ import org.apache.spark.{SparkContext, SparkException, LocalSparkContext} class AsyncRDDActionsSuite extends FunSuite with BeforeAndAfterAll with Timeouts { diff --git a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala index f89bdb6e07dea..de306533752c1 100644 --- a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.rdd import org.scalatest.FunSuite import org.apache.spark._ -import org.apache.spark.SparkContext._ class DoubleRDDSuite extends FunSuite with SharedSparkContext { // Verify tests on the histogram functionality. 
We test with both evenly diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 3620e251cc139..108f70af43f37 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -29,7 +29,6 @@ import org.apache.hadoop.mapreduce.{JobContext => NewJobContext, OutputCommitter OutputFormat => NewOutputFormat, RecordWriter => NewRecordWriter, TaskAttemptContext => NewTaskAttempContext} import org.apache.spark.{Partitioner, SharedSparkContext} -import org.apache.spark.SparkContext._ import org.apache.spark.util.Utils import org.scalatest.FunSuite diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index e079ca3b1e896..6d9be796144b6 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -24,7 +24,6 @@ import scala.reflect.ClassTag import org.scalatest.FunSuite import org.apache.spark._ -import org.apache.spark.SparkContext._ import org.apache.spark.util.Utils import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} @@ -97,7 +96,6 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("partitioner aware union") { - import SparkContext._ def makeRDDWithPartitioner(seq: Seq[Int]) = { sc.makeRDD(seq, 1) .map(x => (x, null)) diff --git a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala index 656917628f7a8..a40f2ffeffdf9 100644 --- a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala @@ -21,7 +21,6 @@ import org.scalatest.FunSuite import org.scalatest.Matchers import org.apache.spark.{Logging, SharedSparkContext} -import org.apache.spark.SparkContext._ class SortingSuite 
extends FunSuite with SharedSparkContext with Matchers with Logging { diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index bdd721dc7eaf7..436eea4f1fdcf 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -27,7 +27,6 @@ import org.scalatest.concurrent.Timeouts import org.scalatest.time.SpanSugar._ import org.apache.spark._ -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.storage.{BlockId, BlockManagerId, BlockManagerMaster} diff --git a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala index e05f373392d4a..90bdfe07f61c9 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala @@ -22,7 +22,6 @@ import java.io.{File, PrintWriter} import org.json4s.jackson.JsonMethods._ import org.scalatest.{BeforeAndAfter, FunSuite} -import org.apache.spark.SparkContext._ import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.io.CompressionCodec diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index abe0dc35b07e2..b276343cb412c 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -25,7 +25,6 @@ import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} import org.scalatest.Matchers import org.apache.spark.{LocalSparkContext, SparkContext} -import 
org.apache.spark.SparkContext._ import org.apache.spark.executor.TaskMetrics class SparkListenerSuite extends FunSuite with LocalSparkContext with Matchers diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala index 11e8c9c4cb37f..855f1b6276089 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala @@ -23,7 +23,6 @@ import com.esotericsoftware.kryo.Kryo import org.scalatest.FunSuite import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkEnv, TestUtils} -import org.apache.spark.SparkContext._ import org.apache.spark.serializer.KryoDistributedTest._ class KryoSerializerDistributedSuite extends FunSuite { diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index d2857b8b55664..787f4c2b5a8b2 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -27,7 +27,6 @@ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ import org.apache.spark._ -import org.apache.spark.SparkContext._ import org.apache.spark.LocalSparkContext._ import org.apache.spark.api.java.StorageLevels import org.apache.spark.shuffle.FetchFailedException diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala index 511d76c9144cc..48f79ea651018 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala @@ -22,7 +22,6 @@ import scala.collection.mutable.ArrayBuffer import 
org.scalatest.FunSuite import org.apache.spark._ -import org.apache.spark.SparkContext._ import org.apache.spark.io.CompressionCodec class ExternalAppendOnlyMapSuite extends FunSuite with LocalSparkContext { diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala index 3cb42d416de4f..72d96798b1141 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala @@ -22,7 +22,6 @@ import scala.collection.mutable.ArrayBuffer import org.scalatest.{PrivateMethodTester, FunSuite} import org.apache.spark._ -import org.apache.spark.SparkContext._ import scala.util.Random From d9a148ba6a67a01e4bf77c35c41dd4cbc8918c82 Mon Sep 17 00:00:00 2001 From: Kay Ousterhout Date: Tue, 2 Dec 2014 09:06:02 -0800 Subject: [PATCH 30/82] [SPARK-4686] Link to allowed master URLs is broken The link points to the old scala programming guide; it should point to the submitting applications page. This should be backported to 1.1.2 (it's been broken as of 1.0). Author: Kay Ousterhout Closes #3542 from kayousterhout/SPARK-4686 and squashes the following commits: a8fc43b [Kay Ousterhout] [SPARK-4686] Link to allowed master URLs is broken --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 0b77f5ab645c9..4b4bbea564d3a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -98,7 +98,7 @@ of the most common options to set are: From b1f8fe316a6904841f0159ec02159b1af0ad730e Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 2 Dec 2014 11:59:15 -0800 Subject: [PATCH 31/82] Indent license header properly for interfaces.scala. A very small nit update. 
Author: Reynold Xin Closes #3552 from rxin/license-header and squashes the following commits: df8d1a4 [Reynold Xin] Indent license header properly for interfaces.scala. --- .../apache/spark/sql/sources/interfaces.scala | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 2b8fc05fc0102..939b4e15163a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.spark.sql.sources import org.apache.spark.annotation.{Experimental, DeveloperApi} @@ -111,5 +111,3 @@ abstract class PrunedFilteredScan extends BaseRelation { abstract class CatalystScan extends BaseRelation { def buildScan(requiredColumns: Seq[Attribute], filters: Seq[Expression]): RDD[Row] } - - From e75e04f980281389b881df76f59ba1adc6338629 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 2 Dec 2014 12:07:52 -0800 Subject: [PATCH 32/82] [SPARK-4536][SQL] Add sqrt and abs to Spark SQL DSL Spark SQL has embedded sqrt and abs but DSL doesn't support those functions.
Author: Kousuke Saruta Closes #3401 from sarutak/dsl-missing-operator and squashes the following commits: 07700cf [Kousuke Saruta] Modified Literal(null, NullType) to Literal(null) in DslQuerySuite 8f366f8 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into dsl-missing-operator 1b88e2e [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into dsl-missing-operator 0396f89 [Kousuke Saruta] Added sqrt and abs to Spark SQL DSL --- .../spark/sql/catalyst/dsl/package.scala | 2 + .../sql/catalyst/expressions/arithmetic.scala | 1 - .../org/apache/spark/sql/DslQuerySuite.scala | 68 +++++++++++++++++++ .../scala/org/apache/spark/sql/TestData.scala | 4 ++ 4 files changed, 74 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 31dc5a58e68e5..70dabc4e6c2e9 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -147,6 +147,8 @@ package object dsl { def max(e: Expression) = Max(e) def upper(e: Expression) = Upper(e) def lower(e: Expression) = Lower(e) + def sqrt(e: Expression) = Sqrt(e) + def abs(e: Expression) = Abs(e) implicit class DslSymbol(sym: Symbol) extends ImplicitAttribute { def s = sym.name } // TODO more implicit class for literal? 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index d17c9553ac24e..900b7586adcda 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.analysis.UnresolvedException import org.apache.spark.sql.catalyst.types._ -import scala.math.pow case class UnaryMinus(child: Expression) extends UnaryExpression { type EvaluatedType = Any diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala index 94bd97758fe94..1a330a2bb6d46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala @@ -282,4 +282,72 @@ class DslQuerySuite extends QueryTest { (1, "1", "11") :: (2, "2", "22") :: (3, "3", "33") :: Nil ) } + + test("sqrt") { + checkAnswer( + testData.select(sqrt('key)).orderBy('key asc), + (1 to 100).map(n => Seq(math.sqrt(n))) + ) + + checkAnswer( + testData.select(sqrt('value), 'key).orderBy('key asc, 'value asc), + (1 to 100).map(n => Seq(math.sqrt(n), n)) + ) + + checkAnswer( + testData.select(sqrt(Literal(null))), + (1 to 100).map(_ => Seq(null)) + ) + } + + test("abs") { + checkAnswer( + testData.select(abs('key)).orderBy('key asc), + (1 to 100).map(n => Seq(n)) + ) + + checkAnswer( + negativeData.select(abs('key)).orderBy('key desc), + (1 to 100).map(n => Seq(n)) + ) + + checkAnswer( + testData.select(abs(Literal(null))), + (1 to 100).map(_ => Seq(null)) + ) + } + + test("upper") { + checkAnswer( + lowerCaseData.select(upper('l)), + ('a' to 'd').map(c => Seq(c.toString.toUpperCase())) + ) + + checkAnswer( + 
testData.select(upper('value), 'key), + (1 to 100).map(n => Seq(n.toString, n)) + ) + + checkAnswer( + testData.select(upper(Literal(null))), + (1 to 100).map(n => Seq(null)) + ) + } + + test("lower") { + checkAnswer( + upperCaseData.select(lower('L)), + ('A' to 'F').map(c => Seq(c.toString.toLowerCase())) + ) + + checkAnswer( + testData.select(lower('value), 'key), + (1 to 100).map(n => Seq(n.toString, n)) + ) + + checkAnswer( + testData.select(lower(Literal(null))), + (1 to 100).map(n => Seq(null)) + ) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala index 933e027436e75..bb553a0a1e50c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala @@ -32,6 +32,10 @@ object TestData { (1 to 100).map(i => TestData(i, i.toString))).toSchemaRDD testData.registerTempTable("testData") + val negativeData = TestSQLContext.sparkContext.parallelize( + (1 to 100).map(i => TestData(-i, (-i).toString))).toSchemaRDD + negativeData.registerTempTable("negativeData") + case class LargeAndSmallInts(a: Int, b: Int) val largeAndSmallInts = TestSQLContext.sparkContext.parallelize( From 69b6fed206565ecb0173d3757bcb5110422887c3 Mon Sep 17 00:00:00 2001 From: baishuo Date: Tue, 2 Dec 2014 12:12:03 -0800 Subject: [PATCH 33/82] [SPARK-4663][sql]add finally to avoid resource leak Author: baishuo Closes #3526 from baishuo/master-trycatch and squashes the following commits: d446e14 [baishuo] correct the code style b36bf96 [baishuo] correct the code style ae0e447 [baishuo] add finally to avoid resource leak --- .../spark/sql/parquet/ParquetTableOperations.scala | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala index 
0e36852ddd9b0..232ef90b017a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala @@ -302,11 +302,14 @@ case class InsertIntoParquetTable( val committer = format.getOutputCommitter(hadoopContext) committer.setupTask(hadoopContext) val writer = format.getRecordWriter(hadoopContext) - while (iter.hasNext) { - val row = iter.next() - writer.write(null, row) + try { + while (iter.hasNext) { + val row = iter.next() + writer.write(null, row) + } + } finally { + writer.close(hadoopContext) } - writer.close(hadoopContext) committer.commitTask(hadoopContext) 1 } From 10664276007beca3843638e558f504cad44b1fb3 Mon Sep 17 00:00:00 2001 From: YanTangZhai Date: Tue, 2 Dec 2014 14:12:48 -0800 Subject: [PATCH 34/82] [SPARK-4676][SQL] JavaSchemaRDD.schema may throw NullType MatchError if sql has null val jsc = new org.apache.spark.api.java.JavaSparkContext(sc) val jhc = new org.apache.spark.sql.hive.api.java.JavaHiveContext(jsc) val nrdd = jhc.hql("select null from spark_test.for_test") println(nrdd.schema) Then the error is thrown as follows: scala.MatchError: NullType (of class org.apache.spark.sql.catalyst.types.NullType$) at org.apache.spark.sql.types.util.DataTypeConversions$.asJavaDataType(DataTypeConversions.scala:43) Author: YanTangZhai Author: yantangzhai Author: Michael Armbrust Closes #3538 from YanTangZhai/MatchNullType and squashes the following commits: e052dff [yantangzhai] [SPARK-4676] [SQL] JavaSchemaRDD.schema may throw NullType MatchError if sql has null 4b4bb34 [yantangzhai] [SPARK-4676] [SQL] JavaSchemaRDD.schema may throw NullType MatchError if sql has null 896c7b7 [yantangzhai] fix NullType MatchError in JavaSchemaRDD when sql has null 6e643f8 [YanTangZhai] Merge pull request #11 from apache/master e249846 [YanTangZhai] Merge pull request #10 from apache/master d26d982 [YanTangZhai] Merge pull request #9 from apache/master 76d4027 
[YanTangZhai] Merge pull request #8 from apache/master 03b62b0 [YanTangZhai] Merge pull request #7 from apache/master 8a00106 [YanTangZhai] Merge pull request #6 from apache/master cbcba66 [YanTangZhai] Merge pull request #3 from apache/master cdef539 [YanTangZhai] Merge pull request #1 from apache/master --- .../apache/spark/sql/api/java/DataType.java | 5 ++++ .../apache/spark/sql/api/java/NullType.java | 27 +++++++++++++++++++ .../scala/org/apache/spark/sql/package.scala | 10 +++++++ .../sql/types/util/DataTypeConversions.scala | 1 + .../spark/sql/api/java/JavaSQLSuite.scala | 16 +++++++++++ 5 files changed, 59 insertions(+) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/api/java/NullType.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/DataType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/DataType.java index c38354039d686..c69bbd5736a5b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/api/java/DataType.java +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/DataType.java @@ -82,6 +82,11 @@ public abstract class DataType { */ public static final ShortType ShortType = new ShortType(); + /** + * Gets the NullType object. + */ + public static final NullType NullType = new NullType(); + /** * Creates an ArrayType by specifying the data type of elements ({@code elementType}). * The field of {@code containsNull} is set to {@code true}. diff --git a/sql/core/src/main/java/org/apache/spark/sql/api/java/NullType.java b/sql/core/src/main/java/org/apache/spark/sql/api/java/NullType.java new file mode 100644 index 0000000000000..6d5ecdf46e551 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/api/java/NullType.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.api.java; + +/** + * The data type representing null and NULL values. + * + * {@code NullType} is represented by the singleton object {@link DataType#NullType}. + */ +public class NullType extends DataType { + protected NullType() {} +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index 51dad54f1a3f3..1fd8e6220f83b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -263,6 +263,16 @@ package object sql { @DeveloperApi val ShortType = catalyst.types.ShortType + /** + * :: DeveloperApi :: + * + * The data type representing `NULL` values. 
+ * + * @group dataType + */ + @DeveloperApi + val NullType = catalyst.types.NullType + /** * :: DeveloperApi :: * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala b/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala index d4258156f18f6..4160a80621c77 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala @@ -62,6 +62,7 @@ protected[sql] object DataTypeConversions { case IntegerType => JDataType.IntegerType case LongType => JDataType.LongType case ShortType => JDataType.ShortType + case NullType => JDataType.NullType case arrayType: ArrayType => JDataType.createArrayType( asJavaDataType(arrayType.elementType), arrayType.containsNull) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/api/java/JavaSQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/api/java/JavaSQLSuite.scala index c9012c9e47cff..8afc3a9fb2187 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/api/java/JavaSQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/api/java/JavaSQLSuite.scala @@ -68,6 +68,22 @@ class JavaSQLSuite extends FunSuite { javaSqlCtx.sql("SELECT * FROM people").collect() } + test("schema with null from JavaBeans") { + val person = new PersonBean + person.setName("Michael") + person.setAge(29) + + val rdd = javaCtx.parallelize(person :: Nil) + val schemaRDD = javaSqlCtx.applySchema(rdd, classOf[PersonBean]) + + schemaRDD.registerTempTable("people") + val nullRDD = javaSqlCtx.sql("SELECT null FROM people") + val structFields = nullRDD.schema.getFields() + assert(structFields.size == 1) + assert(structFields(0).getDataType().isInstanceOf[NullType]) + assert(nullRDD.collect.head.row === Seq(null)) + } + test("all types in JavaBeans") { val bean = new AllTypesBean bean.setStringField("") From f6df609dcc4f4a18c0f1c74b1ae0800cf09fa7ae Mon Sep 17 
00:00:00 2001 From: Daoyuan Wang Date: Tue, 2 Dec 2014 14:21:12 -0800 Subject: [PATCH 35/82] [SPARK-4593][SQL] Return null when denominator is 0 SELECT max(1/0) FROM src would return a very large number, which is obviously not right. For hive-0.12, hive would return `Infinity` for 1/0, while for hive-0.13.1, it is `NULL` for 1/0. I think it is better to keep our behavior with newer Hive version. This PR ensures that when the divider is 0, the result of expression should be NULL, same with hive-0.13.1 Author: Daoyuan Wang Closes #3443 from adrian-wang/div and squashes the following commits: 2e98677 [Daoyuan Wang] fix code gen for divide 0 85c28ba [Daoyuan Wang] temp 36236a5 [Daoyuan Wang] add test cases 6f5716f [Daoyuan Wang] fix comments cee92bd [Daoyuan Wang] avoid evaluation 2 times 22ecd9a [Daoyuan Wang] fix style cf28c58 [Daoyuan Wang] divide fix 2dfe50f [Daoyuan Wang] return null when divider is 0 of Double type --- .../sql/catalyst/expressions/Expression.scala | 41 +++++++++++++++++++ .../sql/catalyst/expressions/arithmetic.scala | 13 ++++-- .../expressions/codegen/CodeGenerator.scala | 19 ++++++++- .../ExpressionEvaluationSuite.scala | 15 +++++++ 4 files changed, 83 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 39b120e8de485..bc45881e42748 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -153,6 +153,25 @@ abstract class Expression extends TreeNode[Expression] { } } + /** + * Evaluation helper function for 1 Fractional children expression. + * if the expression result is null, the evaluation result should be null. 
+ */ + @inline + protected final def f1(i: Row, e1: Expression, f: ((Fractional[Any], Any) => Any)): Any = { + val evalE1 = e1.eval(i: Row) + if(evalE1 == null) { + null + } else { + e1.dataType match { + case ft: FractionalType => + f.asInstanceOf[(Fractional[ft.JvmType], ft.JvmType) => ft.JvmType]( + ft.fractional, evalE1.asInstanceOf[ft.JvmType]) + case other => sys.error(s"Type $other does not support fractional operations") + } + } + } + /** * Evaluation helper function for 2 Integral children expressions. Those expressions are * supposed to be in the same data type, and also the return type. @@ -189,6 +208,28 @@ abstract class Expression extends TreeNode[Expression] { } } + /** + * Evaluation helper function for 1 Integral children expression. + * if the expression result is null, the evaluation result should be null. + */ + @inline + protected final def i1(i: Row, e1: Expression, f: ((Integral[Any], Any) => Any)): Any = { + val evalE1 = e1.eval(i) + if(evalE1 == null) { + null + } else { + e1.dataType match { + case i: IntegralType => + f.asInstanceOf[(Integral[i.JvmType], i.JvmType) => i.JvmType]( + i.integral, evalE1.asInstanceOf[i.JvmType]) + case i: FractionalType => + f.asInstanceOf[(Integral[i.JvmType], i.JvmType) => i.JvmType]( + i.asIntegral, evalE1.asInstanceOf[i.JvmType]) + case other => sys.error(s"Type $other does not support numeric operations") + } + } + } + /** * Evaluation helper function for 2 Comparable children expressions. 
Those expressions are * supposed to be in the same data type, and the return type should be Integer: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 900b7586adcda..7ec18b8419e20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -105,11 +105,16 @@ case class Multiply(left: Expression, right: Expression) extends BinaryArithmeti case class Divide(left: Expression, right: Expression) extends BinaryArithmetic { def symbol = "/" - override def nullable = left.nullable || right.nullable || dataType.isInstanceOf[DecimalType] + override def nullable = true - override def eval(input: Row): Any = dataType match { - case _: FractionalType => f2(input, left, right, _.div(_, _)) - case _: IntegralType => i2(input, left , right, _.quot(_, _)) + override def eval(input: Row): Any = { + val evalE2 = right.eval(input) + dataType match { + case _ if evalE2 == null => null + case _ if evalE2 == 0 => null + case ft: FractionalType => f1(input, left, _.div(_, evalE2.asInstanceOf[ft.JvmType])) + case it: IntegralType => i1(input, left, _.quot(_, evalE2.asInstanceOf[it.JvmType])) + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 67f8d411b6bb4..ab71e15e1f573 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -359,7 +359,24 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin case Add(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => 
q"$eval1 + $eval2" } case Subtract(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 - $eval2" } case Multiply(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 * $eval2" } - case Divide(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 / $eval2" } + case Divide(e1, e2) => + val eval1 = expressionEvaluator(e1) + val eval2 = expressionEvaluator(e2) + + eval1.code ++ eval2.code ++ + q""" + var $nullTerm = false + var $primitiveTerm: ${termForType(e1.dataType)} = 0 + + if (${eval1.nullTerm} || ${eval2.nullTerm} ) { + $nullTerm = true + } else if (${eval2.primitiveTerm} == 0) + $nullTerm = true + else { + $nullTerm = false + $primitiveTerm = ${eval1.primitiveTerm} / ${eval2.primitiveTerm} + } + """.children case IsNotNull(e) => val eval = expressionEvaluator(e) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 3f5b9f698f827..25f56424888aa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -149,6 +149,21 @@ class ExpressionEvaluationSuite extends FunSuite { checkEvaluation(In(Literal(1), Seq(Literal(1), Literal(2))) && In(Literal(2), Seq(Literal(1), Literal(2))), true) } + test("Divide") { + checkEvaluation(Divide(Literal(2), Literal(1)), 2) + checkEvaluation(Divide(Literal(1.0), Literal(2.0)), 0.5) + checkEvaluation(Divide(Literal(1), Literal(2)), 0) + checkEvaluation(Divide(Literal(1), Literal(0)), null) + checkEvaluation(Divide(Literal(1.0), Literal(0.0)), null) + checkEvaluation(Divide(Literal(0.0), Literal(0.0)), null) + checkEvaluation(Divide(Literal(0), Literal(null, IntegerType)), null) + checkEvaluation(Divide(Literal(1), Literal(null, IntegerType)), null) + 
checkEvaluation(Divide(Literal(null, IntegerType), Literal(0)), null) + checkEvaluation(Divide(Literal(null, DoubleType), Literal(0.0)), null) + checkEvaluation(Divide(Literal(null, IntegerType), Literal(1)), null) + checkEvaluation(Divide(Literal(null, IntegerType), Literal(null, IntegerType)), null) + } + test("INSET") { val hS = HashSet[Any]() + 1 + 2 val nS = HashSet[Any]() + 1 + 2 + null From 1f5ddf17e831ad9717f0f4b60a727a3381fad4f9 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 2 Dec 2014 14:25:12 -0800 Subject: [PATCH 36/82] [SPARK-4670] [SQL] wrong symbol for bitwise not We should use `~` instead of `-` for bitwise NOT. Author: Daoyuan Wang Closes #3528 from adrian-wang/symbol and squashes the following commits: affd4ad [Daoyuan Wang] fix code gen test case 56efb79 [Daoyuan Wang] ensure bitwise NOT over byte and short persist data type f55fbae [Daoyuan Wang] wrong symbol for bitwise not --- .../sql/catalyst/expressions/arithmetic.scala | 20 +++++++++---------- .../ExpressionEvaluationSuite.scala | 15 ++++++++++++++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 7ec18b8419e20..61c26c50a6662 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -42,7 +42,7 @@ case class Sqrt(child: Expression) extends UnaryExpression { override def toString = s"SQRT($child)" override def eval(input: Row): Any = { - n1(child, input, ((na,a) => math.sqrt(na.toDouble(a)))) + n1(child, input, (na,a) => math.sqrt(na.toDouble(a))) } } @@ -138,7 +138,7 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme case ShortType => (evalE1.asInstanceOf[Short] & evalE2.asInstanceOf[Short]).toShort case IntegerType => 
evalE1.asInstanceOf[Int] & evalE2.asInstanceOf[Int] case LongType => evalE1.asInstanceOf[Long] & evalE2.asInstanceOf[Long] - case other => sys.error(s"Unsupported bitwise & operation on ${other}") + case other => sys.error(s"Unsupported bitwise & operation on $other") } } @@ -153,7 +153,7 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet case ShortType => (evalE1.asInstanceOf[Short] | evalE2.asInstanceOf[Short]).toShort case IntegerType => evalE1.asInstanceOf[Int] | evalE2.asInstanceOf[Int] case LongType => evalE1.asInstanceOf[Long] | evalE2.asInstanceOf[Long] - case other => sys.error(s"Unsupported bitwise | operation on ${other}") + case other => sys.error(s"Unsupported bitwise | operation on $other") } } @@ -168,7 +168,7 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme case ShortType => (evalE1.asInstanceOf[Short] ^ evalE2.asInstanceOf[Short]).toShort case IntegerType => evalE1.asInstanceOf[Int] ^ evalE2.asInstanceOf[Int] case LongType => evalE1.asInstanceOf[Long] ^ evalE2.asInstanceOf[Long] - case other => sys.error(s"Unsupported bitwise ^ operation on ${other}") + case other => sys.error(s"Unsupported bitwise ^ operation on $other") } } @@ -181,7 +181,7 @@ case class BitwiseNot(child: Expression) extends UnaryExpression { def dataType = child.dataType override def foldable = child.foldable def nullable = child.nullable - override def toString = s"-$child" + override def toString = s"~$child" override def eval(input: Row): Any = { val evalE = child.eval(input) @@ -189,11 +189,11 @@ case class BitwiseNot(child: Expression) extends UnaryExpression { null } else { dataType match { - case ByteType => (~(evalE.asInstanceOf[Byte])).toByte - case ShortType => (~(evalE.asInstanceOf[Short])).toShort - case IntegerType => ~(evalE.asInstanceOf[Int]) - case LongType => ~(evalE.asInstanceOf[Long]) - case other => sys.error(s"Unsupported bitwise ~ operation on ${other}") + case ByteType => 
(~evalE.asInstanceOf[Byte]).toByte + case ShortType => (~evalE.asInstanceOf[Short]).toShort + case IntegerType => ~evalE.asInstanceOf[Int] + case LongType => ~evalE.asInstanceOf[Long] + case other => sys.error(s"Unsupported bitwise ~ operation on $other") } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 25f56424888aa..cd2f67f448b0b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -42,6 +42,21 @@ class ExpressionEvaluationSuite extends FunSuite { checkEvaluation(Literal(1) + Literal(1), 2) } + test("unary BitwiseNOT") { + checkEvaluation(BitwiseNot(1), -2) + assert(BitwiseNot(1).dataType === IntegerType) + assert(BitwiseNot(1).eval(EmptyRow).isInstanceOf[Int]) + checkEvaluation(BitwiseNot(1.toLong), -2.toLong) + assert(BitwiseNot(1.toLong).dataType === LongType) + assert(BitwiseNot(1.toLong).eval(EmptyRow).isInstanceOf[Long]) + checkEvaluation(BitwiseNot(1.toShort), -2.toShort) + assert(BitwiseNot(1.toShort).dataType === ShortType) + assert(BitwiseNot(1.toShort).eval(EmptyRow).isInstanceOf[Short]) + checkEvaluation(BitwiseNot(1.toByte), -2.toByte) + assert(BitwiseNot(1.toByte).dataType === ByteType) + assert(BitwiseNot(1.toByte).eval(EmptyRow).isInstanceOf[Byte]) + } + /** * Checks for three-valued-logic. 
Based on: * http://en.wikipedia.org/wiki/Null_(SQL)#Comparisons_with_NULL_and_the_three-valued_logic_.283VL.29 From 3ae0cda83c5106136e90d59c20e61db345a5085f Mon Sep 17 00:00:00 2001 From: wangfei Date: Tue, 2 Dec 2014 14:30:44 -0800 Subject: [PATCH 37/82] [SPARK-4695][SQL] Get result using executeCollect Use ```executeCollect``` to collect the result, because executeCollect is a custom implementation of collect in Spark SQL which is better than RDD's collect. Author: wangfei Closes #3547 from scwf/executeCollect and squashes the following commits: a5ab68e [wangfei] Revert "adding debug info" a60d680 [wangfei] fix test failure 0db7ce8 [wangfei] adding debug info 184c594 [wangfei] using executeCollect instead collect --- .../main/scala/org/apache/spark/sql/hive/HiveContext.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 304b9a73ee91d..34fc21e61f60f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -377,7 +377,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { command.executeCollect().map(_.head.toString) case other => - val result: Seq[Seq[Any]] = toRdd.map(_.copy()).collect().toSeq + val result: Seq[Seq[Any]] = other.executeCollect().toSeq // We need the types so we can output struct field names val types = analyzed.output.map(_.dataType) // Reformat to match hive tab delimited output.
@@ -416,6 +416,8 @@ object HiveContext { case (bin: Array[Byte], BinaryType) => new String(bin, "UTF-8") case (decimal: Decimal, DecimalType()) => // Hive strips trailing zeros so use its toString HiveShim.createDecimal(decimal.toBigDecimal.underlying()).toString + case (decimal: BigDecimal, DecimalType()) => + HiveShim.createDecimal(decimal.underlying()).toString case (other, tpe) if primitiveTypes contains tpe => other.toString } From 2d4f6e70f7de50489c2b5f0d6a4756c3b1aace7d Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 2 Dec 2014 14:40:26 -0800 Subject: [PATCH 38/82] Minor nit style cleanup in GraphX. --- graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala index 1db3df03c8052..09ae3f9f6c09b 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala @@ -68,7 +68,7 @@ abstract class VertexRDD[VD]( * Provides the `RDD[(VertexId, VD)]` equivalent output. 
*/ override def compute(part: Partition, context: TaskContext): Iterator[(VertexId, VD)] = { - firstParent[ShippableVertexPartition[VD]].iterator(part, context).next.iterator + firstParent[ShippableVertexPartition[VD]].iterator(part, context).next().iterator } /** From 5da21f07d862212067719ddaa2fef6e09db21c10 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 2 Dec 2014 16:36:12 -0800 Subject: [PATCH 39/82] [Release] Translate unknown author names automatically --- dev/create-release/generate-contributors.py | 36 ++++---- dev/create-release/releaseutils.py | 93 +++++++++++++++++++++ 2 files changed, 111 insertions(+), 18 deletions(-) diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py index f4bf734081583..99c29ef9ff8b6 100755 --- a/dev/create-release/generate-contributors.py +++ b/dev/create-release/generate-contributors.py @@ -26,23 +26,11 @@ # You must set the following before use! JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira") +JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None) +JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None) START_COMMIT = os.environ.get("START_COMMIT", "37b100") END_COMMIT = os.environ.get("END_COMMIT", "3693ae") -try: - from jira.client import JIRA -except ImportError: - print "This tool requires the jira-python library" - print "Install using 'sudo pip install jira-python'" - sys.exit(-1) - -try: - import unidecode -except ImportError: - print "This tool requires the unidecode library to decode obscure github usernames" - print "Install using 'sudo pip install unidecode'" - sys.exit(-1) - # If commit range is not specified, prompt the user to provide it if not START_COMMIT or not END_COMMIT: print "A commit range is required to proceed." 
@@ -52,6 +40,8 @@ END_COMMIT = raw_input("Please specify ending commit hash (non-inclusive): ") # Verify provided arguments +if not JIRA_USERNAME: sys.exit("JIRA_USERNAME must be provided") +if not JIRA_PASSWORD: sys.exit("JIRA_PASSWORD must be provided") start_commit_line = get_one_line(START_COMMIT) end_commit_line = get_one_line(END_COMMIT) num_commits = num_commits_in_range(START_COMMIT, END_COMMIT) @@ -70,6 +60,14 @@ sys.exit("Ok, exiting") print "==================================================================================\n" +# Setup JIRA and github clients. We use two JIRA clients, one with authentication +# and one without, because authentication is slow and required only when we query +# JIRA user details but not Spark issues +jira_options = { "server": JIRA_API_BASE } +jira_client = JIRA(options = jira_options) +jira_client_auth = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD)) +github_client = Github() + # Find all commits within this range print "Gathering commits within range [%s..%s)" % (START_COMMIT, END_COMMIT) commits = get_one_line_commits(START_COMMIT, END_COMMIT) @@ -129,14 +127,16 @@ def print_indented(_list): # } # author_info = {} -jira_options = { "server": JIRA_API_BASE } -jira = JIRA(jira_options) print "\n=========================== Compiling contributor list ===========================" for commit in filtered_commits: commit_hash = re.findall("^[a-z0-9]+", commit)[0] issues = re.findall("SPARK-[0-9]+", commit.upper()) + # Translate the author in case the github username is not an actual name + # Also guard against any special characters used in the name + # Note the JIRA client we use here must have authentication enabled author = get_author(commit_hash) - author = unidecode.unidecode(unicode(author, "UTF-8")) # guard against special characters + author = unidecode.unidecode(unicode(author, "UTF-8")) + author = translate_author(author, github_client, jira_client_auth, warnings) date = get_date(commit_hash) 
# Parse components from the commit message, if any commit_components = find_components(commit, commit_hash) @@ -151,7 +151,7 @@ def populate(issue_type, components): author_info[author][issue_type].add(component) # Find issues and components associated with this commit for issue in issues: - jira_issue = jira.issue(issue) + jira_issue = jira_client.issue(issue) jira_type = jira_issue.fields.issuetype.name jira_type = translate_issue_type(jira_type, issue, warnings) jira_components = [translate_component(c.name, commit_hash, warnings)\ diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py index e56d7fa58fa2c..0d6830b11dc73 100755 --- a/dev/create-release/releaseutils.py +++ b/dev/create-release/releaseutils.py @@ -21,6 +21,29 @@ import re from subprocess import Popen, PIPE +try: + from jira.client import JIRA + from jira.exceptions import JIRAError +except ImportError: + print "This tool requires the jira-python library" + print "Install using 'sudo pip install jira-python'" + sys.exit(-1) + +try: + from github import Github + from github import GithubException +except ImportError: + print "This tool requires the PyGithub library" + print "Install using 'sudo pip install PyGithub'" + sys.exit(-1) + +try: + import unidecode +except ImportError: + print "This tool requires the unidecode library to decode obscure github usernames" + print "Install using 'sudo pip install unidecode'" + sys.exit(-1) + # Utility functions run git commands (written with Git 1.8.5) def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0] def get_author(commit_hash): @@ -122,3 +145,73 @@ def nice_join(str_list): else: return ", ".join(str_list[:-1]) + ", and " + str_list[-1] +# Return the full name of the specified user on Github +# If the user doesn't exist, return None +def get_github_name(author, github_client): + if github_client: + try: + return github_client.get_user(author).name + except GithubException as e: + # If this is not a "not found" 
exception + if e.status != 404: + raise e + return None + +# Return the full name of the specified user on JIRA +# If the user doesn't exist, return None +def get_jira_name(author, jira_client): + if jira_client: + try: + return jira_client.user(author).displayName + except JIRAError as e: + # If this is not a "not found" exception + if e.status_code != 404: + raise e + return None + +# Return whether the given name is in the form +def is_valid_author(author): + if not author: return False + author_words = len(author.split(" ")) + return author_words == 2 or author_words == 3 + +# Capitalize the first letter of each word in the given author name +def capitalize_author(author): + if not author: return None + words = author.split(" ") + words = [w[0].capitalize() + w[1:] for w in words if w] + return " ".join(words) + +# Maintain a mapping of translated author names as a cache +translated_authors = {} + +# Format the given author in a format appropriate for the contributors list. +# If the author is not an actual name, search github and JIRA for potential +# replacements and log all candidates as a warning. 
+def translate_author(github_author, github_client, jira_client, warnings): + if is_valid_author(github_author): + return capitalize_author(github_author) + # If the translated author is already cached, just return it + if github_author in translated_authors: + return translated_authors[github_author] + # Otherwise, author name is not found, so we need to search for an alternative name + candidates = set() + github_name = get_github_name(github_author, github_client) + jira_name = get_jira_name(github_author, jira_client) + if is_valid_author(github_name): github_name = capitalize_author(github_name) + if is_valid_author(jira_name): jira_name = capitalize_author(jira_name) + if github_name: candidates.add(github_name) + if jira_name: candidates.add(jira_name) + # Only use the github name as a replacement automatically + # The JIRA name may not make sense because it can belong to someone else + if is_valid_author(github_name): + candidates_message = " (another candidate is %s)" % jira_name if jira_name else "" + warnings.append("Replacing github user %s with %s%s" % (github_author, github_name, candidates_message)) + translated_authors[github_name] = github_name + return translated_authors[github_name] + # No direct replacement, so return the original author and list any candidates found + candidates_message = " (candidates: %s)" % nice_join(candidates) if candidates else "" + warnings.append("Unable to find a replacement for github user %s%s" % (github_author, candidates_message)) + translated_authors[github_author] = github_author + return translated_authors[github_author] + From fc0a1475ef7c8b33363d88adfe8e8f28def5afc7 Mon Sep 17 00:00:00 2001 From: JerryLead Date: Tue, 2 Dec 2014 17:08:02 -0800 Subject: [PATCH 40/82] [SPARK-4672][GraphX]Perform checkpoint() on PartitionsRDD to shorten the lineage The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672 Iterative GraphX applications always have long lineage, while checkpoint() on EdgeRDD and VertexRDD 
themselves cannot shorten the lineage. In contrast, if we perform checkpoint() on their PartitionsRDD, the long lineage can be cut off. Moreover, the existing operations such as cache() in this code are performed on the PartitionsRDD, so checkpoint() should work the same way. More details and explanation can be found in the JIRA. Author: JerryLead Author: Lijie Xu Closes #3549 from JerryLead/my_graphX_checkpoint and squashes the following commits: d1aa8d8 [JerryLead] Perform checkpoint() on PartitionsRDD not VertexRDD and EdgeRDD themselves ff08ed4 [JerryLead] Merge branch 'master' of https://github.com/apache/spark c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark 52799e3 [Lijie Xu] Merge pull request #1 from apache/master --- .../main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala | 4 ++++ .../scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala index a8169613b4fd2..504559da977d8 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala @@ -70,6 +70,10 @@ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] ( this } + override def checkpoint() = { + partitionsRDD.checkpoint() + } + /** The number of edges in the RDD.
*/ override def count(): Long = { partitionsRDD.map(_._2.size.toLong).reduce(_ + _) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala index d92a55a189298..c8898b1369565 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala @@ -71,6 +71,10 @@ class VertexRDDImpl[VD] private[graphx] ( this } + override def checkpoint() = { + partitionsRDD.checkpoint() + } + /** The number of vertices in the RDD. */ override def count(): Long = { partitionsRDD.map(_.size).reduce(_ + _) From 17c162f6682520e6e2790626e37da3a074471793 Mon Sep 17 00:00:00 2001 From: JerryLead Date: Tue, 2 Dec 2014 17:14:11 -0800 Subject: [PATCH 41/82] [SPARK-4672][GraphX]Non-transient PartitionsRDDs will lead to StackOverflow error The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672 In a nutshell, if `val partitionsRDD` in EdgeRDDImpl and VertexRDDImpl are non-transient, the serialization chain can become very long in iterative algorithms and finally lead to the StackOverflow error. More details and explanation can be found in the JIRA. 
Author: JerryLead Author: Lijie Xu Closes #3544 from JerryLead/my_graphX and squashes the following commits: 628f33c [JerryLead] set PartitionsRDD to be transient in EdgeRDDImpl and VertexRDDImpl c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark 52799e3 [Lijie Xu] Merge pull request #1 from apache/master --- .../main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala | 2 +- .../main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala index 504559da977d8..897c7ee12a436 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala @@ -26,7 +26,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.graphx._ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] ( - override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])], + @transient override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])], val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) extends EdgeRDD[ED](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala index c8898b1369565..9732c5b00c6d9 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala @@ -27,7 +27,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.graphx._ class VertexRDDImpl[VD] private[graphx] ( - val partitionsRDD: RDD[ShippableVertexPartition[VD]], + @transient val partitionsRDD: RDD[ShippableVertexPartition[VD]], val targetStorageLevel: StorageLevel = 
StorageLevel.MEMORY_ONLY) (implicit override protected val vdTag: ClassTag[VD]) extends VertexRDD[VD](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { From 77be8b986fd21b7bbe28aa8db1042cb22bc74fe7 Mon Sep 17 00:00:00 2001 From: JerryLead Date: Tue, 2 Dec 2014 23:53:29 -0800 Subject: [PATCH 42/82] [SPARK-4672][Core]Checkpoint() should clear f to shorten the serialization chain The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672 The f closure of `PartitionsRDD(ZippedPartitionsRDD2)` contains a `$outer` that references EdgeRDD/VertexRDD, which causes task's serialization chain become very long in iterative GraphX applications. As a result, StackOverflow error will occur. If we set "f = null" in `clearDependencies()`, checkpoint() can cut off the long serialization chain. More details and explanation can be found in the JIRA. Author: JerryLead Author: Lijie Xu Closes #3545 from JerryLead/my_core and squashes the following commits: f7faea5 [JerryLead] checkpoint() should clear the f to avoid StackOverflow error c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark 52799e3 [Lijie Xu] Merge pull request #1 from apache/master --- .../scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index 996f2cd3f34a3..95b2dd954e9f4 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -77,7 +77,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag]( private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, - f: (Iterator[A], Iterator[B]) => Iterator[V], + var f: (Iterator[A], Iterator[B]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], 
preservesPartitioning: Boolean = false) @@ -92,13 +92,14 @@ private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag] super.clearDependencies() rdd1 = null rdd2 = null + f = null } } private[spark] class ZippedPartitionsRDD3 [A: ClassTag, B: ClassTag, C: ClassTag, V: ClassTag]( sc: SparkContext, - f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], + var f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], @@ -117,13 +118,14 @@ private[spark] class ZippedPartitionsRDD3 rdd1 = null rdd2 = null rdd3 = null + f = null } } private[spark] class ZippedPartitionsRDD4 [A: ClassTag, B: ClassTag, C: ClassTag, D:ClassTag, V: ClassTag]( sc: SparkContext, - f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], + var f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], @@ -145,5 +147,6 @@ private[spark] class ZippedPartitionsRDD4 rdd2 = null rdd3 = null rdd4 = null + f = null } } From 8af551f71d4c192753daa52f005bde831eb92429 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Wed, 3 Dec 2014 02:05:17 -0800 Subject: [PATCH 43/82] [SPARK-4397][Core] Change the 'since' value of '@deprecated' to '1.3.0' As #3262 wasn't merged to branch 1.2, the `since` value of `deprecated` should be '1.3.0'. 
Author: zsxwing Closes #3573 from zsxwing/SPARK-4397-version and squashes the following commits: 1daa03c [zsxwing] Change the 'since' value to '1.3.0' --- .../scala/org/apache/spark/SparkContext.scala | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 9b0d5be7a7ab2..532f292952f05 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1630,28 +1630,28 @@ object SparkContext extends Logging { // following ones. @deprecated("Replaced by implicit objects in AccumulatorParam. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") object DoubleAccumulatorParam extends AccumulatorParam[Double] { def addInPlace(t1: Double, t2: Double): Double = t1 + t2 def zero(initialValue: Double) = 0.0 } @deprecated("Replaced by implicit objects in AccumulatorParam. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") object IntAccumulatorParam extends AccumulatorParam[Int] { def addInPlace(t1: Int, t2: Int): Int = t1 + t2 def zero(initialValue: Int) = 0 } @deprecated("Replaced by implicit objects in AccumulatorParam. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") object LongAccumulatorParam extends AccumulatorParam[Long] { def addInPlace(t1: Long, t2: Long) = t1 + t2 def zero(initialValue: Long) = 0L } @deprecated("Replaced by implicit objects in AccumulatorParam. 
This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") object FloatAccumulatorParam extends AccumulatorParam[Float] { def addInPlace(t1: Float, t2: Float) = t1 + t2 def zero(initialValue: Float) = 0f @@ -1662,34 +1662,34 @@ object SparkContext extends Logging { // and just call the corresponding functions in `object RDD`. @deprecated("Replaced by implicit functions in the RDD companion object. This is " + - "kept here only for backward compatibility.", "1.2.0") + "kept here only for backward compatibility.", "1.3.0") def rddToPairRDDFunctions[K, V](rdd: RDD[(K, V)]) (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K] = null) = { RDD.rddToPairRDDFunctions(rdd) } @deprecated("Replaced by implicit functions in the RDD companion object. This is " + - "kept here only for backward compatibility.", "1.2.0") + "kept here only for backward compatibility.", "1.3.0") def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]) = RDD.rddToAsyncRDDActions(rdd) @deprecated("Replaced by implicit functions in the RDD companion object. This is " + - "kept here only for backward compatibility.", "1.2.0") + "kept here only for backward compatibility.", "1.3.0") def rddToSequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable: ClassTag]( rdd: RDD[(K, V)]) = RDD.rddToSequenceFileRDDFunctions(rdd) @deprecated("Replaced by implicit functions in the RDD companion object. This is " + - "kept here only for backward compatibility.", "1.2.0") + "kept here only for backward compatibility.", "1.3.0") def rddToOrderedRDDFunctions[K : Ordering : ClassTag, V: ClassTag]( rdd: RDD[(K, V)]) = RDD.rddToOrderedRDDFunctions(rdd) @deprecated("Replaced by implicit functions in the RDD companion object. 
This is " + - "kept here only for backward compatibility.", "1.2.0") + "kept here only for backward compatibility.", "1.3.0") def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = RDD.doubleRDDToDoubleRDDFunctions(rdd) @deprecated("Replaced by implicit functions in the RDD companion object. This is " + - "kept here only for backward compatibility.", "1.2.0") + "kept here only for backward compatibility.", "1.3.0") def numericRDDToDoubleRDDFunctions[T](rdd: RDD[T])(implicit num: Numeric[T]) = RDD.numericRDDToDoubleRDDFunctions(rdd) @@ -1722,42 +1722,42 @@ object SparkContext extends Logging { // and just call the corresponding functions in `object WritableConverter`. @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") def intWritableConverter(): WritableConverter[Int] = WritableConverter.intWritableConverter() @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") def longWritableConverter(): WritableConverter[Long] = WritableConverter.longWritableConverter() @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") def doubleWritableConverter(): WritableConverter[Double] = WritableConverter.doubleWritableConverter() @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") def floatWritableConverter(): WritableConverter[Float] = WritableConverter.floatWritableConverter() @deprecated("Replaced by implicit functions in WritableConverter. 
This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") def booleanWritableConverter(): WritableConverter[Boolean] = WritableConverter.booleanWritableConverter() @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") def bytesWritableConverter(): WritableConverter[Array[Byte]] = WritableConverter.bytesWritableConverter() @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") def stringWritableConverter(): WritableConverter[String] = WritableConverter.stringWritableConverter() @deprecated("Replaced by implicit functions in WritableConverter. This is kept here only for " + - "backward compatibility.", "1.2.0") + "backward compatibility.", "1.3.0") def writableWritableConverter[T <: Writable]() = WritableConverter.writableWritableConverter() From 4ac21511547dc6227d05bf61821cd2d9ab5ede74 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Wed, 3 Dec 2014 18:50:03 +0800 Subject: [PATCH 44/82] [SPARK-4710] [mllib] Eliminate MLlib compilation warnings Renamed StreamingKMeans to StreamingKMeansExample to avoid warning about name conflict with StreamingKMeans class. Added import to DecisionTreeRunner to eliminate warning. CC: mengxr Author: Joseph K. Bradley Closes #3568 from jkbradley/ml-compilation-warnings and squashes the following commits: 64d6bc4 [Joseph K. Bradley] Updated DecisionTreeRunner.scala and StreamingKMeans.scala to eliminate compilation warnings, including renaming StreamingKMeans to StreamingKMeansExample. 
--- .../examples/mllib/DecisionTreeRunner.scala | 2 ++ ...KMeans.scala => StreamingKMeansExample.scala} | 16 ++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) rename examples/src/main/scala/org/apache/spark/examples/mllib/{StreamingKMeans.scala => StreamingKMeansExample.scala} (90%) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index 98f9d1689c8e7..54953adb5f3df 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -17,6 +17,8 @@ package org.apache.spark.examples.mllib +import scala.language.reflectiveCalls + import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala similarity index 90% rename from examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeans.scala rename to examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala index 33e5760aed997..8bb12d2ee9ed2 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala @@ -17,10 +17,10 @@ package org.apache.spark.examples.mllib +import org.apache.spark.SparkConf +import org.apache.spark.mllib.clustering.StreamingKMeans import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint -import org.apache.spark.mllib.clustering.StreamingKMeans -import org.apache.spark.SparkConf import org.apache.spark.streaming.{Seconds, StreamingContext} /** @@ -36,28 +36,28 @@ import org.apache.spark.streaming.{Seconds, StreamingContext} * `(y,[x1,x2,x3,...,xn])` * Where y 
is some identifier. n must be the same for train and test. * - * Usage: StreamingKmeans + * Usage: + * StreamingKMeansExample * * To run on your local machine using the two directories `trainingDir` and `testDir`, * with updates every 5 seconds, 2 dimensions per data point, and 3 clusters, call: - * $ bin/run-example \ - * org.apache.spark.examples.mllib.StreamingKMeans trainingDir testDir 5 3 2 + * $ bin/run-example mllib.StreamingKMeansExample trainingDir testDir 5 3 2 * * As you add text files to `trainingDir` the clusters will continuously update. * Anytime you add text files to `testDir`, you'll see predicted labels using the current model. * */ -object StreamingKMeans { +object StreamingKMeansExample { def main(args: Array[String]) { if (args.length != 5) { System.err.println( - "Usage: StreamingKMeans " + + "Usage: StreamingKMeansExample " + " ") System.exit(1) } - val conf = new SparkConf().setMaster("local").setAppName("StreamingLinearRegression") + val conf = new SparkConf().setMaster("local").setAppName("StreamingKMeansExample") val ssc = new StreamingContext(conf, Seconds(args(2).toLong)) val trainingData = ssc.textFileStream(args(0)).map(Vectors.parse) From 7fc49ed91168999d24ae7b4cc46fbb4ec87febc1 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Wed, 3 Dec 2014 19:01:56 +0800 Subject: [PATCH 45/82] [SPARK-4708][MLLib] Make k-mean runs two/three times faster with dense/sparse sample Note that the usage of `breezeSquaredDistance` in `org.apache.spark.mllib.util.MLUtils.fastSquaredDistance` is in the critical path, and `breezeSquaredDistance` is slow. We should replace it with our own implementation. Here is the benchmark against mnist8m dataset. 
Before DenseVector: 70.04secs SparseVector: 59.05secs With this PR DenseVector: 30.58secs SparseVector: 21.14secs Author: DB Tsai Closes #3565 from dbtsai/kmean and squashes the following commits: 08bc068 [DB Tsai] restyle de24662 [DB Tsai] address feedback b185a77 [DB Tsai] cleanup 4554ddd [DB Tsai] first commit --- .../spark/mllib/clustering/KMeans.scala | 67 +++++++++---------- .../spark/mllib/clustering/KMeansModel.scala | 10 +-- .../spark/mllib/clustering/LocalKMeans.scala | 22 +++--- .../org/apache/spark/mllib/util/MLUtils.scala | 26 ++++--- .../spark/mllib/util/MLUtilsSuite.scala | 13 ++-- 5 files changed, 70 insertions(+), 68 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 0f8dee58d8464..54c301d3e9e14 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -19,12 +19,11 @@ package org.apache.spark.mllib.clustering import scala.collection.mutable.ArrayBuffer -import breeze.linalg.{DenseVector => BDV, Vector => BV} - import org.apache.spark.annotation.Experimental import org.apache.spark.Logging import org.apache.spark.SparkContext._ import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.mllib.linalg.BLAS.{axpy, scal} import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel @@ -127,10 +126,10 @@ class KMeans private ( // Compute squared norms and cache them. 
val norms = data.map(Vectors.norm(_, 2.0)) norms.persist() - val breezeData = data.map(_.toBreeze).zip(norms).map { case (v, norm) => - new BreezeVectorWithNorm(v, norm) + val zippedData = data.zip(norms).map { case (v, norm) => + new VectorWithNorm(v, norm) } - val model = runBreeze(breezeData) + val model = runAlgorithm(zippedData) norms.unpersist() // Warn at the end of the run as well, for increased visibility. @@ -142,9 +141,9 @@ class KMeans private ( } /** - * Implementation of K-Means using breeze. + * Implementation of K-Means algorithm. */ - private def runBreeze(data: RDD[BreezeVectorWithNorm]): KMeansModel = { + private def runAlgorithm(data: RDD[VectorWithNorm]): KMeansModel = { val sc = data.sparkContext @@ -170,9 +169,10 @@ class KMeans private ( // Execute iterations of Lloyd's algorithm until all runs have converged while (iteration < maxIterations && !activeRuns.isEmpty) { - type WeightedPoint = (BV[Double], Long) - def mergeContribs(p1: WeightedPoint, p2: WeightedPoint): WeightedPoint = { - (p1._1 += p2._1, p1._2 + p2._2) + type WeightedPoint = (Vector, Long) + def mergeContribs(x: WeightedPoint, y: WeightedPoint): WeightedPoint = { + axpy(1.0, x._1, y._1) + (y._1, x._2 + y._2) } val activeCenters = activeRuns.map(r => centers(r)).toArray @@ -185,16 +185,17 @@ class KMeans private ( val thisActiveCenters = bcActiveCenters.value val runs = thisActiveCenters.length val k = thisActiveCenters(0).length - val dims = thisActiveCenters(0)(0).vector.length + val dims = thisActiveCenters(0)(0).vector.size - val sums = Array.fill(runs, k)(BDV.zeros[Double](dims).asInstanceOf[BV[Double]]) + val sums = Array.fill(runs, k)(Vectors.zeros(dims)) val counts = Array.fill(runs, k)(0L) points.foreach { point => (0 until runs).foreach { i => val (bestCenter, cost) = KMeans.findClosest(thisActiveCenters(i), point) costAccums(i) += cost - sums(i)(bestCenter) += point.vector + val sum = sums(i)(bestCenter) + axpy(1.0, point.vector, sum) counts(i)(bestCenter) += 1 } } 
@@ -212,8 +213,8 @@ class KMeans private ( while (j < k) { val (sum, count) = totalContribs((i, j)) if (count != 0) { - sum /= count.toDouble - val newCenter = new BreezeVectorWithNorm(sum) + scal(1.0 / count, sum) + val newCenter = new VectorWithNorm(sum) if (KMeans.fastSquaredDistance(newCenter, centers(run)(j)) > epsilon * epsilon) { changed = true } @@ -245,18 +246,18 @@ class KMeans private ( logInfo(s"The cost for the best run is $minCost.") - new KMeansModel(centers(bestRun).map(c => Vectors.fromBreeze(c.vector))) + new KMeansModel(centers(bestRun).map(_.vector)) } /** * Initialize `runs` sets of cluster centers at random. */ - private def initRandom(data: RDD[BreezeVectorWithNorm]) - : Array[Array[BreezeVectorWithNorm]] = { + private def initRandom(data: RDD[VectorWithNorm]) + : Array[Array[VectorWithNorm]] = { // Sample all the cluster centers in one pass to avoid repeated scans val sample = data.takeSample(true, runs * k, new XORShiftRandom().nextInt()).toSeq Array.tabulate(runs)(r => sample.slice(r * k, (r + 1) * k).map { v => - new BreezeVectorWithNorm(v.vector.toDenseVector, v.norm) + new VectorWithNorm(Vectors.dense(v.vector.toArray), v.norm) }.toArray) } @@ -269,8 +270,8 @@ class KMeans private ( * * The original paper can be found at http://theory.stanford.edu/~sergei/papers/vldb12-kmpar.pdf. */ - private def initKMeansParallel(data: RDD[BreezeVectorWithNorm]) - : Array[Array[BreezeVectorWithNorm]] = { + private def initKMeansParallel(data: RDD[VectorWithNorm]) + : Array[Array[VectorWithNorm]] = { // Initialize each run's center to a random point val seed = new XORShiftRandom().nextInt() val sample = data.takeSample(true, runs, seed).toSeq @@ -376,8 +377,8 @@ object KMeans { * Returns the index of the closest center to the given point, as well as the squared distance. 
*/ private[mllib] def findClosest( - centers: TraversableOnce[BreezeVectorWithNorm], - point: BreezeVectorWithNorm): (Int, Double) = { + centers: TraversableOnce[VectorWithNorm], + point: VectorWithNorm): (Int, Double) = { var bestDistance = Double.PositiveInfinity var bestIndex = 0 var i = 0 @@ -402,8 +403,8 @@ object KMeans { * Returns the K-means cost of a given point against the given cluster centers. */ private[mllib] def pointCost( - centers: TraversableOnce[BreezeVectorWithNorm], - point: BreezeVectorWithNorm): Double = + centers: TraversableOnce[VectorWithNorm], + point: VectorWithNorm): Double = findClosest(centers, point)._2 /** @@ -411,26 +412,24 @@ object KMeans { * [[org.apache.spark.mllib.util.MLUtils#fastSquaredDistance]]. */ private[clustering] def fastSquaredDistance( - v1: BreezeVectorWithNorm, - v2: BreezeVectorWithNorm): Double = { + v1: VectorWithNorm, + v2: VectorWithNorm): Double = { MLUtils.fastSquaredDistance(v1.vector, v1.norm, v2.vector, v2.norm) } } /** - * A breeze vector with its norm for fast distance computation. + * A vector with its norm for fast distance computation. * * @see [[org.apache.spark.mllib.clustering.KMeans#fastSquaredDistance]] */ private[clustering] -class BreezeVectorWithNorm(val vector: BV[Double], val norm: Double) extends Serializable { - - def this(vector: BV[Double]) = this(vector, Vectors.norm(Vectors.fromBreeze(vector), 2.0)) +class VectorWithNorm(val vector: Vector, val norm: Double) extends Serializable { - def this(array: Array[Double]) = this(new BDV[Double](array)) + def this(vector: Vector) = this(vector, Vectors.norm(vector, 2.0)) - def this(v: Vector) = this(v.toBreeze) + def this(array: Array[Double]) = this(Vectors.dense(array)) /** Converts the vector to a dense vector. 
*/ - def toDense = new BreezeVectorWithNorm(vector.toDenseVector, norm) + def toDense = new VectorWithNorm(Vectors.dense(vector.toArray), norm) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 12a3d91cd31a6..3b95a9e6936e8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -32,14 +32,14 @@ class KMeansModel (val clusterCenters: Array[Vector]) extends Serializable { /** Returns the cluster index that a given point belongs to. */ def predict(point: Vector): Int = { - KMeans.findClosest(clusterCentersWithNorm, new BreezeVectorWithNorm(point))._1 + KMeans.findClosest(clusterCentersWithNorm, new VectorWithNorm(point))._1 } /** Maps given points to their cluster indices. */ def predict(points: RDD[Vector]): RDD[Int] = { val centersWithNorm = clusterCentersWithNorm val bcCentersWithNorm = points.context.broadcast(centersWithNorm) - points.map(p => KMeans.findClosest(bcCentersWithNorm.value, new BreezeVectorWithNorm(p))._1) + points.map(p => KMeans.findClosest(bcCentersWithNorm.value, new VectorWithNorm(p))._1) } /** Maps given points to their cluster indices. 
*/ @@ -53,9 +53,9 @@ class KMeansModel (val clusterCenters: Array[Vector]) extends Serializable { def computeCost(data: RDD[Vector]): Double = { val centersWithNorm = clusterCentersWithNorm val bcCentersWithNorm = data.context.broadcast(centersWithNorm) - data.map(p => KMeans.pointCost(bcCentersWithNorm.value, new BreezeVectorWithNorm(p))).sum() + data.map(p => KMeans.pointCost(bcCentersWithNorm.value, new VectorWithNorm(p))).sum() } - private def clusterCentersWithNorm: Iterable[BreezeVectorWithNorm] = - clusterCenters.map(new BreezeVectorWithNorm(_)) + private def clusterCentersWithNorm: Iterable[VectorWithNorm] = + clusterCenters.map(new VectorWithNorm(_)) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala index f0722d7c14a46..b2f140e1b1352 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala @@ -19,9 +19,9 @@ package org.apache.spark.mllib.clustering import scala.util.Random -import breeze.linalg.{Vector => BV, DenseVector => BDV, norm => breezeNorm} - import org.apache.spark.Logging +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.linalg.BLAS.{axpy, scal} /** * An utility object to run K-means locally. This is private to the ML package because it's used @@ -35,14 +35,14 @@ private[mllib] object LocalKMeans extends Logging { */ def kMeansPlusPlus( seed: Int, - points: Array[BreezeVectorWithNorm], + points: Array[VectorWithNorm], weights: Array[Double], k: Int, maxIterations: Int - ): Array[BreezeVectorWithNorm] = { + ): Array[VectorWithNorm] = { val rand = new Random(seed) - val dimensions = points(0).vector.length - val centers = new Array[BreezeVectorWithNorm](k) + val dimensions = points(0).vector.size + val centers = new Array[VectorWithNorm](k) // Initialize centers by sampling using the k-means++ procedure. 
centers(0) = pickWeighted(rand, points, weights).toDense @@ -75,14 +75,12 @@ private[mllib] object LocalKMeans extends Logging { while (moved && iteration < maxIterations) { moved = false val counts = Array.fill(k)(0.0) - val sums = Array.fill(k)( - BDV.zeros[Double](dimensions).asInstanceOf[BV[Double]] - ) + val sums = Array.fill(k)(Vectors.zeros(dimensions)) var i = 0 while (i < points.length) { val p = points(i) val index = KMeans.findClosest(centers, p)._1 - breeze.linalg.axpy(weights(i), p.vector, sums(index)) + axpy(weights(i), p.vector, sums(index)) counts(index) += weights(i) if (index != oldClosest(i)) { moved = true @@ -97,8 +95,8 @@ private[mllib] object LocalKMeans extends Logging { // Assign center to a random point centers(j) = points(rand.nextInt(points.length)).toDense } else { - sums(j) /= counts(j) - centers(j) = new BreezeVectorWithNorm(sums(j)) + scal(1.0 / counts(j), sums(j)) + centers(j) = new VectorWithNorm(sums(j)) } j += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index 9353351af72a0..b0d05ae33e1b5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.util import scala.reflect.ClassTag -import breeze.linalg.{Vector => BV, DenseVector => BDV, SparseVector => BSV, +import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, squaredDistance => breezeSquaredDistance} import org.apache.spark.annotation.Experimental @@ -28,7 +28,8 @@ import org.apache.spark.rdd.RDD import org.apache.spark.rdd.PartitionwiseSampledRDD import org.apache.spark.util.random.BernoulliCellSampler import org.apache.spark.mllib.regression.LabeledPoint -import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.mllib.linalg.{SparseVector, Vector, Vectors} +import org.apache.spark.mllib.linalg.BLAS.dot import 
org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream.DStream @@ -281,9 +282,9 @@ object MLUtils { * @return squared distance between v1 and v2 within the specified precision */ private[mllib] def fastSquaredDistance( - v1: BV[Double], + v1: Vector, norm1: Double, - v2: BV[Double], + v2: Vector, norm2: Double, precision: Double = 1e-6): Double = { val n = v1.size @@ -306,16 +307,19 @@ object MLUtils { */ val precisionBound1 = 2.0 * EPSILON * sumSquaredNorm / (normDiff * normDiff + EPSILON) if (precisionBound1 < precision) { - sqDist = sumSquaredNorm - 2.0 * v1.dot(v2) - } else if (v1.isInstanceOf[BSV[Double]] || v2.isInstanceOf[BSV[Double]]) { - val dot = v1.dot(v2) - sqDist = math.max(sumSquaredNorm - 2.0 * dot, 0.0) - val precisionBound2 = EPSILON * (sumSquaredNorm + 2.0 * math.abs(dot)) / (sqDist + EPSILON) + sqDist = sumSquaredNorm - 2.0 * dot(v1, v2) + } else if (v1.isInstanceOf[SparseVector] || v2.isInstanceOf[SparseVector]) { + val dotValue = dot(v1, v2) + sqDist = math.max(sumSquaredNorm - 2.0 * dotValue, 0.0) + val precisionBound2 = EPSILON * (sumSquaredNorm + 2.0 * math.abs(dotValue)) / + (sqDist + EPSILON) if (precisionBound2 > precision) { - sqDist = breezeSquaredDistance(v1, v2) + // TODO: breezeSquaredDistance is slow, + // so we should replace it with our own implementation. 
+ sqDist = breezeSquaredDistance(v1.toBreeze, v2.toBreeze) } } else { - sqDist = breezeSquaredDistance(v1, v2) + sqDist = breezeSquaredDistance(v1.toBreeze, v2.toBreeze) } sqDist } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala index 88bc49cc61f94..df07987093fbf 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala @@ -44,18 +44,19 @@ class MLUtilsSuite extends FunSuite with MLlibTestSparkContext { test("fast squared distance") { val a = (30 to 0 by -1).map(math.pow(2.0, _)).toArray val n = a.length - val v1 = new BDV[Double](a) - val norm1 = breezeNorm(v1, 2.0) + val v1 = Vectors.dense(a) + val norm1 = Vectors.norm(v1, 2.0) val precision = 1e-6 for (m <- 0 until n) { val indices = (0 to m).toArray val values = indices.map(i => a(i)) - val v2 = new BSV[Double](indices, values, n) - val norm2 = breezeNorm(v2, 2.0) - val squaredDist = breezeSquaredDistance(v1, v2) + val v2 = Vectors.sparse(n, indices, values) + val norm2 = Vectors.norm(v2, 2.0) + val squaredDist = breezeSquaredDistance(v1.toBreeze, v2.toBreeze) val fastSquaredDist1 = fastSquaredDistance(v1, norm1, v2, norm2, precision) assert((fastSquaredDist1 - squaredDist) <= precision * squaredDist, s"failed with m = $m") - val fastSquaredDist2 = fastSquaredDistance(v1, norm1, v2.toDenseVector, norm2, precision) + val fastSquaredDist2 = + fastSquaredDistance(v1, norm1, Vectors.dense(v2.toArray), norm2, precision) assert((fastSquaredDist2 - squaredDist) <= precision * squaredDist, s"failed with m = $m") } } From d00542987ed80635782dcc826fc0bdbf434fff10 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Wed, 3 Dec 2014 22:31:39 +0800 Subject: [PATCH 46/82] [SPARK-4717][MLlib] Optimize BLAS library to avoid de-reference multiple times in loop Have a local reference to `values` and `indices` array in the `Vector` object so 
JVM can locate the value with one operation call. See `SPARK-4581` for similar optimization, and the bytecode analysis. Author: DB Tsai Closes #3577 from dbtsai/blasopt and squashes the following commits: 62d38c4 [DB Tsai] formating 0316cef [DB Tsai] first commit --- .../org/apache/spark/mllib/linalg/BLAS.scala | 99 +++++++++++-------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 89539e600f48c..8c4c9c6cf6ae2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -72,17 +72,21 @@ private[spark] object BLAS extends Serializable with Logging { * y += a * x */ private def axpy(a: Double, x: SparseVector, y: DenseVector): Unit = { - val nnz = x.indices.size + val xValues = x.values + val xIndices = x.indices + val yValues = y.values + val nnz = xIndices.size + if (a == 1.0) { var k = 0 while (k < nnz) { - y.values(x.indices(k)) += x.values(k) + yValues(xIndices(k)) += xValues(k) k += 1 } } else { var k = 0 while (k < nnz) { - y.values(x.indices(k)) += a * x.values(k) + yValues(xIndices(k)) += a * xValues(k) k += 1 } } @@ -119,11 +123,15 @@ private[spark] object BLAS extends Serializable with Logging { * dot(x, y) */ private def dot(x: SparseVector, y: DenseVector): Double = { - val nnz = x.indices.size + val xValues = x.values + val xIndices = x.indices + val yValues = y.values + val nnz = xIndices.size + var sum = 0.0 var k = 0 while (k < nnz) { - sum += x.values(k) * y.values(x.indices(k)) + sum += xValues(k) * yValues(xIndices(k)) k += 1 } sum @@ -133,19 +141,24 @@ private[spark] object BLAS extends Serializable with Logging { * dot(x, y) */ private def dot(x: SparseVector, y: SparseVector): Double = { + val xValues = x.values + val xIndices = x.indices + val yValues = y.values + val yIndices = y.indices + val nnzx = xIndices.size 
+ val nnzy = yIndices.size + var kx = 0 - val nnzx = x.indices.size var ky = 0 - val nnzy = y.indices.size var sum = 0.0 // y catching x while (kx < nnzx && ky < nnzy) { - val ix = x.indices(kx) - while (ky < nnzy && y.indices(ky) < ix) { + val ix = xIndices(kx) + while (ky < nnzy && yIndices(ky) < ix) { ky += 1 } - if (ky < nnzy && y.indices(ky) == ix) { - sum += x.values(kx) * y.values(ky) + if (ky < nnzy && yIndices(ky) == ix) { + sum += xValues(kx) * yValues(ky) ky += 1 } kx += 1 @@ -163,21 +176,25 @@ private[spark] object BLAS extends Serializable with Logging { case dy: DenseVector => x match { case sx: SparseVector => + val sxIndices = sx.indices + val sxValues = sx.values + val dyValues = dy.values + val nnz = sxIndices.size + var i = 0 var k = 0 - val nnz = sx.indices.size while (k < nnz) { - val j = sx.indices(k) + val j = sxIndices(k) while (i < j) { - dy.values(i) = 0.0 + dyValues(i) = 0.0 i += 1 } - dy.values(i) = sx.values(k) + dyValues(i) = sxValues(k) i += 1 k += 1 } while (i < n) { - dy.values(i) = 0.0 + dyValues(i) = 0.0 i += 1 } case dx: DenseVector => @@ -311,6 +328,8 @@ private[spark] object BLAS extends Serializable with Logging { s"The columns of C don't match the columns of B. 
C: ${C.numCols}, A: $nB") val Avals = A.values + val Bvals = B.values + val Cvals = C.values val Arows = if (!transA) A.rowIndices else A.colPtrs val Acols = if (!transA) A.colPtrs else A.rowIndices @@ -327,11 +346,11 @@ private[spark] object BLAS extends Serializable with Logging { val indEnd = Arows(rowCounterForA + 1) var sum = 0.0 while (i < indEnd) { - sum += Avals(i) * B.values(Bstart + Acols(i)) + sum += Avals(i) * Bvals(Bstart + Acols(i)) i += 1 } val Cindex = Cstart + rowCounterForA - C.values(Cindex) = beta * C.values(Cindex) + sum * alpha + Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha rowCounterForA += 1 } colCounterForB += 1 @@ -349,7 +368,7 @@ private[spark] object BLAS extends Serializable with Logging { i += 1 } val Cindex = Cstart + rowCounter - C.values(Cindex) = beta * C.values(Cindex) + sum * alpha + Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha rowCounter += 1 } colCounterForB += 1 @@ -357,7 +376,7 @@ private[spark] object BLAS extends Serializable with Logging { } } else { // Scale matrix first if `beta` is not equal to 0.0 - if (beta != 0.0){ + if (beta != 0.0) { f2jBLAS.dscal(C.values.length, beta, C.values, 1) } // Perform matrix multiplication and add to C. 
The rows of A are multiplied by the columns of @@ -371,9 +390,9 @@ private[spark] object BLAS extends Serializable with Logging { while (colCounterForA < kA) { var i = Acols(colCounterForA) val indEnd = Acols(colCounterForA + 1) - val Bval = B.values(Bstart + colCounterForA) * alpha - while (i < indEnd){ - C.values(Cstart + Arows(i)) += Avals(i) * Bval + val Bval = Bvals(Bstart + colCounterForA) * alpha + while (i < indEnd) { + Cvals(Cstart + Arows(i)) += Avals(i) * Bval i += 1 } colCounterForA += 1 @@ -384,12 +403,12 @@ private[spark] object BLAS extends Serializable with Logging { while (colCounterForB < nB) { var colCounterForA = 0 // The column of A to multiply with the row of B val Cstart = colCounterForB * mA - while (colCounterForA < kA){ + while (colCounterForA < kA) { var i = Acols(colCounterForA) val indEnd = Acols(colCounterForA + 1) val Bval = B(colCounterForB, colCounterForA) * alpha - while (i < indEnd){ - C.values(Cstart + Arows(i)) += Avals(i) * Bval + while (i < indEnd) { + Cvals(Cstart + Arows(i)) += Avals(i) * Bval i += 1 } colCounterForA += 1 @@ -484,41 +503,43 @@ private[spark] object BLAS extends Serializable with Logging { beta: Double, y: DenseVector): Unit = { - val mA: Int = if(!trans) A.numRows else A.numCols - val nA: Int = if(!trans) A.numCols else A.numRows + val xValues = x.values + val yValues = y.values + + val mA: Int = if (!trans) A.numRows else A.numCols + val nA: Int = if (!trans) A.numCols else A.numRows val Avals = A.values val Arows = if (!trans) A.rowIndices else A.colPtrs val Acols = if (!trans) A.colPtrs else A.rowIndices - // Slicing is easy in this case. 
This is the optimal multiplication setting for sparse matrices - if (trans){ + if (trans) { var rowCounter = 0 - while (rowCounter < mA){ + while (rowCounter < mA) { var i = Arows(rowCounter) val indEnd = Arows(rowCounter + 1) var sum = 0.0 - while(i < indEnd){ - sum += Avals(i) * x.values(Acols(i)) + while (i < indEnd) { + sum += Avals(i) * xValues(Acols(i)) i += 1 } - y.values(rowCounter) = beta * y.values(rowCounter) + sum * alpha + yValues(rowCounter) = beta * yValues(rowCounter) + sum * alpha rowCounter += 1 } } else { // Scale vector first if `beta` is not equal to 0.0 - if (beta != 0.0){ + if (beta != 0.0) { scal(beta, y) } // Perform matrix-vector multiplication and add to y var colCounterForA = 0 - while (colCounterForA < nA){ + while (colCounterForA < nA) { var i = Acols(colCounterForA) val indEnd = Acols(colCounterForA + 1) - val xVal = x.values(colCounterForA) * alpha - while (i < indEnd){ + val xVal = xValues(colCounterForA) * alpha + while (i < indEnd) { val rowIndex = Arows(i) - y.values(rowIndex) += Avals(i) * xVal + yValues(rowIndex) += Avals(i) * xVal i += 1 } colCounterForA += 1 From a975dc32799bb8a14f9e1c76defaaa7cfbaf8b53 Mon Sep 17 00:00:00 2001 From: Jim Lim Date: Wed, 3 Dec 2014 11:16:02 -0800 Subject: [PATCH 47/82] SPARK-2624 add datanucleus jars to the container in yarn-cluster If `spark-submit` finds the datanucleus jars, it adds them to the driver's classpath, but does not add it to the container. This patch modifies the yarn deployment class to copy all `datanucleus-*` jars found in `[spark-home]/libs` to the container. 
Author: Jim Lim Closes #3238 from jimjh/SPARK-2624 and squashes the following commits: 3633071 [Jim Lim] SPARK-2624 update documentation and comments fe95125 [Jim Lim] SPARK-2624 keep java imports together 6c31fe0 [Jim Lim] SPARK-2624 update documentation 6690fbf [Jim Lim] SPARK-2624 add tests d28d8e9 [Jim Lim] SPARK-2624 add spark.yarn.datanucleus.dir option 84e6cba [Jim Lim] SPARK-2624 add datanucleus jars to the container in yarn-cluster --- docs/running-on-yarn.md | 15 ++++ .../apache/spark/deploy/yarn/ClientBase.scala | 66 ++++++++++++++++ .../spark/deploy/yarn/ClientBaseSuite.scala | 76 +++++++++++++++++++ 3 files changed, 157 insertions(+) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index dfe2db4b3fce8..45e219e0c136c 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -132,6 +132,21 @@ Most of the configs are the same for Spark on YARN as for other deployment modes The maximum number of threads to use in the application master for launching executor containers. + + + + +
spark.sql.parquet.binaryAsString false - Some other Parquet-producing systems, in particular Impala and older versions of Spark SQL, do - not differentiate between binary data and strings when writing out the Parquet schema. This + Some other Parquet-producing systems, in particular Impala and older versions of Spark SQL, do + not differentiate between binary data and strings when writing out the Parquet schema. This flag tells Spark SQL to interpret binary data as a string to provide compatibility with these systems.
spark.sql.parquet.compression.codec gzip - Sets the compression codec use when writing Parquet files. Acceptable values include: + Sets the compression codec to use when writing Parquet files. Acceptable values include: uncompressed, snappy, gzip, lzo.
spark.sql.parquet.filterPushdown false + Turn on Parquet filter pushdown optimization. This feature is turned off by default because of a known + bug in Parquet 1.6.0rc3 (PARQUET-136). + However, if your table doesn't contain any nullable string or binary columns, it's still safe to turn + this feature on. +
spark.sql.hive.convertMetastoreParquet true
DateType java.sql.Date + DateType +
ArrayType scala.collection.Seq
DateType java.sql.Date + DataType.DateType +
ArrayType java.util.List
DateType datetime.date + DateType() +
ArrayType list, tuple, or array (none) The cluster manager to connect to. See the list of - allowed master URL's. + allowed master URL's.
spark.yarn.datanucleus.dir $SPARK_HOME/lib + The location of the DataNucleus jars, in case overriding the default location is desired. + By default, Spark on YARN will use the DataNucleus jars installed at + $SPARK_HOME/lib, but the jars can also be in a world-readable location on HDFS. + This allows YARN to cache them on nodes so that they don't need to be distributed each time an + application runs. To point to a directory on HDFS, for example, set this configuration to + "hdfs:///some/path". + + This is required because the datanucleus jars cannot be packaged into the + assembly jar due to metadata conflicts (involving plugin.xml). +
# Launching Spark on YARN diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index f95d72379171c..8e4360ea4476b 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy.yarn import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException} +import java.io.{File, FilenameFilter} import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} @@ -223,10 +224,48 @@ private[spark] trait ClientBase extends Logging { } } } + if (cachedSecondaryJarLinks.nonEmpty) { sparkConf.set(CONF_SPARK_YARN_SECONDARY_JARS, cachedSecondaryJarLinks.mkString(",")) } + /** + * Do the same for datanucleus jars, if they exist in spark home. Find all datanucleus-* jars, + * copy them to the remote fs, and add them to the class path. + * + * This is necessary because the datanucleus jars cannot be included in the assembly jar due + * to metadata conflicts involving plugin.xml. At the time of writing, these are the only + * jars that cannot be distributed with the uber jar and have to be treated differently. 
+ * + * For more details, see SPARK-2624, and https://github.com/apache/spark/pull/3238 + */ + for (libsDir <- dataNucleusJarsDir(sparkConf)) { + val libsURI = new URI(libsDir) + val jarLinks = ListBuffer.empty[String] + if (libsURI.getScheme != LOCAL_SCHEME) { + val localURI = getQualifiedLocalPath(libsURI).toUri() + val jars = FileSystem.get(localURI, hadoopConf).listFiles(new Path(localURI.getPath), false) + while (jars.hasNext) { + val jar = jars.next() + val name = jar.getPath.getName + if (name.startsWith("datanucleus-")) { + // copy to remote and add to classpath + val src = jar.getPath + val destPath = copyFileToRemote(dst, src, replication) + distCacheMgr.addResource(fs, hadoopConf, destPath, + localResources, LocalResourceType.FILE, name, statCache) + jarLinks += name + } + } + } else { + jarLinks += libsURI.toString + Path.SEPARATOR + "*" + } + + if (jarLinks.nonEmpty) { + sparkConf.set(CONF_SPARK_DATANUCLEUS_JARS, jarLinks.mkString(",")) + } + } + localResources } @@ -551,6 +590,13 @@ private[spark] object ClientBase extends Logging { // Internal config to propagate the location of the user's jar to the driver/executors val CONF_SPARK_USER_JAR = "spark.yarn.user.jar" + // Location of the datanucleus jars + val CONF_SPARK_DATANUCLEUS_DIR = "spark.yarn.datanucleus.dir" + + // Internal config to propagate the locations of datanucleus jars found to add to the + // classpath of the executors. Value should be a comma-separated list of paths to each jar. + val CONF_SPARK_DATANUCLEUS_JARS = "spark.yarn.datanucleus.jars" + // Internal config to propagate the locations of any extra jars to add to the classpath // of the executors val CONF_SPARK_YARN_SECONDARY_JARS = "spark.yarn.secondary.jars" @@ -583,6 +629,19 @@ private[spark] object ClientBase extends Logging { } } + /** + * Find the user-defined provided jars directory if configured, or return SPARK_HOME/lib if not. 
+ * + * This method first looks for $CONF_SPARK_DATANUCLEUS_DIR inside the SparkConf, then looks for + * Spark home inside the the SparkConf and the user environment. + */ + private def dataNucleusJarsDir(conf: SparkConf): Option[String] = { + conf.getOption(CONF_SPARK_DATANUCLEUS_DIR).orElse { + val sparkHome = conf.getOption("spark.home").orElse(sys.env.get("SPARK_HOME")) + sparkHome.map(path => path + Path.SEPARATOR + "lib") + } + } + /** * Return the path to the given application's staging directory. */ @@ -684,6 +743,13 @@ private[spark] object ClientBase extends Logging { addUserClasspath(args, sparkConf, env) } + // Add datanucleus jars to classpath + for (entries <- sparkConf.getOption(CONF_SPARK_DATANUCLEUS_JARS)) { + entries.split(",").filter(_.nonEmpty).foreach { entry => + addFileToClasspath(entry, null, env) + } + } + // Append all jar files under the working directory to the classpath. addClasspathEntry(Environment.PWD.$() + Path.SEPARATOR + "*", env) } diff --git a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala index 17b79ae1d82c4..b055e9b72dc61 100644 --- a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala +++ b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala @@ -21,6 +21,7 @@ import java.io.File import java.net.URI import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.MRJobConfig import org.apache.hadoop.yarn.api.ApplicationConstants.Environment @@ -104,6 +105,81 @@ class ClientBaseSuite extends FunSuite with Matchers { cp should not contain (ClientBase.APP_JAR) } + test("DataNucleus in classpath") { + val dnJars = "local:/dn/core.jar,/dn/api.jar" + val conf = new Configuration() + val sparkConf = new SparkConf() + .set(ClientBase.CONF_SPARK_JAR, SPARK) + 
.set(ClientBase.CONF_SPARK_DATANUCLEUS_JARS, dnJars) + val env = new MutableHashMap[String, String]() + val args = new ClientArguments(Array("--jar", USER, "--addJars", ADDED), sparkConf) + + ClientBase.populateClasspath(args, conf, sparkConf, env) + + val cp = env("CLASSPATH").split(File.pathSeparator) + s"$dnJars".split(",").foreach({ entry => + val uri = new URI(entry) + if (ClientBase.LOCAL_SCHEME.equals(uri.getScheme())) { + cp should contain (uri.getPath()) + } else { + cp should not contain (uri.getPath()) + } + }) + } + + test("DataNucleus using local:") { + val dnDir = "local:/datanucleus" + val conf = new Configuration() + val sparkConf = new SparkConf() + .set(ClientBase.CONF_SPARK_JAR, SPARK) + .set(ClientBase.CONF_SPARK_DATANUCLEUS_DIR, dnDir) + val yarnConf = new YarnConfiguration() + val args = new ClientArguments(Array("--jar", USER, "--addJars", ADDED), sparkConf) + + val client = spy(new DummyClient(args, conf, sparkConf, yarnConf)) + doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), + any(classOf[Path]), anyShort(), anyBoolean()) + + val tempDir = Utils.createTempDir() + try { + client.prepareLocalResources(tempDir.getAbsolutePath()) + val jars = sparkConf.get(ClientBase.CONF_SPARK_DATANUCLEUS_JARS).split(",") + val uri = new URI(dnDir) + jars should contain (uri.toString + Path.SEPARATOR + "*") + } finally { + Utils.deleteRecursively(tempDir) + } + } + + test("DataNucleus using file:") { + val dnDir = Utils.createTempDir() + val tempDir = Utils.createTempDir() + + try { + // create mock datanucleus jar + val tempJar = File.createTempFile("datanucleus-", null, dnDir) + + val conf = new Configuration() + val sparkConf = new SparkConf() + .set(ClientBase.CONF_SPARK_JAR, SPARK) + .set(ClientBase.CONF_SPARK_DATANUCLEUS_DIR, dnDir.toURI.toString) + val yarnConf = new YarnConfiguration() + val args = new ClientArguments(Array("--jar", USER, "--addJars", ADDED), sparkConf) + + val client = spy(new DummyClient(args, conf, 
sparkConf, yarnConf)) + doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), + any(classOf[Path]), anyShort(), anyBoolean()) + + client.prepareLocalResources(tempDir.getAbsolutePath()) + + val jars = sparkConf.get(ClientBase.CONF_SPARK_DATANUCLEUS_JARS).split(",") + jars should contain (tempJar.getName) + } finally { + Utils.deleteRecursively(dnDir) + Utils.deleteRecursively(tempDir) + } + } + test("Jar path propagation through SparkConf") { val conf = new Configuration() val sparkConf = new SparkConf().set(ClientBase.CONF_SPARK_JAR, SPARK) From 96786e3ee53a13a57463b74bec0e77b172f719a3 Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Wed, 3 Dec 2014 12:08:00 -0800 Subject: [PATCH 48/82] [SPARK-4701] Typo in sbt/sbt Modified typo. Author: Masayoshi TSUZUKI Closes #3560 from tsudukim/feature/SPARK-4701 and squashes the following commits: ed2a3f1 [Masayoshi TSUZUKI] Another whitespace position error. 1af3a35 [Masayoshi TSUZUKI] [SPARK-4701] Typo in sbt/sbt --- sbt/sbt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sbt/sbt b/sbt/sbt index c172fa74bc771..0a251d97db95c 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -71,8 +71,8 @@ Usage: $script_name [options] -Dkey=val pass -Dkey=val directly to the java runtime -J-X pass option -X directly to the java runtime (-J is stripped) - -S-X add -X to sbt's scalacOptions (-J is stripped) - -PmavenProfiles Enable a maven profile for the build. + -S-X add -X to sbt's scalacOptions (-S is stripped) + -PmavenProfiles Enable a maven profile for the build. In the case of duplicated or conflicting options, the order above shows precedence: JAVA_OPTS lowest, command line options highest. From edd3cd477c9d6016bd977c2fa692fdeff5a6e198 Mon Sep 17 00:00:00 2001 From: zsxwing Date: Wed, 3 Dec 2014 12:19:40 -0800 Subject: [PATCH 49/82] [SPARK-4715][Core] Make sure tryToAcquire won't return a negative value ShuffleMemoryManager.tryToAcquire may return a negative value. 
The unit test demonstrates this bug. It will output `0 did not equal -200 granted is negative`. Author: zsxwing Closes #3575 from zsxwing/SPARK-4715 and squashes the following commits: a193ae6 [zsxwing] Make sure tryToAcquire won't return a negative value --- .../spark/shuffle/ShuffleMemoryManager.scala | 5 +++-- .../shuffle/ShuffleMemoryManagerSuite.scala | 17 ++++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleMemoryManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleMemoryManager.scala index ee91a368b76ea..3bcc7178a3d8b 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleMemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleMemoryManager.scala @@ -66,8 +66,9 @@ private[spark] class ShuffleMemoryManager(maxMemory: Long) extends Logging { val curMem = threadMemory(threadId) val freeMemory = maxMemory - threadMemory.values.sum - // How much we can grant this thread; don't let it grow to more than 1 / numActiveThreads - val maxToGrant = math.min(numBytes, (maxMemory / numActiveThreads) - curMem) + // How much we can grant this thread; don't let it grow to more than 1 / numActiveThreads; + // don't let it be negative + val maxToGrant = math.min(numBytes, math.max(0, (maxMemory / numActiveThreads) - curMem)) if (curMem < maxMemory / (2 * numActiveThreads)) { // We want to let each thread get at least 1 / (2 * numActiveThreads) before blocking; diff --git a/core/src/test/scala/org/apache/spark/shuffle/ShuffleMemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/ShuffleMemoryManagerSuite.scala index d31bc22ee74f7..e0e646f0a3652 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/ShuffleMemoryManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/ShuffleMemoryManagerSuite.scala @@ -159,7 +159,7 @@ class ShuffleMemoryManagerSuite extends FunSuite with Timeouts { test("threads can block to get at least 
1 / 2N memory") { // t1 grabs 1000 bytes and then waits until t2 is ready to make a request. It sleeps - // for a bit and releases 250 bytes, which should then be greanted to t2. Further requests + // for a bit and releases 250 bytes, which should then be granted to t2. Further requests // by t2 will return false right away because it now has 1 / 2N of the memory. val manager = new ShuffleMemoryManager(1000L) @@ -291,4 +291,19 @@ class ShuffleMemoryManagerSuite extends FunSuite with Timeouts { assert(state.t2WaitTime > 200, s"t2 waited less than 200 ms (${state.t2WaitTime})") } } + + test("threads should not be granted a negative size") { + val manager = new ShuffleMemoryManager(1000L) + manager.tryToAcquire(700L) + + val latch = new CountDownLatch(1) + startThread("t1") { + manager.tryToAcquire(300L) + latch.countDown() + } + latch.await() // Wait until `t1` calls `tryToAcquire` + + val granted = manager.tryToAcquire(300L) + assert(0 === granted, "granted is negative") + } } From 692f49378f7d384d5c9c5ab7451a1c1e66f91c50 Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Wed, 3 Dec 2014 13:16:24 -0800 Subject: [PATCH 50/82] [SPARK-4642] Add description about spark.yarn.queue to running-on-YARN document. Added descriptions about these parameters. - spark.yarn.queue Modified description about the defalut value of this parameter. - spark.yarn.submit.file.replication Author: Masayoshi TSUZUKI Closes #3500 from tsudukim/feature/SPARK-4642 and squashes the following commits: ce99655 [Masayoshi TSUZUKI] better gramatically. 21cf624 [Masayoshi TSUZUKI] Removed intentionally undocumented properties. 
88cac9b [Masayoshi TSUZUKI] [SPARK-4642] Documents about running-on-YARN needs update --- docs/running-on-yarn.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 45e219e0c136c..e97ac9f0c4a00 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -30,7 +30,7 @@ Most of the configs are the same for Spark on YARN as for other deployment modes spark.yarn.submit.file.replication - 3 + The default HDFS replication (usually 3) HDFS replication level for the files uploaded into HDFS for the application. These include things like the Spark jar, the app jar, and any distributed cache files/archives. @@ -91,6 +91,13 @@ Most of the configs are the same for Spark on YARN as for other deployment modes The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the container size (typically 6-10%). + + spark.yarn.queue + default + + The name of the YARN queue to which the application is submitted. 
+ + spark.yarn.jar (none) From 90ec643e9af4c8bbb9000edca08c07afb17939c7 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 3 Dec 2014 13:56:23 -0800 Subject: [PATCH 51/82] [HOT FIX] [YARN] Check whether `/lib` exists before listing its files This is caused by a975dc32799bb8a14f9e1c76defaaa7cfbaf8b53 Author: Andrew Or Closes #3589 from andrewor14/yarn-hot-fix and squashes the following commits: a4fad5f [Andrew Or] Check whether lib directory exists before listing its files --- .../apache/spark/deploy/yarn/ClientBase.scala | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 8e4360ea4476b..290d9943a5077 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -243,18 +243,21 @@ private[spark] trait ClientBase extends Logging { val libsURI = new URI(libsDir) val jarLinks = ListBuffer.empty[String] if (libsURI.getScheme != LOCAL_SCHEME) { - val localURI = getQualifiedLocalPath(libsURI).toUri() - val jars = FileSystem.get(localURI, hadoopConf).listFiles(new Path(localURI.getPath), false) - while (jars.hasNext) { - val jar = jars.next() - val name = jar.getPath.getName - if (name.startsWith("datanucleus-")) { - // copy to remote and add to classpath - val src = jar.getPath - val destPath = copyFileToRemote(dst, src, replication) - distCacheMgr.addResource(fs, hadoopConf, destPath, - localResources, LocalResourceType.FILE, name, statCache) - jarLinks += name + val localPath = getQualifiedLocalPath(libsURI) + val localFs = FileSystem.get(localPath.toUri, hadoopConf) + if (localFs.exists(localPath)) { + val jars = localFs.listFiles(localPath, /* recursive */ false) + while (jars.hasNext) { + val jar = jars.next() + val name = jar.getPath.getName + if 
(name.startsWith("datanucleus-")) { + // copy to remote and add to classpath + val src = jar.getPath + val destPath = copyFileToRemote(dst, src, replication) + distCacheMgr.addResource(localFs, hadoopConf, destPath, + localResources, LocalResourceType.FILE, name, statCache) + jarLinks += name + } } } } else { From 513ef82e85661552e596d0b483b645ac24e86d4d Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Wed, 3 Dec 2014 14:13:35 -0800 Subject: [PATCH 52/82] [SPARK-4552][SQL] Avoid exception when reading empty parquet data through Hive This is a very small fix that catches one specific exception and returns an empty table. #3441 will address this in a more principled way. Author: Michael Armbrust Closes #3586 from marmbrus/fixEmptyParquet and squashes the following commits: 2781d9f [Michael Armbrust] Handle empty lists for newParquet 04dd376 [Michael Armbrust] Avoid exception when reading empty parquet data through Hive --- .../apache/spark/sql/parquet/newParquet.scala | 5 +- .../spark/sql/hive/HiveStrategies.scala | 96 ++++++++++--------- .../spark/sql/parquet/parquetSuites.scala | 6 ++ 3 files changed, 62 insertions(+), 45 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 9b89c3bfb3307..14f8659f15b3f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -191,7 +191,10 @@ case class ParquetRelation2(path: String)(@transient val sqlContext: SQLContext) val selectedPartitions = partitions.filter(p => partitionFilters.forall(_(p))) val fs = FileSystem.get(new java.net.URI(path), sparkContext.hadoopConfiguration) val selectedFiles = selectedPartitions.flatMap(_.files).map(f => fs.makeQualified(f.getPath)) - org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job, selectedFiles:_*) + // FileInputFormat cannot handle empty 
lists. + if (selectedFiles.nonEmpty) { + org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job, selectedFiles: _*) + } // Push down filters when possible predicates diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 56fc85239e1c0..edf291f917f07 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.types.StringType -import org.apache.spark.sql.execution.{DescribeCommand, OutputFaker, SparkPlan} +import org.apache.spark.sql.execution.{DescribeCommand, OutputFaker, SparkPlan, PhysicalRDD} import org.apache.spark.sql.hive import org.apache.spark.sql.hive.execution._ import org.apache.spark.sql.parquet.ParquetRelation @@ -104,53 +104,61 @@ private[hive] trait HiveStrategies { case a: AttributeReference => UnresolvedAttribute(a.name) }) - if (relation.hiveQlTable.isPartitioned) { - val rawPredicate = pruningPredicates.reduceOption(And).getOrElse(Literal(true)) - // Translate the predicate so that it automatically casts the input values to the correct - // data types during evaluation - val castedPredicate = rawPredicate transform { - case a: AttributeReference => - val idx = relation.partitionKeys.indexWhere(a.exprId == _.exprId) - val key = relation.partitionKeys(idx) - Cast(BoundReference(idx, StringType, nullable = true), key.dataType) - } - - val inputData = new GenericMutableRow(relation.partitionKeys.size) - val pruningCondition = - if(codegenEnabled) { - GeneratePredicate(castedPredicate) - } else { - InterpretedPredicate(castedPredicate) + try { + if (relation.hiveQlTable.isPartitioned) { + val rawPredicate = 
pruningPredicates.reduceOption(And).getOrElse(Literal(true)) + // Translate the predicate so that it automatically casts the input values to the + // correct data types during evaluation. + val castedPredicate = rawPredicate transform { + case a: AttributeReference => + val idx = relation.partitionKeys.indexWhere(a.exprId == _.exprId) + val key = relation.partitionKeys(idx) + Cast(BoundReference(idx, StringType, nullable = true), key.dataType) } - val partitions = relation.hiveQlPartitions.filter { part => - val partitionValues = part.getValues - var i = 0 - while (i < partitionValues.size()) { - inputData(i) = partitionValues(i) - i += 1 + val inputData = new GenericMutableRow(relation.partitionKeys.size) + val pruningCondition = + if (codegenEnabled) { + GeneratePredicate(castedPredicate) + } else { + InterpretedPredicate(castedPredicate) + } + + val partitions = relation.hiveQlPartitions.filter { part => + val partitionValues = part.getValues + var i = 0 + while (i < partitionValues.size()) { + inputData(i) = partitionValues(i) + i += 1 + } + pruningCondition(inputData) } - pruningCondition(inputData) - } - hiveContext - .parquetFile(partitions.map(_.getLocation).mkString(",")) - .addPartitioningAttributes(relation.partitionKeys) - .lowerCase - .where(unresolvedOtherPredicates) - .select(unresolvedProjection:_*) - .queryExecution - .executedPlan - .fakeOutput(projectList.map(_.toAttribute)):: Nil - } else { - hiveContext - .parquetFile(relation.hiveQlTable.getDataLocation.toString) - .lowerCase - .where(unresolvedOtherPredicates) - .select(unresolvedProjection:_*) - .queryExecution - .executedPlan - .fakeOutput(projectList.map(_.toAttribute)) :: Nil + hiveContext + .parquetFile(partitions.map(_.getLocation).mkString(",")) + .addPartitioningAttributes(relation.partitionKeys) + .lowerCase + .where(unresolvedOtherPredicates) + .select(unresolvedProjection: _*) + .queryExecution + .executedPlan + .fakeOutput(projectList.map(_.toAttribute)) :: Nil + } else { + 
hiveContext + .parquetFile(relation.hiveQlTable.getDataLocation.toString) + .lowerCase + .where(unresolvedOtherPredicates) + .select(unresolvedProjection: _*) + .queryExecution + .executedPlan + .fakeOutput(projectList.map(_.toAttribute)) :: Nil + } + } catch { + // parquetFile will throw an exception when there is no data. + // TODO: Remove this hack for Spark 1.3. + case iae: java.lang.IllegalArgumentException + if iae.getMessage.contains("Can not create a Path from an empty string") => + PhysicalRDD(plan.output, sparkContext.emptyRDD[Row]) :: Nil } case _ => Nil } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala index 7159ebd0353ad..488ebba043794 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala @@ -218,6 +218,12 @@ abstract class ParquetTest extends QueryTest with BeforeAndAfterAll { 10) } + test(s"non-existant partition $table") { + checkAnswer( + sql(s"SELECT COUNT(*) FROM $table WHERE p = 1000"), + 0) + } + test(s"multi-partition pruned count $table") { checkAnswer( sql(s"SELECT COUNT(*) FROM $table WHERE p IN (1,2,3)"), From 96b27855c5f9789d1f15316564a8e0fa2cd5a51b Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Wed, 3 Dec 2014 15:08:01 -0800 Subject: [PATCH 53/82] [SPARK-4498][core] Don't transition ExecutorInfo to RUNNING until Driver adds Executor The ExecutorInfo only reaches the RUNNING state if the Driver is alive to send the ExecutorStateChanged message to master. Else, appInfo.resetRetryCount() is never called and failing Executors will eventually exceed ApplicationState.MAX_NUM_RETRY, resulting in the application being removed from the master's accounting. 
JoshRosen Author: Mark Hamstra Closes #3550 from markhamstra/SPARK-4498 and squashes the following commits: 8f543b1 [Mark Hamstra] Don't transition ExecutorInfo to RUNNING until Executor is added by Driver --- .../main/scala/org/apache/spark/deploy/client/AppClient.scala | 1 + .../scala/org/apache/spark/deploy/worker/ExecutorRunner.scala | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala index 98a93d1fcb2a3..4efebcaa350fe 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala @@ -134,6 +134,7 @@ private[spark] class AppClient( val fullId = appId + "/" + id logInfo("Executor added: %s on %s (%s) with %d cores".format(fullId, workerId, hostPort, cores)) + master ! ExecutorStateChanged(appId, id, ExecutorState.RUNNING, None, None) listener.executorAdded(fullId, workerId, hostPort, cores, memory) case ExecutorUpdated(id, state, message, exitStatus) => diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 8ba6a01bbcb97..f4fedc6327ab9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -144,8 +144,6 @@ private[spark] class ExecutorRunner( Files.write(header, stderr, UTF_8) stderrAppender = FileAppender(process.getErrorStream, stderr, conf) - state = ExecutorState.RUNNING - worker ! 
ExecutorStateChanged(appId, execId, state, None, None) // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown) // or with nonzero exit code val exitCode = process.waitFor() From 1826372d0a1bc80db9015106dd5d2d155ada33f5 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 3 Dec 2014 16:28:24 -0800 Subject: [PATCH 54/82] [SPARK-4085] Propagate FetchFailedException when Spark fails to read local shuffle file. cc aarondav kayousterhout pwendell This should go into 1.2? Author: Reynold Xin Closes #3579 from rxin/SPARK-4085 and squashes the following commits: 255b4fd [Reynold Xin] Updated test. f9814d9 [Reynold Xin] Code review feedback. 2afaf35 [Reynold Xin] [SPARK-4085] Propagate FetchFailedException when Spark fails to read local shuffle file. --- .../storage/ShuffleBlockFetcherIterator.scala | 28 +++++++++++-------- .../spark/ExternalShuffleServiceSuite.scala | 2 -- .../scala/org/apache/spark/ShuffleSuite.scala | 23 +++++++++++++++ 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index 83170f7c5a4ab..2499c11a65b0e 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -17,6 +17,7 @@ package org.apache.spark.storage +import java.io.{InputStream, IOException} import java.util.concurrent.LinkedBlockingQueue import scala.collection.mutable.{ArrayBuffer, HashSet, Queue} @@ -289,17 +290,22 @@ final class ShuffleBlockFetcherIterator( } val iteratorTry: Try[Iterator[Any]] = result match { - case FailureFetchResult(_, e) => Failure(e) - case SuccessFetchResult(blockId, _, buf) => { - val is = blockManager.wrapForCompression(blockId, buf.createInputStream()) - val iter = serializer.newInstance().deserializeStream(is).asIterator - 
Success(CompletionIterator[Any, Iterator[Any]](iter, { - // Once the iterator is exhausted, release the buffer and set currentResult to null - // so we don't release it again in cleanup. - currentResult = null - buf.release() - })) - } + case FailureFetchResult(_, e) => + Failure(e) + case SuccessFetchResult(blockId, _, buf) => + // There is a chance that createInputStream can fail (e.g. fetching a local file that does + // not exist, SPARK-4085). In that case, we should propagate the right exception so + // the scheduler gets a FetchFailedException. + Try(buf.createInputStream()).map { is0 => + val is = blockManager.wrapForCompression(blockId, is0) + val iter = serializer.newInstance().deserializeStream(is).asIterator + CompletionIterator[Any, Iterator[Any]](iter, { + // Once the iterator is exhausted, release the buffer and set currentResult to null + // so we don't release it again in cleanup. + currentResult = null + buf.release() + }) + } } (result.blockId, iteratorTry) diff --git a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala index cc3592ee43a35..bac6fdbcdc976 100644 --- a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark -import java.util.concurrent.atomic.AtomicInteger - import org.scalatest.BeforeAndAfterAll import org.apache.spark.network.TransportContext diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 5a133c0490444..58a96245a9b53 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -23,6 +23,7 @@ import org.scalatest.Matchers import org.apache.spark.ShuffleSuite.NonJavaSerializableClass import org.apache.spark.rdd.{CoGroupedRDD, OrderedRDDFunctions, RDD, 
ShuffledRDD, SubtractedRDD} import org.apache.spark.serializer.KryoSerializer +import org.apache.spark.storage.{ShuffleDataBlockId, ShuffleBlockId} import org.apache.spark.util.MutablePair abstract class ShuffleSuite extends FunSuite with Matchers with LocalSparkContext { @@ -263,6 +264,28 @@ abstract class ShuffleSuite extends FunSuite with Matchers with LocalSparkContex } } } + + test("[SPARK-4085] rerun map stage if reduce stage cannot find its local shuffle file") { + val myConf = conf.clone().set("spark.test.noStageRetry", "false") + sc = new SparkContext("local", "test", myConf) + val rdd = sc.parallelize(1 to 10, 2).map((_, 1)).reduceByKey(_ + _) + rdd.count() + + // Delete one of the local shuffle blocks. + val hashFile = sc.env.blockManager.diskBlockManager.getFile(new ShuffleBlockId(0, 0, 0)) + val sortFile = sc.env.blockManager.diskBlockManager.getFile(new ShuffleDataBlockId(0, 0, 0)) + assert(hashFile.exists() || sortFile.exists()) + + if (hashFile.exists()) { + hashFile.delete() + } + if (sortFile.exists()) { + sortFile.delete() + } + + // This count should retry the execution of the previous stage and rerun shuffle. + rdd.count() + } } object ShuffleSuite { From 27ab0b8a03b711e8d86b6167df833f012205ccc7 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Thu, 4 Dec 2014 08:58:03 +0800 Subject: [PATCH 55/82] [SPARK-4711] [mllib] [docs] Programming guide advice on choosing optimizer I have heard requests for the docs to include advice about choosing an optimization method. The programming guide could include a brief statement about this (so the user does not have to read the whole optimization section). CC: mengxr Author: Joseph K. Bradley Closes #3569 from jkbradley/lr-doc and squashes the following commits: 654aeb5 [Joseph K. Bradley] updated section header for mllib-optimization 5035ad0 [Joseph K. Bradley] updated based on review 94f6dec [Joseph K. 
Bradley] Updated linear methods and optimization docs with quick advice on choosing an optimization method --- docs/mllib-linear-methods.md | 10 +++++++--- docs/mllib-optimization.md | 17 +++++++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index bc914a1899801..44b7f67c57734 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -110,12 +110,16 @@ However, L1 regularization can help promote sparsity in weights leading to small It is not recommended to train models without any regularization, especially when the number of training examples is small. +### Optimization + +Under the hood, linear methods use convex optimization methods to optimize the objective functions. MLlib uses two methods, SGD and L-BFGS, described in the [optimization section](mllib-optimization.html). Currently, most algorithm APIs support Stochastic Gradient Descent (SGD), and a few support L-BFGS. Refer to [this optimization section](mllib-optimization.html#Choosing-an-Optimization-Method) for guidelines on choosing between optimization methods. + ## Binary classification [Binary classification](http://en.wikipedia.org/wiki/Binary_classification) aims to divide items into two categories: positive and negative. MLlib -supports two linear methods for binary classification: linear support vector -machines (SVMs) and logistic regression. For both methods, MLlib supports +supports two linear methods for binary classification: linear Support Vector +Machines (SVMs) and logistic regression. For both methods, MLlib supports L1 and L2 regularized variants. The training data set is represented by an RDD of [LabeledPoint](mllib-data-types.html) in MLlib. Note that, in the mathematical formulation in this guide, a training label $y$ is denoted as @@ -123,7 +127,7 @@ either $+1$ (positive) or $-1$ (negative), which is convenient for the formulation. 
*However*, the negative label is represented by $0$ in MLlib instead of $-1$, to be consistent with multiclass labeling. -### Linear support vector machines (SVMs) +### Linear Support Vector Machines (SVMs) The [linear SVM](http://en.wikipedia.org/wiki/Support_vector_machine#Linear_SVM) is a standard method for large-scale classification tasks. It is a linear method as described above in equation `$\eqref{eq:regPrimal}$`, with the loss function in the formulation given by the hinge loss: diff --git a/docs/mllib-optimization.md b/docs/mllib-optimization.md index 45141c235be90..4d101afca2c97 100644 --- a/docs/mllib-optimization.md +++ b/docs/mllib-optimization.md @@ -138,6 +138,12 @@ vertical scalability issue (the number of training features) when computing the explicitly in Newton's method. As a result, L-BFGS often achieves rapider convergence compared with other first-order optimization. +### Choosing an Optimization Method + +[Linear methods](mllib-linear-methods.html) use optimization internally, and some linear methods in MLlib support both SGD and L-BFGS. +Different optimization methods can have different convergence guarantees depending on the properties of the objective function, and we cannot cover the literature here. +In general, when L-BFGS is available, we recommend using it instead of SGD since L-BFGS tends to converge faster (in fewer iterations). + ## Implementation in MLlib ### Gradient descent and stochastic gradient descent @@ -168,10 +174,7 @@ descent. All updaters in MLlib use a step size at the t-th step equal to * `regParam` is the regularization parameter when using L1 or L2 regularization. * `miniBatchFraction` is the fraction of the total data that is sampled in each iteration, to compute the gradient direction. 
- -Available algorithms for gradient descent: - -* [GradientDescent](api/scala/index.html#org.apache.spark.mllib.optimization.GradientDescent) + * Sampling still requires a pass over the entire RDD, so decreasing `miniBatchFraction` may not speed up optimization much. Users will see the greatest speedup when the gradient is expensive to compute, for only the chosen samples are used for computing the gradient. ### L-BFGS L-BFGS is currently only a low-level optimization primitive in `MLlib`. If you want to use L-BFGS in various @@ -359,13 +362,15 @@ public class LBFGSExample { {% endhighlight %} -#### Developer's note + +## Developer's notes + Since the Hessian is constructed approximately from previous gradient evaluations, the objective function can not be changed during the optimization process. As a result, Stochastic L-BFGS will not work naively by just using miniBatch; therefore, we don't provide this until we have better understanding. -* `Updater` is a class originally designed for gradient decent which computes +`Updater` is a class originally designed for gradient descent which computes the actual gradient descent step. However, we're able to take the gradient and loss of objective function of regularization for L-BFGS by ignoring the part of logic only for gradient decent such as adaptive step size stuff. We will refactorize From 657a88835d8bf22488b53d50f75281d7dc32442e Mon Sep 17 00:00:00 2001 From: "Joseph K.
Bradley" Date: Thu, 4 Dec 2014 09:57:50 +0800 Subject: [PATCH 56/82] [SPARK-4580] [SPARK-4610] [mllib] [docs] Documentation for tree ensembles + DecisionTree API fix Major changes: * Added programming guide sections for tree ensembles * Added examples for tree ensembles * Updated DecisionTree programming guide with more info on parameters * **API change**: Standardized the tree parameter for the number of classes (for classification) Minor changes: * Updated decision tree documentation * Updated existing tree and tree ensemble examples * Use train/test split, and compute test error instead of training error. * Fixed decision_tree_runner.py to actually use the number of classes it computes from data. (small bug fix) Note: I know this is a lot of lines, but most is covered by: * Programming guide sections for gradient boosting and random forests. (The changes are probably best viewed by generating the docs locally.) * New examples (which were copied from the programming guide) * The "numClasses" renaming I have run all examples and relevant unit tests. CC: mengxr manishamde codedeft Author: Joseph K. Bradley Author: Joseph K. Bradley Closes #3461 from jkbradley/ensemble-docs and squashes the following commits: 70a75f3 [Joseph K. Bradley] updated forest vs boosting comparison d1de753 [Joseph K. Bradley] Added note about toString and toDebugString for DecisionTree to migration guide 8e87f8f [Joseph K. Bradley] Combined GBT and RandomForest guides into one ensembles guide 6fab846 [Joseph K. Bradley] small fixes based on review b9f8576 [Joseph K. Bradley] updated decision tree doc 375204c [Joseph K. Bradley] fixed python style 2b60b6e [Joseph K. Bradley] merged Java RandomForest examples into 1 file. added header. Fixed small bug in same example in the programming guide. 706d332 [Joseph K. Bradley] updated python DT runner to print full model if it is small c76c823 [Joseph K. Bradley] added migration guide for mllib abe5ed7 [Joseph K. 
Bradley] added examples for random forest in Java and Python to examples folder 07fc11d [Joseph K. Bradley] Renamed numClassesForClassification to numClasses everywhere in trees and ensembles. This is a breaking API change, but it was necessary to correct an API inconsistency in Spark 1.1 (where Python DecisionTree used numClasses but Scala used numClassesForClassification). cdfdfbc [Joseph K. Bradley] added examples for GBT 6372a2b [Joseph K. Bradley] updated decision tree examples to use random split. tested all of them. ad3e695 [Joseph K. Bradley] added gbt and random forest to programming guide. still need to update their examples --- docs/mllib-decision-tree.md | 241 ++++--- docs/mllib-ensembles.md | 653 ++++++++++++++++++ docs/mllib-guide.md | 29 +- .../mllib/JavaGradientBoostedTreesRunner.java | 2 +- .../mllib/JavaRandomForestExample.java | 139 ++++ .../main/python/mllib/decision_tree_runner.py | 17 +- .../python/mllib/random_forest_example.py | 89 +++ .../examples/mllib/DecisionTreeRunner.scala | 2 +- .../mllib/GradientBoostedTreesRunner.scala | 2 +- .../mllib/api/python/PythonMLLibAPI.scala | 4 +- .../spark/mllib/tree/DecisionTree.scala | 22 +- .../spark/mllib/tree/RandomForest.scala | 20 +- .../tree/configuration/BoostingStrategy.scala | 6 +- .../mllib/tree/configuration/Strategy.scala | 26 +- .../tree/impl/DecisionTreeMetadata.scala | 2 +- .../spark/mllib/tree/DecisionTreeSuite.scala | 46 +- .../tree/GradientBoostedTreesSuite.scala | 2 +- .../spark/mllib/tree/RandomForestSuite.scala | 14 +- python/pyspark/mllib/tree.py | 6 +- 19 files changed, 1140 insertions(+), 182 deletions(-) create mode 100644 docs/mllib-ensembles.md create mode 100644 examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestExample.java create mode 100755 examples/src/main/python/mllib/random_forest_example.py diff --git a/docs/mllib-decision-tree.md b/docs/mllib-decision-tree.md index 12a6afbeea829..fc8e732251a30 100644 --- a/docs/mllib-decision-tree.md +++ 
b/docs/mllib-decision-tree.md @@ -11,7 +11,7 @@ displayTitle: MLlib - Decision Tree and their ensembles are popular methods for the machine learning tasks of classification and regression. Decision trees are widely used since they are easy to interpret, handle categorical features, extend to the multiclass classification setting, do not require -feature scaling and are able to capture nonlinearities and feature interactions. Tree ensemble +feature scaling, and are able to capture non-linearities and feature interactions. Tree ensemble algorithms such as random forests and boosting are among the top performers for classification and regression tasks. @@ -19,6 +19,8 @@ MLlib supports decision trees for binary and multiclass classification and for r using both continuous and categorical features. The implementation partitions data by rows, allowing distributed training with millions of instances. +Ensembles of trees (Random Forests and Gradient-Boosted Trees) are described in the [Ensembles guide](mllib-ensembles.html). + ## Basic algorithm The decision tree is a greedy algorithm that performs a recursive binary partitioning of the feature @@ -42,18 +44,18 @@ impurity measure for regression (variance). Gini impurity Classification - $\sum_{i=1}^{M} f_i(1-f_i)$$f_i$ is the frequency of label $i$ at a node and $M$ is the number of unique labels. + $\sum_{i=1}^{C} f_i(1-f_i)$$f_i$ is the frequency of label $i$ at a node and $C$ is the number of unique labels. Entropy Classification - $\sum_{i=1}^{M} -f_ilog(f_i)$$f_i$ is the frequency of label $i$ at a node and $M$ is the number of unique labels. + $\sum_{i=1}^{C} -f_i \log(f_i)$$f_i$ is the frequency of label $i$ at a node and $C$ is the number of unique labels. Variance Regression - $\frac{1}{n} \sum_{i=1}^{N} (x_i - \mu)^2$$y_i$ is label for an instance, - $N$ is the number of instances and $\mu$ is the mean given by $\frac{1}{N} \sum_{i=1}^n x_i$.
+ $\frac{1}{N} \sum_{i=1}^{N} (y_i - \mu)^2$$y_i$ is the label for an instance, + $N$ is the number of instances and $\mu$ is the mean given by $\frac{1}{N} \sum_{i=1}^N y_i$. @@ -103,36 +105,73 @@ and the resulting `$M-1$` split candidates are considered. ### Stopping rule -The recursive tree construction is stopped at a node when one of the two conditions is met: +The recursive tree construction is stopped at a node when one of the following conditions is met: 1. The node depth is equal to the `maxDepth` training parameter. -2. No split candidate leads to an information gain at the node. +2. No split candidate leads to an information gain greater than `minInfoGain`. +3. No split candidate produces child nodes which each have at least `minInstancesPerNode` training instances. + +## Usage tips + +We include a few guidelines for using decision trees by discussing the various parameters. +The parameters are listed below roughly in order of descending importance. New users should mainly consider the "Problem specification parameters" section and the `maxDepth` parameter. + +### Problem specification parameters + +These parameters describe the problem you want to solve and your dataset. +They should be specified and do not require tuning. + +* **`algo`**: `Classification` or `Regression` + +* **`numClasses`**: Number of classes (for `Classification` only) + +* **`categoricalFeaturesInfo`**: Specifies which features are categorical and how many categorical values each of those features can take. This is given as a map from feature indices to feature arity (number of categories). Any features not in this map are treated as continuous. + * E.g., `Map(0 -> 2, 4 -> 10)` specifies that feature `0` is binary (taking values `0` or `1`) and that feature `4` has 10 categories (values `{0, 1, ..., 9}`). Note that feature indices are 0-based: features `0` and `4` are the 1st and 5th elements of an instance's feature vector.
+ * Note that you do not have to specify `categoricalFeaturesInfo`. The algorithm will still run and may get reasonable results. However, performance should be better if categorical features are properly designated. + +### Stopping criteria + +These parameters determine when the tree stops building (adding new nodes). +When tuning these parameters, be careful to validate on held-out test data to avoid overfitting. + +* **`maxDepth`**: Maximum depth of a tree. Deeper trees are more expressive (potentially allowing higher accuracy), but they are also more costly to train and are more likely to overfit. + +* **`minInstancesPerNode`**: For a node to be split further, each of its children must receive at least this number of training instances. This is commonly used with [RandomForest](api/scala/index.html#org.apache.spark.mllib.tree.RandomForest) since those are often trained deeper than individual trees. + +* **`minInfoGain`**: For a node to be split further, the split must improve at least this much (in terms of information gain). + +### Tunable parameters -## Implementation details +These parameters may be tuned. Be careful to validate on held-out test data when tuning in order to avoid overfitting. -### Max memory requirements +* **`maxBins`**: Number of bins used when discretizing continuous features. + * Increasing `maxBins` allows the algorithm to consider more split candidates and make fine-grained split decisions. However, it also increases computation and communication. + * Note that the `maxBins` parameter must be at least the maximum number of categories `$M$` for any categorical feature. -For faster processing, the decision tree algorithm performs simultaneous histogram computations for -all nodes at each level of the tree. This could lead to high memory requirements at deeper levels -of the tree, potentially leading to memory overflow errors. 
To alleviate this problem, a `maxMemoryInMB` -training parameter specifies the maximum amount of memory at the workers (twice as much at the -master) to be allocated to the histogram computation. The default value is conservatively chosen to -be 256 MB to allow the decision algorithm to work in most scenarios. Once the memory requirements -for a level-wise computation cross the `maxMemoryInMB` threshold, the node training tasks at each -subsequent level are split into smaller tasks. +* **`maxMemoryInMB`**: Amount of memory to be used for collecting sufficient statistics. + * The default value is conservatively chosen to be 256 MB to allow the decision algorithm to work in most scenarios. Increasing `maxMemoryInMB` can lead to faster training (if the memory is available) by allowing fewer passes over the data. However, there may be decreasing returns as `maxMemoryInMB` grows since the amount of communication on each iteration can be proportional to `maxMemoryInMB`. + * *Implementation details*: For faster processing, the decision tree algorithm collects statistics about groups of nodes to split (rather than 1 node at a time). The number of nodes which can be handled in one group is determined by the memory requirements (which vary per features). The `maxMemoryInMB` parameter specifies the memory limit in terms of megabytes which each worker can use for these statistics. -Note that, if you have a large amount of memory, increasing `maxMemoryInMB` can lead to faster -training by requiring fewer passes over the data. +* **`subsamplingRate`**: Fraction of the training data used for learning the decision tree. This parameter is most relevant for training ensembles of trees (using [`RandomForest`](api/scala/index.html#org.apache.spark.mllib.tree.RandomForest) and [`GradientBoostedTrees`](api/scala/index.html#org.apache.spark.mllib.tree.GradientBoostedTrees)), where it can be useful to subsample the original data. 
For training a single decision tree, this parameter is less useful since the number of training instances is generally not the main constraint. -### Binning feature values +* **`impurity`**: Impurity measure (discussed above) used to choose between candidate splits. This measure must match the `algo` parameter. -Increasing `maxBins` allows the algorithm to consider more split candidates and make fine-grained -split decisions. However, it also increases computation and communication. +### Caching and checkpointing -Note that the `maxBins` parameter must be at least the maximum number of categories `$M$` for -any categorical feature. +MLlib 1.2 adds several features for scaling up to larger (deeper) trees and tree ensembles. When `maxDepth` is set to be large, it can be useful to turn on node ID caching and checkpointing. These parameters are also useful for [RandomForest](api/scala/index.html#org.apache.spark.mllib.tree.RandomForest) when `numTrees` is set to be large. -### Scaling +* **`useNodeIdCache`**: If this is set to true, the algorithm will avoid passing the current model (tree or trees) to executors on each iteration. + * This can be useful with deep trees (speeding up computation on workers) and for large Random Forests (reducing communication on each iteration). + * *Implementation details*: By default, the algorithm communicates the current model to executors so that executors can match training instances with tree nodes. When this setting is turned on, then the algorithm will instead cache this information. + +Node ID caching generates a sequence of RDDs (1 per iteration). This long lineage can cause performance problems, but checkpointing intermediate RDDs can alleviate those problems. +Note that checkpointing is only applicable when `useNodeIdCache` is set to true. + +* **`checkpointDir`**: Directory for checkpointing node ID cache RDDs. + +* **`checkpointInterval`**: Frequency for checkpointing node ID cache RDDs. 
Setting this too low will cause extra overhead from writing to HDFS; setting this too high can cause problems if executors fail and the RDD needs to be recomputed. + +## Scaling Computation scales approximately linearly in the number of training instances, in the number of features, and in the `maxBins` parameter. @@ -148,7 +187,7 @@ The example below demonstrates how to load a [LIBSVM data file](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/), parse it as an RDD of `LabeledPoint` and then perform classification using a decision tree with Gini impurity as an impurity measure and a -maximum tree depth of 5. The training error is calculated to measure the algorithm accuracy. +maximum tree depth of 5. The test error is calculated to measure the algorithm accuracy.
@@ -158,8 +197,10 @@ import org.apache.spark.mllib.tree.DecisionTree import org.apache.spark.mllib.util.MLUtils // Load and parse the data file. -// Cache the data since we will use it again to compute training error. -val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").cache() +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +// Split the data into training and test sets (30% held out for testing) +val splits = data.randomSplit(Array(0.7, 0.3)) +val (trainingData, testData) = (splits(0), splits(1)) // Train a DecisionTree model. // Empty categoricalFeaturesInfo indicates all features are continuous. @@ -169,17 +210,17 @@ val impurity = "gini" val maxDepth = 5 val maxBins = 32 -val model = DecisionTree.trainClassifier(data, numClasses, categoricalFeaturesInfo, impurity, - maxDepth, maxBins) +val model = DecisionTree.trainClassifier(trainingData, numClasses, categoricalFeaturesInfo, + impurity, maxDepth, maxBins) -// Evaluate model on training instances and compute training error -val labelAndPreds = data.map { point => +// Evaluate model on test instances and compute test error +val labelAndPreds = testData.map { point => val prediction = model.predict(point.features) (point.label, prediction) } -val trainErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / data.count -println("Training Error = " + trainErr) -println("Learned classification tree model:\n" + model) +val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData.count() +println("Test Error = " + testErr) +println("Learned classification tree model:\n" + model.toDebugString) {% endhighlight %}
@@ -187,7 +228,6 @@ println("Learned classification tree model:\n" + model) {% highlight java %} import java.util.HashMap; import scala.Tuple2; -import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -203,37 +243,42 @@ SparkConf sparkConf = new SparkConf().setAppName("JavaDecisionTree"); JavaSparkContext sc = new JavaSparkContext(sparkConf); // Load and parse the data file. -// Cache the data since we will use it again to compute training error. String datapath = "data/mllib/sample_libsvm_data.txt"; -JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD().cache(); +JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD(); +// Split the data into training and test sets (30% held out for testing) +JavaRDD[] splits = data.randomSplit(new double[]{0.7, 0.3}); +JavaRDD trainingData = splits[0]; +JavaRDD testData = splits[1]; // Set parameters. // Empty categoricalFeaturesInfo indicates all features are continuous. Integer numClasses = 2; -HashMap categoricalFeaturesInfo = new HashMap(); +Map categoricalFeaturesInfo = new HashMap(); String impurity = "gini"; Integer maxDepth = 5; Integer maxBins = 32; // Train a DecisionTree model for classification. 
-final DecisionTreeModel model = DecisionTree.trainClassifier(data, numClasses, +final DecisionTreeModel model = DecisionTree.trainClassifier(trainingData, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins); -// Evaluate model on training instances and compute training error +// Evaluate model on test instances and compute test error JavaPairRDD predictionAndLabel = - data.mapToPair(new PairFunction() { - @Override public Tuple2 call(LabeledPoint p) { + testData.mapToPair(new PairFunction() { + @Override + public Tuple2 call(LabeledPoint p) { return new Tuple2(model.predict(p.features()), p.label()); } }); -Double trainErr = +Double testErr = 1.0 * predictionAndLabel.filter(new Function, Boolean>() { - @Override public Boolean call(Tuple2 pl) { + @Override + public Boolean call(Tuple2 pl) { return !pl._1().equals(pl._2()); } - }).count() / data.count(); -System.out.println("Training error: " + trainErr); -System.out.println("Learned classification tree model:\n" + model); + }).count() / testData.count(); +System.out.println("Test Error: " + testErr); +System.out.println("Learned classification tree model:\n" + model.toDebugString()); {% endhighlight %} @@ -244,26 +289,23 @@ from pyspark.mllib.tree import DecisionTree from pyspark.mllib.util import MLUtils # Load and parse the data file into an RDD of LabeledPoint. -# Cache the data since we will use it again to compute training error. -data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt').cache() +data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt') +# Split the data into training and test sets (30% held out for testing) +(trainingData, testData) = data.randomSplit([0.7, 0.3]) # Train a DecisionTree model. # Empty categoricalFeaturesInfo indicates all features are continuous. 
-model = DecisionTree.trainClassifier(data, numClasses=2, categoricalFeaturesInfo={}, +model = DecisionTree.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={}, impurity='gini', maxDepth=5, maxBins=32) -# Evaluate model on training instances and compute training error -predictions = model.predict(data.map(lambda x: x.features)) -labelsAndPredictions = data.map(lambda lp: lp.label).zip(predictions) -trainErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(data.count()) -print('Training Error = ' + str(trainErr)) +# Evaluate model on test instances and compute test error +predictions = model.predict(testData.map(lambda x: x.features)) +labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) +testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count()) +print('Test Error = ' + str(testErr)) print('Learned classification tree model:') -print(model) +print(model.toDebugString()) {% endhighlight %} - -Note: When making predictions for a dataset, it is more efficient to do batch prediction rather -than separately calling `predict` on each data point. This is because the Python code makes calls -to an underlying `DecisionTree` model in Scala. @@ -285,8 +327,10 @@ import org.apache.spark.mllib.tree.DecisionTree import org.apache.spark.mllib.util.MLUtils // Load and parse the data file. -// Cache the data since we will use it again to compute training error. -val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").cache() +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +// Split the data into training and test sets (30% held out for testing) +val splits = data.randomSplit(Array(0.7, 0.3)) +val (trainingData, testData) = (splits(0), splits(1)) // Train a DecisionTree model. // Empty categoricalFeaturesInfo indicates all features are continuous. 
@@ -295,17 +339,17 @@ val impurity = "variance" val maxDepth = 5 val maxBins = 32 -val model = DecisionTree.trainRegressor(data, categoricalFeaturesInfo, impurity, +val model = DecisionTree.trainRegressor(trainingData, categoricalFeaturesInfo, impurity, maxDepth, maxBins) -// Evaluate model on training instances and compute training error -val labelsAndPredictions = data.map { point => +// Evaluate model on test instances and compute test error +val labelsAndPredictions = testData.map { point => val prediction = model.predict(point.features) (point.label, prediction) } -val trainMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean() -println("Training Mean Squared Error = " + trainMSE) -println("Learned regression tree model:\n" + model) +val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean() +println("Test Mean Squared Error = " + testMSE) +println("Learned regression tree model:\n" + model.toDebugString) {% endhighlight %} @@ -325,45 +369,51 @@ import org.apache.spark.mllib.tree.model.DecisionTreeModel; import org.apache.spark.mllib.util.MLUtils; import org.apache.spark.SparkConf; -// Load and parse the data file. -// Cache the data since we will use it again to compute training error. -String datapath = "data/mllib/sample_libsvm_data.txt"; -JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD().cache(); - SparkConf sparkConf = new SparkConf().setAppName("JavaDecisionTree"); JavaSparkContext sc = new JavaSparkContext(sparkConf); +// Load and parse the data file. +String datapath = "data/mllib/sample_libsvm_data.txt"; +JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD(); +// Split the data into training and test sets (30% held out for testing) +JavaRDD[] splits = data.randomSplit(new double[]{0.7, 0.3}); +JavaRDD trainingData = splits[0]; +JavaRDD testData = splits[1]; + // Set parameters. // Empty categoricalFeaturesInfo indicates all features are continuous. 
-HashMap categoricalFeaturesInfo = new HashMap(); +Map categoricalFeaturesInfo = new HashMap(); String impurity = "variance"; Integer maxDepth = 5; Integer maxBins = 32; // Train a DecisionTree model. -final DecisionTreeModel model = DecisionTree.trainRegressor(data, +final DecisionTreeModel model = DecisionTree.trainRegressor(trainingData, categoricalFeaturesInfo, impurity, maxDepth, maxBins); -// Evaluate model on training instances and compute training error +// Evaluate model on test instances and compute test error JavaPairRDD predictionAndLabel = - data.mapToPair(new PairFunction() { - @Override public Tuple2 call(LabeledPoint p) { + testData.mapToPair(new PairFunction() { + @Override + public Tuple2 call(LabeledPoint p) { return new Tuple2(model.predict(p.features()), p.label()); } }); -Double trainMSE = +Double testMSE = predictionAndLabel.map(new Function, Double>() { - @Override public Double call(Tuple2 pl) { + @Override + public Double call(Tuple2 pl) { Double diff = pl._1() - pl._2(); return diff * diff; } }).reduce(new Function2() { - @Override public Double call(Double a, Double b) { + @Override + public Double call(Double a, Double b) { return a + b; } - }) / data.count(); + }) / testData.count(); -System.out.println("Training Mean Squared Error: " + trainMSE); -System.out.println("Learned regression tree model:\n" + model); +System.out.println("Test Mean Squared Error: " + testMSE); +System.out.println("Learned regression tree model:\n" + model.toDebugString()); {% endhighlight %} @@ -374,26 +424,23 @@ from pyspark.mllib.tree import DecisionTree from pyspark.mllib.util import MLUtils # Load and parse the data file into an RDD of LabeledPoint. -# Cache the data since we will use it again to compute training error. 
-data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt').cache() +data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt') +# Split the data into training and test sets (30% held out for testing) +(trainingData, testData) = data.randomSplit([0.7, 0.3]) # Train a DecisionTree model. # Empty categoricalFeaturesInfo indicates all features are continuous. -model = DecisionTree.trainRegressor(data, categoricalFeaturesInfo={}, +model = DecisionTree.trainRegressor(trainingData, categoricalFeaturesInfo={}, impurity='variance', maxDepth=5, maxBins=32) -# Evaluate model on training instances and compute training error -predictions = model.predict(data.map(lambda x: x.features)) -labelsAndPredictions = data.map(lambda lp: lp.label).zip(predictions) -trainMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() / float(data.count()) -print('Training Mean Squared Error = ' + str(trainMSE)) +# Evaluate model on test instances and compute test error +predictions = model.predict(testData.map(lambda x: x.features)) +labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) +testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() / float(testData.count()) +print('Test Mean Squared Error = ' + str(testMSE)) print('Learned regression tree model:') -print(model) +print(model.toDebugString()) {% endhighlight %} - -Note: When making predictions for a dataset, it is more efficient to do batch prediction rather -than separately calling `predict` on each data point. This is because the Python code makes calls -to an underlying `DecisionTree` model in Scala. 
diff --git a/docs/mllib-ensembles.md b/docs/mllib-ensembles.md new file mode 100644 index 0000000000000..23ede04b62d5b --- /dev/null +++ b/docs/mllib-ensembles.md @@ -0,0 +1,653 @@ +--- +layout: global +title: Ensembles - MLlib +displayTitle: MLlib - Ensembles +--- + +* Table of contents +{:toc} + +An [ensemble method](http://en.wikipedia.org/wiki/Ensemble_learning) +is a learning algorithm which creates a model composed of a set of other base models. +MLlib supports two major ensemble algorithms: [`GradientBoostedTrees`](api/scala/index.html#org.apache.spark.mllib.tree.GradientBoostedTrees) and [`RandomForest`](api/scala/index.html#org.apache.spark.mllib.tree.RandomForest). +Both use [decision trees](mllib-decision-tree.html) as their base models. + +## Gradient-Boosted Trees vs. Random Forests + +Both [Gradient-Boosted Trees (GBTs)](mllib-ensembles.html#Gradient-Boosted-Trees-(GBTS)) and [Random Forests](mllib-ensembles.html#Random-Forests) are algorithms for learning ensembles of trees, but the training processes are different. There are several practical trade-offs: + + * GBTs train one tree at a time, so they can take longer to train than random forests. Random Forests can train multiple trees in parallel. + * On the other hand, it is often reasonable to use smaller (shallower) trees with GBTs than with Random Forests, and training smaller trees takes less time. + * Random Forests can be less prone to overfitting. Training more trees in a Random Forest reduces the likelihood of overfitting, but training more trees with GBTs increases the likelihood of overfitting. (In statistical language, Random Forests reduce variance by using more trees, whereas GBTs reduce bias by using more trees.) + * Random Forests can be easier to tune since performance improves monotonically with the number of trees (whereas performance can start to decrease for GBTs if the number of trees grows too large). 
+ +In short, both algorithms can be effective, and the choice should be based on the particular dataset. + +## Random Forests + +[Random forests](http://en.wikipedia.org/wiki/Random_forest) +are ensembles of [decision trees](mllib-decision-tree.html). +Random forests are one of the most successful machine learning models for classification and +regression. They combine many decision trees in order to reduce the risk of overfitting. +Like decision trees, random forests handle categorical features, +extend to the multiclass classification setting, do not require +feature scaling, and are able to capture non-linearities and feature interactions. + +MLlib supports random forests for binary and multiclass classification and for regression, +using both continuous and categorical features. +MLlib implements random forests using the existing [decision tree](mllib-decision-tree.html) +implementation. Please see the decision tree guide for more information on trees. + +### Basic algorithm + +Random forests train a set of decision trees separately, so the training can be done in parallel. +The algorithm injects randomness into the training process so that each decision tree is a bit +different. Combining the predictions from each tree reduces the variance of the predictions, +improving the performance on test data. + +#### Training + +The randomness injected into the training process includes: + +* Subsampling the original dataset on each iteration to get a different training set (a.k.a. bootstrapping). +* Considering different random subsets of features to split on at each tree node. + +Apart from these randomizations, decision tree training is done in the same way as for individual decision trees. + +#### Prediction + +To make a prediction on a new instance, a random forest must aggregate the predictions from its set of decision trees. This aggregation is done differently for classification and regression. + +*Classification*: Majority vote. 
Each tree's prediction is counted as a vote for one class. The label is predicted to be the class which receives the most votes. + +*Regression*: Averaging. Each tree predicts a real value. The label is predicted to be the average of the tree predictions. + +### Usage tips + +We include a few guidelines for using random forests by discussing the various parameters. +We omit some decision tree parameters since those are covered in the [decision tree guide](mllib-decision-tree.html). + +The first two parameters we mention are the most important, and tuning them can often improve performance: + +* **`numTrees`**: Number of trees in the forest. + * Increasing the number of trees will decrease the variance in predictions, improving the model's test-time accuracy. + * Training time increases roughly linearly in the number of trees. + +* **`maxDepth`**: Maximum depth of each tree in the forest. + * Increasing the depth makes the model more expressive and powerful. However, deep trees take longer to train and are also more prone to overfitting. + * In general, it is acceptable to train deeper trees when using random forests than when using a single decision tree. One tree is more likely to overfit than a random forest (because of the variance reduction from averaging multiple trees in the forest). + +The next two parameters generally do not require tuning. However, they can be tuned to speed up training. + +* **`subsamplingRate`**: This parameter specifies the size of the dataset used for training each tree in the forest, as a fraction of the size of the original dataset. The default (1.0) is recommended, but decreasing this fraction can speed up training. + +* **`featureSubsetStrategy`**: Number of features to use as candidates for splitting at each tree node. The number is specified as a fraction or function of the total number of features. Decreasing this number will speed up training, but can sometimes impact performance if too low. 
+ +### Examples + +#### Classification + +The example below demonstrates how to load a +[LIBSVM data file](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/), +parse it as an RDD of `LabeledPoint` and then +perform classification using a Random Forest. +The test error is calculated to measure the algorithm accuracy. + +
+ +
+{% highlight scala %} +import org.apache.spark.mllib.tree.RandomForest +import org.apache.spark.mllib.util.MLUtils + +// Load and parse the data file. +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +// Split the data into training and test sets (30% held out for testing) +val splits = data.randomSplit(Array(0.7, 0.3)) +val (trainingData, testData) = (splits(0), splits(1)) + +// Train a RandomForest model. +// Empty categoricalFeaturesInfo indicates all features are continuous. +val numClasses = 2 +val categoricalFeaturesInfo = Map[Int, Int]() +val numTrees = 3 // Use more in practice. +val featureSubsetStrategy = "auto" // Let the algorithm choose. +val impurity = "gini" +val maxDepth = 4 +val maxBins = 32 + +val model = RandomForest.trainClassifier(trainingData, numClasses, categoricalFeaturesInfo, + numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins) + +// Evaluate model on test instances and compute test error +val labelAndPreds = testData.map { point => + val prediction = model.predict(point.features) + (point.label, prediction) +} +val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData.count() +println("Test Error = " + testErr) +println("Learned classification forest model:\n" + model.toDebugString) +{% endhighlight %} +
+ +
+{% highlight java %} +import scala.Tuple2; +import java.util.HashMap; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.tree.RandomForest; +import org.apache.spark.mllib.tree.model.RandomForestModel; +import org.apache.spark.mllib.util.MLUtils; + +SparkConf sparkConf = new SparkConf().setAppName("JavaRandomForestClassification"); +JavaSparkContext sc = new JavaSparkContext(sparkConf); + +// Load and parse the data file. +String datapath = "data/mllib/sample_libsvm_data.txt"; +JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD(); +// Split the data into training and test sets (30% held out for testing) +JavaRDD[] splits = data.randomSplit(new double[]{0.7, 0.3}); +JavaRDD trainingData = splits[0]; +JavaRDD testData = splits[1]; + +// Train a RandomForest model. +// Empty categoricalFeaturesInfo indicates all features are continuous. +Integer numClasses = 2; +HashMap categoricalFeaturesInfo = new HashMap(); +Integer numTrees = 3; // Use more in practice. +String featureSubsetStrategy = "auto"; // Let the algorithm choose. 
+String impurity = "gini"; +Integer maxDepth = 5; +Integer maxBins = 32; +Integer seed = 12345; + +final RandomForestModel model = RandomForest.trainClassifier(trainingData, numClasses, + categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, + seed); + +// Evaluate model on test instances and compute test error +JavaPairRDD predictionAndLabel = + testData.mapToPair(new PairFunction() { + @Override + public Tuple2 call(LabeledPoint p) { + return new Tuple2(model.predict(p.features()), p.label()); + } + }); +Double testErr = + 1.0 * predictionAndLabel.filter(new Function, Boolean>() { + @Override + public Boolean call(Tuple2 pl) { + return !pl._1().equals(pl._2()); + } + }).count() / testData.count(); +System.out.println("Test Error: " + testErr); +System.out.println("Learned classification forest model:\n" + model.toDebugString()); +{% endhighlight %} +
+ +
+{% highlight python %} +from pyspark.mllib.tree import RandomForest +from pyspark.mllib.util import MLUtils + +# Load and parse the data file into an RDD of LabeledPoint. +data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt') +# Split the data into training and test sets (30% held out for testing) +(trainingData, testData) = data.randomSplit([0.7, 0.3]) + +# Train a RandomForest model. +# Empty categoricalFeaturesInfo indicates all features are continuous. +# Note: Use larger numTrees in practice. +# Setting featureSubsetStrategy="auto" lets the algorithm choose. +model = RandomForest.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={}, + numTrees=3, featureSubsetStrategy="auto", + impurity='gini', maxDepth=4, maxBins=32) + +# Evaluate model on test instances and compute test error +predictions = model.predict(testData.map(lambda x: x.features)) +labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) +testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count()) +print('Test Error = ' + str(testErr)) +print('Learned classification forest model:') +print(model.toDebugString()) +{% endhighlight %} +
+ +
+ +#### Regression + +The example below demonstrates how to load a +[LIBSVM data file](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/), +parse it as an RDD of `LabeledPoint` and then +perform regression using a Random Forest. +The Mean Squared Error (MSE) is computed at the end to evaluate +[goodness of fit](http://en.wikipedia.org/wiki/Goodness_of_fit). + +
+ +
+{% highlight scala %} +import org.apache.spark.mllib.tree.RandomForest +import org.apache.spark.mllib.util.MLUtils + +// Load and parse the data file. +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +// Split the data into training and test sets (30% held out for testing) +val splits = data.randomSplit(Array(0.7, 0.3)) +val (trainingData, testData) = (splits(0), splits(1)) + +// Train a RandomForest model. +// Empty categoricalFeaturesInfo indicates all features are continuous. +val numClasses = 2 +val categoricalFeaturesInfo = Map[Int, Int]() +val numTrees = 3 // Use more in practice. +val featureSubsetStrategy = "auto" // Let the algorithm choose. +val impurity = "variance" +val maxDepth = 4 +val maxBins = 32 + +val model = RandomForest.trainRegressor(trainingData, categoricalFeaturesInfo, + numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins) + +// Evaluate model on test instances and compute test error +val labelsAndPredictions = testData.map { point => + val prediction = model.predict(point.features) + (point.label, prediction) +} +val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean() +println("Test Mean Squared Error = " + testMSE) +println("Learned regression forest model:\n" + model.toDebugString) +{% endhighlight %} +
+ +
+{% highlight java %} +import java.util.HashMap; +import java.util.Map; +import scala.Tuple2; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.tree.RandomForest; +import org.apache.spark.mllib.tree.model.RandomForestModel; +import org.apache.spark.mllib.util.MLUtils; +import org.apache.spark.SparkConf; + +SparkConf sparkConf = new SparkConf().setAppName("JavaRandomForest"); +JavaSparkContext sc = new JavaSparkContext(sparkConf); + +// Load and parse the data file. +String datapath = "data/mllib/sample_libsvm_data.txt"; +JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD(); +// Split the data into training and test sets (30% held out for testing) +JavaRDD[] splits = data.randomSplit(new double[]{0.7, 0.3}); +JavaRDD trainingData = splits[0]; +JavaRDD testData = splits[1]; + +// Set parameters. +// Empty categoricalFeaturesInfo indicates all features are continuous. +Map categoricalFeaturesInfo = new HashMap(); +Integer numTrees = 3; // Use more in practice. +String featureSubsetStrategy = "auto"; // Let the algorithm choose. +String impurity = "variance"; +Integer maxDepth = 4; +Integer maxBins = 32; +Integer seed = 12345; + +// Train a RandomForest model. 
+final RandomForestModel model = RandomForest.trainRegressor(trainingData, + categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, seed); + +// Evaluate model on test instances and compute test error +JavaPairRDD predictionAndLabel = + testData.mapToPair(new PairFunction() { + @Override + public Tuple2 call(LabeledPoint p) { + return new Tuple2(model.predict(p.features()), p.label()); + } + }); +Double testMSE = + predictionAndLabel.map(new Function, Double>() { + @Override + public Double call(Tuple2 pl) { + Double diff = pl._1() - pl._2(); + return diff * diff; + } + }).reduce(new Function2() { + @Override + public Double call(Double a, Double b) { + return a + b; + } + }) / testData.count(); +System.out.println("Test Mean Squared Error: " + testMSE); +System.out.println("Learned regression forest model:\n" + model.toDebugString()); +{% endhighlight %} +
+ +
+{% highlight python %} +from pyspark.mllib.tree import RandomForest +from pyspark.mllib.util import MLUtils + +# Load and parse the data file into an RDD of LabeledPoint. +data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt') +# Split the data into training and test sets (30% held out for testing) +(trainingData, testData) = data.randomSplit([0.7, 0.3]) + +# Train a RandomForest model. +# Empty categoricalFeaturesInfo indicates all features are continuous. +# Note: Use larger numTrees in practice. +# Setting featureSubsetStrategy="auto" lets the algorithm choose. +model = RandomForest.trainRegressor(trainingData, categoricalFeaturesInfo={}, + numTrees=3, featureSubsetStrategy="auto", + impurity='variance', maxDepth=4, maxBins=32) + +# Evaluate model on test instances and compute test error +predictions = model.predict(testData.map(lambda x: x.features)) +labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) +testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() / float(testData.count()) +print('Test Mean Squared Error = ' + str(testMSE)) +print('Learned regression forest model:') +print(model.toDebugString()) +{% endhighlight %} +
+ +
+ +## Gradient-Boosted Trees (GBTs) + +[Gradient-Boosted Trees (GBTs)](http://en.wikipedia.org/wiki/Gradient_boosting) +are ensembles of [decision trees](mllib-decision-tree.html). +GBTs iteratively train decision trees in order to minimize a loss function. +Like decision trees, GBTs handle categorical features, +extend to the multiclass classification setting, do not require +feature scaling, and are able to capture non-linearities and feature interactions. + +MLlib supports GBTs for binary classification and for regression, +using both continuous and categorical features. +MLlib implements GBTs using the existing [decision tree](mllib-decision-tree.html) implementation. Please see the decision tree guide for more information on trees. + +*Note*: GBTs do not yet support multiclass classification. For multiclass problems, please use +[decision trees](mllib-decision-tree.html) or [Random Forests](mllib-ensembles.html#Random-Forests). + +### Basic algorithm + +Gradient boosting iteratively trains a sequence of decision trees. +On each iteration, the algorithm uses the current ensemble to predict the label of each training instance and then compares the prediction with the true label. The dataset is re-labeled to put more emphasis on training instances with poor predictions. Thus, in the next iteration, the decision tree will help correct for previous mistakes. + +The specific mechanism for re-labeling instances is defined by a loss function (discussed below). With each iteration, GBTs further reduce this loss function on the training data. + +#### Losses + +The table below lists the losses currently supported by GBTs in MLlib. +Note that each loss is applicable to one of classification or regression, not both. + +Notation: $N$ = number of instances. $y_i$ = label of instance $i$. $x_i$ = features of instance $i$. $F(x_i)$ = model's predicted label for instance $i$. 
+
+<table class="table"> + <thead> + <tr><th>Loss</th><th>Task</th><th>Formula</th><th>Description</th></tr> + </thead> + <tbody>
+ <tr> + <td>Log Loss</td> + <td>Classification</td> + <td>$2 \sum_{i=1}^{N} \log(1+\exp(-2 y_i F(x_i)))$</td><td>Twice binomial negative log likelihood.</td> + </tr>
+ <tr> + <td>Squared Error</td> + <td>Regression</td> + <td>$\sum_{i=1}^{N} (y_i - F(x_i))^2$</td><td>Also called L2 loss. Default loss for regression tasks.</td> + </tr>
+ <tr> + <td>Absolute Error</td> + <td>Regression</td> + <td>$\sum_{i=1}^{N} |y_i - F(x_i)|$</td><td>Also called L1 loss. Can be more robust to outliers than Squared Error.</td> + </tr>
+ </tbody> +</table>
+ +### Usage tips + +We include a few guidelines for using GBTs by discussing the various parameters. +We omit some decision tree parameters since those are covered in the [decision tree guide](mllib-decision-tree.html). + +* **`loss`**: See the section above for information on losses and their applicability to tasks (classification vs. regression). Different losses can give significantly different results, depending on the dataset. + +* **`numIterations`**: This sets the number of trees in the ensemble. Each iteration produces one tree. Increasing this number makes the model more expressive, improving training data accuracy. However, test-time accuracy may suffer if this is too large. + +* **`learningRate`**: This parameter should not need to be tuned. If the algorithm behavior seems unstable, decreasing this value may improve stability. + +* **`algo`**: The algorithm or task (classification vs. regression) is set using the tree [`Strategy`](api/scala/index.html#org.apache.spark.mllib.tree.configuration.Strategy) parameter. + + +### Examples + +GBTs currently have APIs in Scala and Java. Examples in both languages are shown below. + +#### Classification + +The example below demonstrates how to load a +[LIBSVM data file](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/), +parse it as an RDD of `LabeledPoint` and then +perform classification using Gradient-Boosted Trees with log loss. +The test error is calculated to measure the algorithm accuracy. + +
+ +
+{% highlight scala %} +import org.apache.spark.mllib.tree.GradientBoostedTrees +import org.apache.spark.mllib.tree.configuration.BoostingStrategy +import org.apache.spark.mllib.util.MLUtils + +// Load and parse the data file. +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +// Split the data into training and test sets (30% held out for testing) +val splits = data.randomSplit(Array(0.7, 0.3)) +val (trainingData, testData) = (splits(0), splits(1)) + +// Train a GradientBoostedTrees model. +// The defaultParams for Classification use LogLoss by default. +val boostingStrategy = BoostingStrategy.defaultParams("Classification") +boostingStrategy.numIterations = 3 // Note: Use more iterations in practice. +boostingStrategy.treeStrategy.numClassesForClassification = 2 +boostingStrategy.treeStrategy.maxDepth = 5 +// Empty categoricalFeaturesInfo indicates all features are continuous. +boostingStrategy.treeStrategy.categoricalFeaturesInfo = Map[Int, Int]() + +val model = GradientBoostedTrees.train(trainingData, boostingStrategy) + +// Evaluate model on test instances and compute test error +val labelAndPreds = testData.map { point => + val prediction = model.predict(point.features) + (point.label, prediction) +} +val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData.count() +println("Test Error = " + testErr) +println("Learned classification GBT model:\n" + model.toDebugString) +{% endhighlight %} +
+ +
+{% highlight java %} +import scala.Tuple2; +import java.util.HashMap; +import java.util.Map; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.tree.GradientBoostedTrees; +import org.apache.spark.mllib.tree.configuration.BoostingStrategy; +import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel; +import org.apache.spark.mllib.util.MLUtils; + +SparkConf sparkConf = new SparkConf().setAppName("JavaGradientBoostedTrees"); +JavaSparkContext sc = new JavaSparkContext(sparkConf); + +// Load and parse the data file. +String datapath = "data/mllib/sample_libsvm_data.txt"; +JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD(); +// Split the data into training and test sets (30% held out for testing) +JavaRDD[] splits = data.randomSplit(new double[]{0.7, 0.3}); +JavaRDD trainingData = splits[0]; +JavaRDD testData = splits[1]; + +// Train a GradientBoostedTrees model. +// The defaultParams for Classification use LogLoss by default. +BoostingStrategy boostingStrategy = BoostingStrategy.defaultParams("Classification"); +boostingStrategy.setNumIterations(3); // Note: Use more iterations in practice. +boostingStrategy.getTreeStrategy().setNumClassesForClassification(2); +boostingStrategy.getTreeStrategy().setMaxDepth(5); +// Empty categoricalFeaturesInfo indicates all features are continuous. 
+Map categoricalFeaturesInfo = new HashMap(); +boostingStrategy.treeStrategy().setCategoricalFeaturesInfo(categoricalFeaturesInfo); + +final GradientBoostedTreesModel model = + GradientBoostedTrees.train(trainingData, boostingStrategy); + +// Evaluate model on test instances and compute test error +JavaPairRDD predictionAndLabel = + testData.mapToPair(new PairFunction() { + @Override + public Tuple2 call(LabeledPoint p) { + return new Tuple2(model.predict(p.features()), p.label()); + } + }); +Double testErr = + 1.0 * predictionAndLabel.filter(new Function, Boolean>() { + @Override + public Boolean call(Tuple2 pl) { + return !pl._1().equals(pl._2()); + } + }).count() / testData.count(); +System.out.println("Test Error: " + testErr); +System.out.println("Learned classification GBT model:\n" + model.toDebugString()); +{% endhighlight %} +
+ +
+ +#### Regression + +The example below demonstrates how to load a +[LIBSVM data file](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/), +parse it as an RDD of `LabeledPoint` and then +perform regression using Gradient-Boosted Trees with Squared Error as the loss. +The Mean Squared Error (MSE) is computed at the end to evaluate +[goodness of fit](http://en.wikipedia.org/wiki/Goodness_of_fit). + +
+ +
+{% highlight scala %} +import org.apache.spark.mllib.tree.GradientBoostedTrees +import org.apache.spark.mllib.tree.configuration.BoostingStrategy +import org.apache.spark.mllib.util.MLUtils + +// Load and parse the data file. +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +// Split the data into training and test sets (30% held out for testing) +val splits = data.randomSplit(Array(0.7, 0.3)) +val (trainingData, testData) = (splits(0), splits(1)) + +// Train a GradientBoostedTrees model. +// The defaultParams for Regression use SquaredError by default. +val boostingStrategy = BoostingStrategy.defaultParams("Regression") +boostingStrategy.numIterations = 3 // Note: Use more iterations in practice. +boostingStrategy.treeStrategy.maxDepth = 5 +// Empty categoricalFeaturesInfo indicates all features are continuous. +boostingStrategy.treeStrategy.categoricalFeaturesInfo = Map[Int, Int]() + +val model = GradientBoostedTrees.train(trainingData, boostingStrategy) + +// Evaluate model on test instances and compute test error +val labelsAndPredictions = testData.map { point => + val prediction = model.predict(point.features) + (point.label, prediction) +} +val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean() +println("Test Mean Squared Error = " + testMSE) +println("Learned regression GBT model:\n" + model.toDebugString) +{% endhighlight %} +
+ +
+{% highlight java %} +import scala.Tuple2; +import java.util.HashMap; +import java.util.Map; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.tree.GradientBoostedTrees; +import org.apache.spark.mllib.tree.configuration.BoostingStrategy; +import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel; +import org.apache.spark.mllib.util.MLUtils; + +SparkConf sparkConf = new SparkConf().setAppName("JavaGradientBoostedTrees"); +JavaSparkContext sc = new JavaSparkContext(sparkConf); + +// Load and parse the data file. +String datapath = "data/mllib/sample_libsvm_data.txt"; +JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD(); +// Split the data into training and test sets (30% held out for testing) +JavaRDD[] splits = data.randomSplit(new double[]{0.7, 0.3}); +JavaRDD trainingData = splits[0]; +JavaRDD testData = splits[1]; + +// Train a GradientBoostedTrees model. +// The defaultParams for Regression use SquaredError by default. +BoostingStrategy boostingStrategy = BoostingStrategy.defaultParams("Regression"); +boostingStrategy.setNumIterations(3); // Note: Use more iterations in practice. +boostingStrategy.getTreeStrategy().setMaxDepth(5); +// Empty categoricalFeaturesInfo indicates all features are continuous. 
+Map categoricalFeaturesInfo = new HashMap(); +boostingStrategy.treeStrategy().setCategoricalFeaturesInfo(categoricalFeaturesInfo); + +final GradientBoostedTreesModel model = + GradientBoostedTrees.train(trainingData, boostingStrategy); + +// Evaluate model on test instances and compute test error +JavaPairRDD predictionAndLabel = + testData.mapToPair(new PairFunction() { + @Override + public Tuple2 call(LabeledPoint p) { + return new Tuple2(model.predict(p.features()), p.label()); + } + }); +Double testMSE = + predictionAndLabel.map(new Function, Double>() { + @Override + public Double call(Tuple2 pl) { + Double diff = pl._1() - pl._2(); + return diff * diff; + } + }).reduce(new Function2() { + @Override + public Double call(Double a, Double b) { + return a + b; + } + }) / testData.count(); +System.out.println("Test Mean Squared Error: " + testMSE); +System.out.println("Learned regression GBT model:\n" + model.toDebugString()); +{% endhighlight %} +
+ +
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index 94fc98ce4fabe..dcb6819f46cba 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -16,8 +16,9 @@ filtering, dimensionality reduction, as well as underlying optimization primitiv * random data generation * [Classification and regression](mllib-classification-regression.html) * [linear models (SVMs, logistic regression, linear regression)](mllib-linear-methods.html) - * [decision trees](mllib-decision-tree.html) * [naive Bayes](mllib-naive-bayes.html) + * [decision trees](mllib-decision-tree.html) + * [ensembles of trees](mllib-ensembles.html) (Random Forests and Gradient-Boosted Trees) * [Collaborative filtering](mllib-collaborative-filtering.html) * alternating least squares (ALS) * [Clustering](mllib-clustering.html) @@ -60,6 +61,32 @@ To use MLlib in Python, you will need [NumPy](http://www.numpy.org) version 1.4 # Migration Guide +## From 1.1 to 1.2 + +The only API changes in MLlib v1.2 are in +[`DecisionTree`](api/scala/index.html#org.apache.spark.mllib.tree.DecisionTree), +which continues to be an experimental API in MLlib 1.2: + +1. *(Breaking change)* The Scala API for classification takes a named argument specifying the number +of classes. In MLlib v1.1, this argument was called `numClasses` in Python and +`numClassesForClassification` in Scala. In MLlib v1.2, the names are both set to `numClasses`. +This `numClasses` parameter is specified either via +[`Strategy`](api/scala/index.html#org.apache.spark.mllib.tree.configuration.Strategy) +or via [`DecisionTree`](api/scala/index.html#org.apache.spark.mllib.tree.DecisionTree) +static `trainClassifier` and `trainRegressor` methods. + +2. *(Breaking change)* The API for +[`Node`](api/scala/index.html#org.apache.spark.mllib.tree.model.Node) has changed. +This should generally not affect user code, unless the user manually constructs decision trees +(instead of using the `trainClassifier` or `trainRegressor` methods). 
+The tree `Node` now includes more information, including the probability of the predicted label +(for classification). + +3. Printing methods' output has changed. The `toString` (Scala/Java) and `__repr__` (Python) methods used to print the full model; they now print a summary. For the full model, use `toDebugString`. + +Examples in the Spark distribution and examples in the +[Decision Trees Guide](mllib-decision-tree.html#examples) have been updated accordingly. + ## From 1.0 to 1.1 The only API changes in MLlib v1.1 are in diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostedTreesRunner.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostedTreesRunner.java index 4a5ac404ea5ea..a1844d5d07ad4 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostedTreesRunner.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostedTreesRunner.java @@ -73,7 +73,7 @@ public static void main(String[] args) { return p.label(); } }).countByValue().size(); - boostingStrategy.treeStrategy().setNumClassesForClassification(numClasses); + boostingStrategy.treeStrategy().setNumClasses(numClasses); // Train a GradientBoosting model for classification. final GradientBoostedTreesModel model = GradientBoostedTrees.train(data, boostingStrategy); diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestExample.java new file mode 100644 index 0000000000000..89a4e092a5af7 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestExample.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.mllib; + +import scala.Tuple2; + +import java.util.HashMap; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.tree.RandomForest; +import org.apache.spark.mllib.tree.model.RandomForestModel; +import org.apache.spark.mllib.util.MLUtils; + +public final class JavaRandomForestExample { + + /** + * Note: This example illustrates binary classification. + * For information on multiclass classification, please refer to the JavaDecisionTree.java + * example. + */ + private static void testClassification(JavaRDD trainingData, + JavaRDD testData) { + // Train a RandomForest model. + // Empty categoricalFeaturesInfo indicates all features are continuous. + Integer numClasses = 2; + HashMap categoricalFeaturesInfo = new HashMap(); + Integer numTrees = 3; // Use more in practice. + String featureSubsetStrategy = "auto"; // Let the algorithm choose. 
+ String impurity = "gini"; + Integer maxDepth = 4; + Integer maxBins = 32; + Integer seed = 12345; + + final RandomForestModel model = RandomForest.trainClassifier(trainingData, numClasses, + categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, + seed); + + // Evaluate model on test instances and compute test error + JavaPairRDD predictionAndLabel = + testData.mapToPair(new PairFunction() { + @Override + public Tuple2 call(LabeledPoint p) { + return new Tuple2(model.predict(p.features()), p.label()); + } + }); + Double testErr = + 1.0 * predictionAndLabel.filter(new Function, Boolean>() { + @Override + public Boolean call(Tuple2 pl) { + return !pl._1().equals(pl._2()); + } + }).count() / testData.count(); + System.out.println("Test Error: " + testErr); + System.out.println("Learned classification forest model:\n" + model.toDebugString()); + } + + private static void testRegression(JavaRDD trainingData, + JavaRDD testData) { + // Train a RandomForest model. + // Empty categoricalFeaturesInfo indicates all features are continuous. + HashMap categoricalFeaturesInfo = new HashMap(); + Integer numTrees = 3; // Use more in practice. + String featureSubsetStrategy = "auto"; // Let the algorithm choose. 
+ String impurity = "variance"; + Integer maxDepth = 4; + Integer maxBins = 32; + Integer seed = 12345; + + final RandomForestModel model = RandomForest.trainRegressor(trainingData, + categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, + seed); + + // Evaluate model on test instances and compute test error + JavaPairRDD predictionAndLabel = + testData.mapToPair(new PairFunction() { + @Override + public Tuple2 call(LabeledPoint p) { + return new Tuple2(model.predict(p.features()), p.label()); + } + }); + Double testMSE = + predictionAndLabel.map(new Function, Double>() { + @Override + public Double call(Tuple2 pl) { + Double diff = pl._1() - pl._2(); + return diff * diff; + } + }).reduce(new Function2() { + @Override + public Double call(Double a, Double b) { + return a + b; + } + }) / testData.count(); + System.out.println("Test Mean Squared Error: " + testMSE); + System.out.println("Learned regression forest model:\n" + model.toDebugString()); + } + + public static void main(String[] args) { + SparkConf sparkConf = new SparkConf().setAppName("JavaRandomForestExample"); + JavaSparkContext sc = new JavaSparkContext(sparkConf); + + // Load and parse the data file. 
+ String datapath = "data/mllib/sample_libsvm_data.txt"; + JavaRDD data = MLUtils.loadLibSVMFile(sc.sc(), datapath).toJavaRDD(); + // Split the data into training and test sets (30% held out for testing) + JavaRDD[] splits = data.randomSplit(new double[]{0.7, 0.3}); + JavaRDD trainingData = splits[0]; + JavaRDD testData = splits[1]; + + System.out.println("\nRunning example of classification using RandomForest\n"); + testClassification(trainingData, testData); + + System.out.println("\nRunning example of regression using RandomForest\n"); + testRegression(trainingData, testData); + sc.stop(); + } +} diff --git a/examples/src/main/python/mllib/decision_tree_runner.py b/examples/src/main/python/mllib/decision_tree_runner.py index 61ea4e06ecf3a..fccabd841b139 100755 --- a/examples/src/main/python/mllib/decision_tree_runner.py +++ b/examples/src/main/python/mllib/decision_tree_runner.py @@ -106,8 +106,7 @@ def reindexClassLabels(data): def usage(): print >> sys.stderr, \ - "Usage: decision_tree_runner [libsvm format data filepath]\n" + \ - " Note: This only supports binary classification." + "Usage: decision_tree_runner [libsvm format data filepath]" exit(1) @@ -127,16 +126,20 @@ def usage(): # Re-index class labels if needed. (reindexedData, origToNewLabels) = reindexClassLabels(points) + numClasses = len(origToNewLabels) # Train a classifier. categoricalFeaturesInfo = {} # no categorical features - model = DecisionTree.trainClassifier(reindexedData, numClasses=2, + model = DecisionTree.trainClassifier(reindexedData, numClasses=numClasses, categoricalFeaturesInfo=categoricalFeaturesInfo) # Print learned tree and stats. 
print "Trained DecisionTree for classification:" - print " Model numNodes: %d\n" % model.numNodes() - print " Model depth: %d\n" % model.depth() - print " Training accuracy: %g\n" % getAccuracy(model, reindexedData) - print model + print " Model numNodes: %d" % model.numNodes() + print " Model depth: %d" % model.depth() + print " Training accuracy: %g" % getAccuracy(model, reindexedData) + if model.numNodes() < 20: + print model.toDebugString() + else: + print model sc.stop() diff --git a/examples/src/main/python/mllib/random_forest_example.py b/examples/src/main/python/mllib/random_forest_example.py new file mode 100755 index 0000000000000..d3c24f7664329 --- /dev/null +++ b/examples/src/main/python/mllib/random_forest_example.py @@ -0,0 +1,89 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Random Forest classification and regression using MLlib. + +Note: This example illustrates binary classification. + For information on multiclass classification, please refer to the decision_tree_runner.py + example. 
+""" + +import sys + +from pyspark.context import SparkContext +from pyspark.mllib.tree import RandomForest +from pyspark.mllib.util import MLUtils + + +def testClassification(trainingData, testData): + # Train a RandomForest model. + # Empty categoricalFeaturesInfo indicates all features are continuous. + # Note: Use larger numTrees in practice. + # Setting featureSubsetStrategy="auto" lets the algorithm choose. + model = RandomForest.trainClassifier(trainingData, numClasses=2, + categoricalFeaturesInfo={}, + numTrees=3, featureSubsetStrategy="auto", + impurity='gini', maxDepth=4, maxBins=32) + + # Evaluate model on test instances and compute test error + predictions = model.predict(testData.map(lambda x: x.features)) + labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) + testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count()\ + / float(testData.count()) + print('Test Error = ' + str(testErr)) + print('Learned classification forest model:') + print(model.toDebugString()) + + +def testRegression(trainingData, testData): + # Train a RandomForest model. + # Empty categoricalFeaturesInfo indicates all features are continuous. + # Note: Use larger numTrees in practice. + # Setting featureSubsetStrategy="auto" lets the algorithm choose. 
+ model = RandomForest.trainRegressor(trainingData, categoricalFeaturesInfo={}, + numTrees=3, featureSubsetStrategy="auto", + impurity='variance', maxDepth=4, maxBins=32) + + # Evaluate model on test instances and compute test error + predictions = model.predict(testData.map(lambda x: x.features)) + labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) + testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum()\ + / float(testData.count()) + print('Test Mean Squared Error = ' + str(testMSE)) + print('Learned regression forest model:') + print(model.toDebugString()) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + print >> sys.stderr, "Usage: random_forest_example" + exit(1) + sc = SparkContext(appName="PythonRandomForestExample") + + # Load and parse the data file into an RDD of LabeledPoint. + data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt') + # Split the data into training and test sets (30% held out for testing) + (trainingData, testData) = data.randomSplit([0.7, 0.3]) + + print('\nRunning example of classification using RandomForest\n') + testClassification(trainingData, testData) + + print('\nRunning example of regression using RandomForest\n') + testRegression(trainingData, testData) + + sc.stop() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index 54953adb5f3df..205d80dd02682 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -278,7 +278,7 @@ object DecisionTreeRunner { impurity = impurityCalculator, maxDepth = params.maxDepth, maxBins = params.maxBins, - numClassesForClassification = numClasses, + numClasses = numClasses, minInstancesPerNode = params.minInstancesPerNode, minInfoGain = params.minInfoGain, useNodeIdCache = 
params.useNodeIdCache, diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala index 1def8b45a230c..431ead8c0c165 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala @@ -103,7 +103,7 @@ object GradientBoostedTreesRunner { params.dataFormat, params.testInput, Algo.withName(params.algo), params.fracTest) val boostingStrategy = BoostingStrategy.defaultParams(params.algo) - boostingStrategy.treeStrategy.numClassesForClassification = numClasses + boostingStrategy.treeStrategy.numClasses = numClasses boostingStrategy.numIterations = params.numIterations boostingStrategy.treeStrategy.maxDepth = params.maxDepth diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 9f20cd5d00dcd..c4e5fd8e461fc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -477,7 +477,7 @@ class PythonMLLibAPI extends Serializable { algo = algo, impurity = impurity, maxDepth = maxDepth, - numClassesForClassification = numClasses, + numClasses = numClasses, maxBins = maxBins, categoricalFeaturesInfo = categoricalFeaturesInfo.asScala.toMap, minInstancesPerNode = minInstancesPerNode, @@ -513,7 +513,7 @@ class PythonMLLibAPI extends Serializable { algo = algo, impurity = impurity, maxDepth = maxDepth, - numClassesForClassification = numClasses, + numClasses = numClasses, maxBins = maxBins, categoricalFeaturesInfo = categoricalFeaturesInfo.asScala.toMap) val cached = data.rdd.persist(StorageLevel.MEMORY_AND_DISK) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index 3d91867c896d9..73e7e32c6db31 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -136,7 +136,7 @@ object DecisionTree extends Serializable with Logging { * @param impurity impurity criterion used for information gain calculation * @param maxDepth Maximum depth of the tree. * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. - * @param numClassesForClassification number of classes for classification. Default value of 2. + * @param numClasses number of classes for classification. Default value of 2. * @return DecisionTreeModel that can be used for prediction */ def train( @@ -144,8 +144,8 @@ object DecisionTree extends Serializable with Logging { algo: Algo, impurity: Impurity, maxDepth: Int, - numClassesForClassification: Int): DecisionTreeModel = { - val strategy = new Strategy(algo, impurity, maxDepth, numClassesForClassification) + numClasses: Int): DecisionTreeModel = { + val strategy = new Strategy(algo, impurity, maxDepth, numClasses) new DecisionTree(strategy).run(input) } @@ -164,7 +164,7 @@ object DecisionTree extends Serializable with Logging { * @param impurity criterion used for information gain calculation * @param maxDepth Maximum depth of the tree. * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. - * @param numClassesForClassification number of classes for classification. Default value of 2. + * @param numClasses number of classes for classification. Default value of 2. * @param maxBins maximum number of bins used for splitting features * @param quantileCalculationStrategy algorithm for calculating quantiles * @param categoricalFeaturesInfo Map storing arity of categorical features. 
@@ -177,11 +177,11 @@ object DecisionTree extends Serializable with Logging { algo: Algo, impurity: Impurity, maxDepth: Int, - numClassesForClassification: Int, + numClasses: Int, maxBins: Int, quantileCalculationStrategy: QuantileStrategy, categoricalFeaturesInfo: Map[Int,Int]): DecisionTreeModel = { - val strategy = new Strategy(algo, impurity, maxDepth, numClassesForClassification, maxBins, + val strategy = new Strategy(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo) new DecisionTree(strategy).run(input) } @@ -191,7 +191,7 @@ object DecisionTree extends Serializable with Logging { * * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * Labels should take values {0, 1, ..., numClasses-1}. - * @param numClassesForClassification number of classes for classification. + * @param numClasses number of classes for classification. * @param categoricalFeaturesInfo Map storing arity of categorical features. * E.g., an entry (n -> k) indicates that feature n is categorical * with k categories indexed from 0: {0, 1, ..., k-1}. 
@@ -206,13 +206,13 @@ object DecisionTree extends Serializable with Logging { */ def trainClassifier( input: RDD[LabeledPoint], - numClassesForClassification: Int, + numClasses: Int, categoricalFeaturesInfo: Map[Int, Int], impurity: String, maxDepth: Int, maxBins: Int): DecisionTreeModel = { val impurityType = Impurities.fromString(impurity) - train(input, Classification, impurityType, maxDepth, numClassesForClassification, maxBins, Sort, + train(input, Classification, impurityType, maxDepth, numClasses, maxBins, Sort, categoricalFeaturesInfo) } @@ -221,12 +221,12 @@ object DecisionTree extends Serializable with Logging { */ def trainClassifier( input: JavaRDD[LabeledPoint], - numClassesForClassification: Int, + numClasses: Int, categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer], impurity: String, maxDepth: Int, maxBins: Int): DecisionTreeModel = { - trainClassifier(input.rdd, numClassesForClassification, + trainClassifier(input.rdd, numClasses, categoricalFeaturesInfo.asInstanceOf[java.util.Map[Int, Int]].asScala.toMap, impurity, maxDepth, maxBins) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 482d3395516e7..e9304b5e5c650 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -56,7 +56,7 @@ import org.apache.spark.util.Utils * etc. * @param numTrees If 1, then no bootstrapping is used. If > 1, then bootstrapping is done. * @param featureSubsetStrategy Number of features to consider for splits at each node. - * Supported: "auto" (default), "all", "sqrt", "log2", "onethird". + * Supported: "auto", "all", "sqrt", "log2", "onethird". 
* If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; * if numTrees > 1 (forest) set to "sqrt" for classification and @@ -269,7 +269,7 @@ object RandomForest extends Serializable with Logging { * @param strategy Parameters for training each tree in the forest. * @param numTrees Number of trees in the random forest. * @param featureSubsetStrategy Number of features to consider for splits at each node. - * Supported: "auto" (default), "all", "sqrt", "log2", "onethird". + * Supported: "auto", "all", "sqrt", "log2", "onethird". * If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; * if numTrees > 1 (forest) set to "sqrt". @@ -293,13 +293,13 @@ object RandomForest extends Serializable with Logging { * * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * Labels should take values {0, 1, ..., numClasses-1}. - * @param numClassesForClassification number of classes for classification. + * @param numClasses number of classes for classification. * @param categoricalFeaturesInfo Map storing arity of categorical features. * E.g., an entry (n -> k) indicates that feature n is categorical * with k categories indexed from 0: {0, 1, ..., k-1}. * @param numTrees Number of trees in the random forest. * @param featureSubsetStrategy Number of features to consider for splits at each node. - * Supported: "auto" (default), "all", "sqrt", "log2", "onethird". + * Supported: "auto", "all", "sqrt", "log2", "onethird". * If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; * if numTrees > 1 (forest) set to "sqrt". 
@@ -315,7 +315,7 @@ object RandomForest extends Serializable with Logging { */ def trainClassifier( input: RDD[LabeledPoint], - numClassesForClassification: Int, + numClasses: Int, categoricalFeaturesInfo: Map[Int, Int], numTrees: Int, featureSubsetStrategy: String, @@ -325,7 +325,7 @@ object RandomForest extends Serializable with Logging { seed: Int = Utils.random.nextInt()): RandomForestModel = { val impurityType = Impurities.fromString(impurity) val strategy = new Strategy(Classification, impurityType, maxDepth, - numClassesForClassification, maxBins, Sort, categoricalFeaturesInfo) + numClasses, maxBins, Sort, categoricalFeaturesInfo) trainClassifier(input, strategy, numTrees, featureSubsetStrategy, seed) } @@ -334,7 +334,7 @@ object RandomForest extends Serializable with Logging { */ def trainClassifier( input: JavaRDD[LabeledPoint], - numClassesForClassification: Int, + numClasses: Int, categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer], numTrees: Int, featureSubsetStrategy: String, @@ -342,7 +342,7 @@ object RandomForest extends Serializable with Logging { maxDepth: Int, maxBins: Int, seed: Int): RandomForestModel = { - trainClassifier(input.rdd, numClassesForClassification, + trainClassifier(input.rdd, numClasses, categoricalFeaturesInfo.asInstanceOf[java.util.Map[Int, Int]].asScala.toMap, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, seed) } @@ -355,7 +355,7 @@ object RandomForest extends Serializable with Logging { * @param strategy Parameters for training each tree in the forest. * @param numTrees Number of trees in the random forest. * @param featureSubsetStrategy Number of features to consider for splits at each node. - * Supported: "auto" (default), "all", "sqrt", "log2", "onethird". + * Supported: "auto", "all", "sqrt", "log2", "onethird". * If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; * if numTrees > 1 (forest) set to "onethird". 
@@ -384,7 +384,7 @@ object RandomForest extends Serializable with Logging { * with k categories indexed from 0: {0, 1, ..., k-1}. * @param numTrees Number of trees in the random forest. * @param featureSubsetStrategy Number of features to consider for splits at each node. - * Supported: "auto" (default), "all", "sqrt", "log2", "onethird". + * Supported: "auto", "all", "sqrt", "log2", "onethird". * If "auto" is set, this parameter is set based on numTrees: * if numTrees == 1, set to "all"; * if numTrees > 1 (forest) set to "onethird". diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala index e703adbdbfbb3..cf51d041c65a9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala @@ -51,7 +51,7 @@ case class BoostingStrategy( private[tree] def assertValid(): Unit = { treeStrategy.algo match { case Classification => - require(treeStrategy.numClassesForClassification == 2, + require(treeStrategy.numClasses == 2, "Only binary classification is supported for boosting.") case Regression => // nothing @@ -80,12 +80,12 @@ object BoostingStrategy { treeStrategy.maxDepth = 3 algo match { case "Classification" => - treeStrategy.numClassesForClassification = 2 + treeStrategy.numClasses = 2 new BoostingStrategy(treeStrategy, LogLoss) case "Regression" => new BoostingStrategy(treeStrategy, SquaredError) case _ => - throw new IllegalArgumentException(s"$algo is not supported by the boosting.") + throw new IllegalArgumentException(s"$algo is not supported by boosting.") } } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index d75f38433c081..d5cd89ab94e81 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -37,7 +37,7 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ * Supported for Regression: [[org.apache.spark.mllib.tree.impurity.Variance]]. * @param maxDepth Maximum depth of the tree. * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. - * @param numClassesForClassification Number of classes for classification. + * @param numClasses Number of classes for classification. * (Ignored for regression.) * Default value is 2 (binary classification). * @param maxBins Maximum number of bins used for discretizing continuous features and @@ -73,7 +73,7 @@ class Strategy ( @BeanProperty var algo: Algo, @BeanProperty var impurity: Impurity, @BeanProperty var maxDepth: Int, - @BeanProperty var numClassesForClassification: Int = 2, + @BeanProperty var numClasses: Int = 2, @BeanProperty var maxBins: Int = 32, @BeanProperty var quantileCalculationStrategy: QuantileStrategy = Sort, @BeanProperty var categoricalFeaturesInfo: Map[Int, Int] = Map[Int, Int](), @@ -86,7 +86,7 @@ class Strategy ( @BeanProperty var checkpointInterval: Int = 10) extends Serializable { def isMulticlassClassification = - algo == Classification && numClassesForClassification > 2 + algo == Classification && numClasses > 2 def isMulticlassWithCategoricalFeatures = isMulticlassClassification && (categoricalFeaturesInfo.size > 0) @@ -97,10 +97,10 @@ class Strategy ( algo: Algo, impurity: Impurity, maxDepth: Int, - numClassesForClassification: Int, + numClasses: Int, maxBins: Int, categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]) { - this(algo, impurity, maxDepth, numClassesForClassification, maxBins, Sort, + this(algo, impurity, maxDepth, numClasses, maxBins, Sort, categoricalFeaturesInfo.asInstanceOf[java.util.Map[Int, Int]].asScala.toMap) } @@ -117,8 +117,8 @@ class 
Strategy ( */ def setCategoricalFeaturesInfo( categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]): Unit = { - setCategoricalFeaturesInfo( - categoricalFeaturesInfo.asInstanceOf[java.util.Map[Int, Int]].asScala.toMap) + this.categoricalFeaturesInfo = + categoricalFeaturesInfo.asInstanceOf[java.util.Map[Int, Int]].asScala.toMap } /** @@ -128,9 +128,9 @@ class Strategy ( private[tree] def assertValid(): Unit = { algo match { case Classification => - require(numClassesForClassification >= 2, - s"DecisionTree Strategy for Classification must have numClassesForClassification >= 2," + - s" but numClassesForClassification = $numClassesForClassification.") + require(numClasses >= 2, + s"DecisionTree Strategy for Classification must have numClasses >= 2," + + s" but numClasses = $numClasses.") require(Set(Gini, Entropy).contains(impurity), s"DecisionTree Strategy given invalid impurity for Classification: $impurity." + s" Valid settings: Gini, Entropy") @@ -160,7 +160,7 @@ class Strategy ( /** Returns a shallow copy of this instance. 
*/ def copy: Strategy = { - new Strategy(algo, impurity, maxDepth, numClassesForClassification, maxBins, + new Strategy(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo, minInstancesPerNode, minInfoGain, maxMemoryInMB, subsamplingRate, useNodeIdCache, checkpointDir, checkpointInterval) } @@ -176,9 +176,9 @@ object Strategy { def defaultStrategy(algo: String): Strategy = algo match { case "Classification" => new Strategy(algo = Classification, impurity = Gini, maxDepth = 10, - numClassesForClassification = 2) + numClasses = 2) case "Regression" => new Strategy(algo = Regression, impurity = Variance, maxDepth = 10, - numClassesForClassification = 0) + numClasses = 0) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala index 5bc0f2635c6b1..951733fada6be 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala @@ -110,7 +110,7 @@ private[tree] object DecisionTreeMetadata extends Logging { val numFeatures = input.take(1)(0).features.size val numExamples = input.count() val numClasses = strategy.algo match { - case Classification => strategy.numClassesForClassification + case Classification => strategy.numClasses case Regression => 0 } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala index 972c905ec9ffa..9347eaf9221a8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala @@ -57,7 +57,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { Classification, Gini, maxDepth = 2, - numClassesForClassification = 2, + numClasses = 2, maxBins = 
100, categoricalFeaturesInfo = Map(0 -> 2, 1-> 2)) @@ -81,7 +81,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { Classification, Gini, maxDepth = 2, - numClassesForClassification = 2, + numClasses = 2, maxBins = 100, categoricalFeaturesInfo = Map(0 -> 3, 1 -> 3)) @@ -177,7 +177,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { Classification, Gini, maxDepth = 2, - numClassesForClassification = 100, + numClasses = 100, maxBins = 100, categoricalFeaturesInfo = Map(0 -> 3, 1-> 3)) @@ -271,7 +271,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { Classification, Gini, maxDepth = 2, - numClassesForClassification = 100, + numClasses = 100, maxBins = 100, categoricalFeaturesInfo = Map(0 -> 10, 1-> 10)) // 2^(10-1) - 1 > 100, so categorical features will be ordered @@ -295,7 +295,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val strategy = new Strategy( Classification, Gini, - numClassesForClassification = 2, + numClasses = 2, maxDepth = 2, maxBins = 100, categoricalFeaturesInfo = Map(0 -> 3, 1-> 3)) @@ -377,7 +377,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { assert(arr.length === 1000) val rdd = sc.parallelize(arr) val strategy = new Strategy(Classification, Gini, maxDepth = 3, - numClassesForClassification = 2, maxBins = 100) + numClasses = 2, maxBins = 100) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) assert(!metadata.isUnordered(featureIndex = 0)) assert(!metadata.isUnordered(featureIndex = 1)) @@ -401,7 +401,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { assert(arr.length === 1000) val rdd = sc.parallelize(arr) val strategy = new Strategy(Classification, Gini, maxDepth = 3, - numClassesForClassification = 2, maxBins = 100) + numClasses = 2, maxBins = 100) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) assert(!metadata.isUnordered(featureIndex = 0)) 
assert(!metadata.isUnordered(featureIndex = 1)) @@ -426,7 +426,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { assert(arr.length === 1000) val rdd = sc.parallelize(arr) val strategy = new Strategy(Classification, Entropy, maxDepth = 3, - numClassesForClassification = 2, maxBins = 100) + numClasses = 2, maxBins = 100) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) assert(!metadata.isUnordered(featureIndex = 0)) assert(!metadata.isUnordered(featureIndex = 1)) @@ -451,7 +451,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { assert(arr.length === 1000) val rdd = sc.parallelize(arr) val strategy = new Strategy(Classification, Entropy, maxDepth = 3, - numClassesForClassification = 2, maxBins = 100) + numClasses = 2, maxBins = 100) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) assert(!metadata.isUnordered(featureIndex = 0)) assert(!metadata.isUnordered(featureIndex = 1)) @@ -485,7 +485,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { // Train a 1-node model val strategyOneNode = new Strategy(Classification, Entropy, maxDepth = 1, - numClassesForClassification = 2, maxBins = 100) + numClasses = 2, maxBins = 100) val modelOneNode = DecisionTree.train(rdd, strategyOneNode) val rootNode1 = modelOneNode.topNode.deepCopy() val rootNode2 = modelOneNode.topNode.deepCopy() @@ -545,7 +545,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val arr = DecisionTreeSuite.generateCategoricalDataPointsForMulticlass() val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 4, - numClassesForClassification = 3, categoricalFeaturesInfo = Map(0 -> 3, 1 -> 3)) + numClasses = 3, categoricalFeaturesInfo = Map(0 -> 3, 1 -> 3)) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) assert(strategy.isMulticlassClassification) assert(metadata.isUnordered(featureIndex = 0)) @@ -568,7 +568,7 @@ class 
DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { arr(3) = new LabeledPoint(1.0, Vectors.dense(3.0)) val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 4, - numClassesForClassification = 2) + numClasses = 2) val model = DecisionTree.train(rdd, strategy) DecisionTreeSuite.validateClassifier(model, arr, 1.0) @@ -585,7 +585,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 4, - numClassesForClassification = 2) + numClasses = 2) val model = DecisionTree.train(rdd, strategy) DecisionTreeSuite.validateClassifier(model, arr, 1.0) @@ -600,7 +600,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val arr = DecisionTreeSuite.generateCategoricalDataPointsForMulticlass() val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 4, - numClassesForClassification = 3, maxBins = maxBins, + numClasses = 3, maxBins = maxBins, categoricalFeaturesInfo = Map(0 -> 3, 1 -> 3)) assert(strategy.isMulticlassClassification) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) @@ -629,7 +629,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val arr = DecisionTreeSuite.generateContinuousDataPointsForMulticlass() val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 4, - numClassesForClassification = 3, maxBins = 100) + numClasses = 3, maxBins = 100) assert(strategy.isMulticlassClassification) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) @@ -650,7 +650,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val arr = DecisionTreeSuite.generateContinuousDataPointsForMulticlass() val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 4, - 
numClassesForClassification = 3, maxBins = 100, categoricalFeaturesInfo = Map(0 -> 3)) + numClasses = 3, maxBins = 100, categoricalFeaturesInfo = Map(0 -> 3)) assert(strategy.isMulticlassClassification) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) assert(metadata.isUnordered(featureIndex = 0)) @@ -671,7 +671,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val arr = DecisionTreeSuite.generateCategoricalDataPointsForMulticlassForOrderedFeatures() val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 4, - numClassesForClassification = 3, maxBins = 100, + numClasses = 3, maxBins = 100, categoricalFeaturesInfo = Map(0 -> 10, 1 -> 10)) assert(strategy.isMulticlassClassification) val metadata = DecisionTreeMetadata.buildMetadata(rdd, strategy) @@ -692,7 +692,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val arr = DecisionTreeSuite.generateCategoricalDataPointsForMulticlassForOrderedFeatures() val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 4, - numClassesForClassification = 3, maxBins = 10, + numClasses = 3, maxBins = 10, categoricalFeaturesInfo = Map(0 -> 10, 1 -> 10)) assert(strategy.isMulticlassClassification) @@ -708,7 +708,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, - maxDepth = 2, numClassesForClassification = 2, minInstancesPerNode = 2) + maxDepth = 2, numClasses = 2, minInstancesPerNode = 2) val model = DecisionTree.train(rdd, strategy) assert(model.topNode.isLeaf) @@ -737,7 +737,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val rdd = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxBins = 2, maxDepth = 2, categoricalFeaturesInfo = Map(0 -> 2, 1-> 2), - 
numClassesForClassification = 2, minInstancesPerNode = 2) + numClasses = 2, minInstancesPerNode = 2) val rootNode = DecisionTree.train(rdd, strategy).topNode @@ -755,7 +755,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val input = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 2, - numClassesForClassification = 2, minInfoGain = 1.0) + numClasses = 2, minInfoGain = 1.0) val model = DecisionTree.train(input, strategy) assert(model.topNode.isLeaf) @@ -781,7 +781,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val input = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 1, - numClassesForClassification = 2, categoricalFeaturesInfo = Map(0 -> 3)) + numClasses = 2, categoricalFeaturesInfo = Map(0 -> 3)) val metadata = DecisionTreeMetadata.buildMetadata(input, strategy) val (splits, bins) = DecisionTree.findSplitsBins(input, metadata) @@ -824,7 +824,7 @@ class DecisionTreeSuite extends FunSuite with MLlibTestSparkContext { val input = sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 5, - numClassesForClassification = 2, categoricalFeaturesInfo = Map(0 -> 3)) + numClasses = 2, categoricalFeaturesInfo = Map(0 -> 3)) val metadata = DecisionTreeMetadata.buildMetadata(input, strategy) val (splits, bins) = DecisionTree.findSplitsBins(input, metadata) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala index d4d54cf4c9e2a..3aa97e544680b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala @@ -100,7 +100,7 @@ class GradientBoostedTreesSuite extends FunSuite with MLlibTestSparkContext { val rdd = 
sc.parallelize(GradientBoostedTreesSuite.data, 2) val treeStrategy = new Strategy(algo = Classification, impurity = Variance, maxDepth = 2, - numClassesForClassification = 2, categoricalFeaturesInfo = Map.empty, + numClasses = 2, categoricalFeaturesInfo = Map.empty, subsamplingRate = subsamplingRate) val boostingStrategy = new BoostingStrategy(treeStrategy, LogLoss, numIterations, learningRate) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala index 90a8c2dfdab80..f7f0f20c6c125 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala @@ -57,7 +57,7 @@ class RandomForestSuite extends FunSuite with MLlibTestSparkContext { " comparing DecisionTree vs. RandomForest(numTrees = 1)") { val categoricalFeaturesInfo = Map.empty[Int, Int] val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 2, - numClassesForClassification = 2, categoricalFeaturesInfo = categoricalFeaturesInfo) + numClasses = 2, categoricalFeaturesInfo = categoricalFeaturesInfo) binaryClassificationTestWithContinuousFeatures(strategy) } @@ -65,7 +65,7 @@ class RandomForestSuite extends FunSuite with MLlibTestSparkContext { " comparing DecisionTree vs. RandomForest(numTrees = 1)") { val categoricalFeaturesInfo = Map.empty[Int, Int] val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 2, - numClassesForClassification = 2, categoricalFeaturesInfo = categoricalFeaturesInfo, + numClasses = 2, categoricalFeaturesInfo = categoricalFeaturesInfo, useNodeIdCache = true) binaryClassificationTestWithContinuousFeatures(strategy) } @@ -93,7 +93,7 @@ class RandomForestSuite extends FunSuite with MLlibTestSparkContext { " comparing DecisionTree vs. 
RandomForest(numTrees = 1)") { val categoricalFeaturesInfo = Map.empty[Int, Int] val strategy = new Strategy(algo = Regression, impurity = Variance, - maxDepth = 2, maxBins = 10, numClassesForClassification = 2, + maxDepth = 2, maxBins = 10, numClasses = 2, categoricalFeaturesInfo = categoricalFeaturesInfo) regressionTestWithContinuousFeatures(strategy) } @@ -102,7 +102,7 @@ class RandomForestSuite extends FunSuite with MLlibTestSparkContext { " comparing DecisionTree vs. RandomForest(numTrees = 1)") { val categoricalFeaturesInfo = Map.empty[Int, Int] val strategy = new Strategy(algo = Regression, impurity = Variance, - maxDepth = 2, maxBins = 10, numClassesForClassification = 2, + maxDepth = 2, maxBins = 10, numClasses = 2, categoricalFeaturesInfo = categoricalFeaturesInfo, useNodeIdCache = true) regressionTestWithContinuousFeatures(strategy) } @@ -169,14 +169,14 @@ class RandomForestSuite extends FunSuite with MLlibTestSparkContext { test("Binary classification with continuous features: subsampling features") { val categoricalFeaturesInfo = Map.empty[Int, Int] val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 2, - numClassesForClassification = 2, categoricalFeaturesInfo = categoricalFeaturesInfo) + numClasses = 2, categoricalFeaturesInfo = categoricalFeaturesInfo) binaryClassificationTestWithContinuousFeaturesAndSubsampledFeatures(strategy) } test("Binary classification with continuous features and node Id cache: subsampling features") { val categoricalFeaturesInfo = Map.empty[Int, Int] val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 2, - numClassesForClassification = 2, categoricalFeaturesInfo = categoricalFeaturesInfo, + numClasses = 2, categoricalFeaturesInfo = categoricalFeaturesInfo, useNodeIdCache = true) binaryClassificationTestWithContinuousFeaturesAndSubsampledFeatures(strategy) } @@ -191,7 +191,7 @@ class RandomForestSuite extends FunSuite with MLlibTestSparkContext { val input = 
sc.parallelize(arr) val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 5, - numClassesForClassification = 3, categoricalFeaturesInfo = categoricalFeaturesInfo) + numClasses = 3, categoricalFeaturesInfo = categoricalFeaturesInfo) val model = RandomForest.trainClassifier(input, strategy, numTrees = 2, featureSubsetStrategy = "sqrt", seed = 12345) EnsembleTestHelper.validateClassifier(model, arr, 1.0) diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index 46e253991aa56..66702478474dc 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -250,7 +250,7 @@ def _train(cls, data, algo, numClasses, categoricalFeaturesInfo, numTrees, return RandomForestModel(model) @classmethod - def trainClassifier(cls, data, numClassesForClassification, categoricalFeaturesInfo, numTrees, + def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees, featureSubsetStrategy="auto", impurity="gini", maxDepth=4, maxBins=32, seed=None): """ @@ -259,7 +259,7 @@ def trainClassifier(cls, data, numClassesForClassification, categoricalFeaturesI :param data: Training dataset: RDD of LabeledPoint. Labels should take values {0, 1, ..., numClasses-1}. - :param numClassesForClassification: number of classes for classification. + :param numClasses: number of classes for classification. :param categoricalFeaturesInfo: Map storing arity of categorical features. E.g., an entry (n -> k) indicates that feature n is categorical with k categories indexed from 0: {0, 1, ..., k-1}. 
@@ -320,7 +320,7 @@ def trainClassifier(cls, data, numClassesForClassification, categoricalFeaturesI >>> model.predict(rdd).collect() [1.0, 0.0] """ - return cls._train(data, "classification", numClassesForClassification, + return cls._train(data, "classification", numClasses, categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, seed) From a4dfb4efef89f686cbf146db42c2d891fef42500 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 3 Dec 2014 19:08:29 -0800 Subject: [PATCH 57/82] [Release] Correctly translate contributors name in release notes This commit involves three main changes: (1) It separates the translation of contributor names from the generation of the contributors list. This is largely motivated by the Github API limit; even if we exceed this limit, we should at least be able to proceed manually as before. This is why the translation logic is abstracted into its own script translate-contributors.py. (2) When we look for candidate replacements for invalid author names, we should look for the assignees of the associated JIRAs too. As a result, the intermediate file must keep track of these. (3) This provides an interactive mode with which the user can sit at the terminal and manually pick the candidate replacement that he/she thinks makes the most sense. As before, there is a non-interactive mode that picks the first candidate that the script considers "valid." TODO: We should have a known_contributors file that stores known mappings so we don't have to go through all of this translation every time. This is also valuable because some contributors simply cannot be automatically translated. 
--- .gitignore | 3 + dev/create-release/generate-contributors.py | 52 +++-- dev/create-release/releaseutils.py | 39 +--- dev/create-release/translate-contributors.py | 190 +++++++++++++++++++ 4 files changed, 229 insertions(+), 55 deletions(-) create mode 100755 dev/create-release/translate-contributors.py diff --git a/.gitignore b/.gitignore index c67cffa1c4375..3b9086c7187dc 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.ipr *.iml *.iws +*.pyc .idea/ .idea_modules/ sbt/*.jar @@ -49,6 +50,8 @@ dependency-reduced-pom.xml checkpoint derby.log dist/ +dev/create-release/*txt +dev/create-release/*new spark-*-bin-*.tgz unit-tests.log /lib/ diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py index 99c29ef9ff8b6..a3b78a3eac6d0 100755 --- a/dev/create-release/generate-contributors.py +++ b/dev/create-release/generate-contributors.py @@ -26,8 +26,6 @@ # You must set the following before use! JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira") -JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None) -JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None) START_COMMIT = os.environ.get("START_COMMIT", "37b100") END_COMMIT = os.environ.get("END_COMMIT", "3693ae") @@ -40,8 +38,6 @@ END_COMMIT = raw_input("Please specify ending commit hash (non-inclusive): ") # Verify provided arguments -if not JIRA_USERNAME: sys.exit("JIRA_USERNAME must be provided") -if not JIRA_PASSWORD: sys.exit("JIRA_PASSWORD must be provided") start_commit_line = get_one_line(START_COMMIT) end_commit_line = get_one_line(END_COMMIT) num_commits = num_commits_in_range(START_COMMIT, END_COMMIT) @@ -60,14 +56,6 @@ sys.exit("Ok, exiting") print "==================================================================================\n" -# Setup JIRA and github clients. 
We use two JIRA clients, one with authentication -# and one without, because authentication is slow and required only when we query -# JIRA user details but not Spark issues -jira_options = { "server": JIRA_API_BASE } -jira_client = JIRA(options = jira_options) -jira_client_auth = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD)) -github_client = Github() - # Find all commits within this range print "Gathering commits within range [%s..%s)" % (START_COMMIT, END_COMMIT) commits = get_one_line_commits(START_COMMIT, END_COMMIT) @@ -105,13 +93,17 @@ def print_indented(_list): if reverts: print "Reverts (%d)" % len(reverts); print_indented(reverts) if nojiras: print "No JIRA (%d)" % len(nojiras); print_indented(nojiras) print "==================== Warning: the above commits will be ignored ==================\n" -response = raw_input("%d commits left to process. Ok to proceed? [y/N] " % len(filtered_commits)) -if response.lower() != "y": +response = raw_input("%d commits left to process. Ok to proceed? [Y/n] " % len(filtered_commits)) +if response.lower() != "y" and response: sys.exit("Ok, exiting.") # Keep track of warnings to tell the user at the end warnings = [] +# Mapping from the invalid author name to its associated JIRA issues +# E.g. 
andrewor14 -> set("SPARK-2413", "SPARK-3551", "SPARK-3471") +invalid_authors = {} + # Populate a map that groups issues and components by author # It takes the form: Author name -> { Contribution type -> Spark components } # For instance, @@ -127,16 +119,23 @@ def print_indented(_list): # } # author_info = {} +jira_options = { "server": JIRA_API_BASE } +jira_client = JIRA(options = jira_options) print "\n=========================== Compiling contributor list ===========================" for commit in filtered_commits: commit_hash = re.findall("^[a-z0-9]+", commit)[0] issues = re.findall("SPARK-[0-9]+", commit.upper()) - # Translate the author in case the github username is not an actual name - # Also guard against any special characters used in the name - # Note the JIRA client we use here must have authentication enabled author = get_author(commit_hash) - author = unidecode.unidecode(unicode(author, "UTF-8")) - author = translate_author(author, github_client, jira_client_auth, warnings) + author = unidecode.unidecode(unicode(author, "UTF-8")).strip() # guard against special characters + # If the author name is invalid, keep track of it along + # with all associated issues so we can translate it later + if is_valid_author(author): + author = capitalize_author(author) + else: + if author not in invalid_authors: + invalid_authors[author] = set() + for issue in issues: + invalid_authors[author].add(issue) date = get_date(commit_hash) # Parse components from the commit message, if any commit_components = find_components(commit, commit_hash) @@ -147,7 +146,7 @@ def populate(issue_type, components): author_info[author] = {} if issue_type not in author_info[author]: author_info[author][issue_type] = set() - for component in all_components: + for component in components: author_info[author][issue_type].add(component) # Find issues and components associated with this commit for issue in issues: @@ -168,7 +167,6 @@ def populate(issue_type, components): # Each line takes the 
format "Author name - semi-colon delimited contributions" # e.g. Andrew Or - Bug fixes in Windows, Core, and Web UI; improvements in Core # e.g. Tathagata Das - Bug fixes and new features in Streaming -contributors_file_name = "contributors.txt" contributors_file = open(contributors_file_name, "w") authors = author_info.keys() authors.sort() @@ -192,11 +190,23 @@ def populate(issue_type, components): # Do not use python's capitalize() on the whole string to preserve case assert contribution contribution = contribution[0].capitalize() + contribution[1:] + # If the author name is invalid, use an intermediate format that + # can be translated through translate-contributors.py later + # E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672 + if author in invalid_authors and invalid_authors[author]: + author = author + "/" + "/".join(invalid_authors[author]) line = "%s - %s" % (author, contribution) contributors_file.write(line + "\n") contributors_file.close() print "Contributors list is successfully written to %s!" 
% contributors_file_name +# Prompt the user to translate author names if necessary +if invalid_authors: + warnings.append("Found the following invalid authors:") + for a in invalid_authors: + warnings.append("\t%s" % a) + warnings.append("Please run './translate-contributors.py' to translate them.") + # Log any warnings encountered in the process if warnings: print "\n============ Warnings encountered while creating the contributor list ============" diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py index 0d6830b11dc73..76a10c32886d4 100755 --- a/dev/create-release/releaseutils.py +++ b/dev/create-release/releaseutils.py @@ -44,6 +44,9 @@ print "Install using 'sudo pip install unidecode'" sys.exit(-1) +# Contributors list file name +contributors_file_name = "contributors.txt" + # Utility functions run git commands (written with Git 1.8.5) def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0] def get_author(commit_hash): @@ -69,7 +72,8 @@ def num_commits_in_range(start_hash, end_hash): "build": "build fixes", "improvement": "improvements", "new feature": "new features", - "documentation": "documentation" + "documentation": "documentation", + "test": "test" } # Maintain a mapping for translating component names when creating the release notes @@ -182,36 +186,3 @@ def capitalize_author(author): words = [w[0].capitalize() + w[1:] for w in words if w] return " ".join(words) -# Maintain a mapping of translated author names as a cache -translated_authors = {} - -# Format the given author in a format appropriate for the contributors list. -# If the author is not an actual name, search github and JIRA for potential -# replacements and log all candidates as a warning. 
-def translate_author(github_author, github_client, jira_client, warnings): - if is_valid_author(github_author): - return capitalize_author(github_author) - # If the translated author is already cached, just return it - if github_author in translated_authors: - return translated_authors[github_author] - # Otherwise, author name is not found, so we need to search for an alternative name - candidates = set() - github_name = get_github_name(github_author, github_client) - jira_name = get_jira_name(github_author, jira_client) - if is_valid_author(github_name): github_name = capitalize_author(github_name) - if is_valid_author(jira_name): jira_name = capitalize_author(jira_name) - if github_name: candidates.add(github_name) - if jira_name: candidates.add(jira_name) - # Only use the github name as a replacement automatically - # The JIRA name may not make sense because it can belong to someone else - if is_valid_author(github_name): - candidates_message = " (another candidate is %s)" % jira_name if jira_name else "" - warnings.append("Replacing github user %s with %s%s" % (github_author, github_name, candidates_message)) - translated_authors[github_name] = github_name - return translated_authors[github_name] - # No direct replacement, so return the original author and list any candidates found - candidates_message = " (candidates: %s)" % nice_join(candidates) if candidates else "" - warnings.append("Unable to find a replacement for github user %s%s" % (github_author, candidates_message)) - translated_authors[github_author] = github_author - return translated_authors[github_author] - diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py new file mode 100755 index 0000000000000..ef4625b003cb6 --- /dev/null +++ b/dev/create-release/translate-contributors.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script translates invalid authors in the contributors list generated +# by generate-contributors.py. When the script encounters an author name that +# is considered invalid, it searches Github and JIRA in an attempt to search +# for replacements. This tool runs in two modes: +# +# (1) Interactive mode: For each invalid author name, this script presents +# all candidate replacements to the user and awaits user response. In this +# mode, the user may also input a custom name. This is the default. +# +# (2) Non-interactive mode: For each invalid author name, this script replaces +# the name with the first valid candidate it can find. If there is none, it +# uses the original name. This can be enabled through the --non-interactive flag. + +import os +import sys + +from releaseutils import * + +# You must set the following before use! +JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira") +JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None) +JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None) +if not JIRA_USERNAME or not JIRA_PASSWORD: + sys.exit("Both JIRA_USERNAME and JIRA_PASSWORD must be set") + +# Write new contributors list to .new +if not os.path.isfile(contributors_file_name): + print "Contributors file %s does not exist!" 
% contributors_file_name + print "Have you run ./generate-contributors.py yet?" + sys.exit(1) +contributors_file = open(contributors_file_name, "r") +new_contributors_file_name = contributors_file_name + ".new" +new_contributors_file = open(new_contributors_file_name, "w") +warnings = [] + +# In non-interactive mode, this script will choose the first replacement that is valid +INTERACTIVE_MODE = True +if len(sys.argv) > 1: + options = set(sys.argv[1:]) + if "--non-interactive" in options: + INTERACTIVE_MODE = False +if INTERACTIVE_MODE: + print "Running in interactive mode. To disable this, provide the --non-interactive flag." + +# Setup Github and JIRA clients +jira_options = { "server": JIRA_API_BASE } +jira_client = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD)) +github_client = Github() + +# Generate candidates for the given author. This should only be called if the given author +# name does not represent a full name as this operation is somewhat expensive. Under the +# hood, it makes several calls to the Github and JIRA API servers to find the candidates. +# +# This returns a list of (candidate name, source) 2-tuples. E.g. 
+# [ +# (NOT_FOUND, "No full name found for Github user andrewor14"), +# ("Andrew Or", "Full name of JIRA user andrewor14"), +# ("Andrew Orso", "Full name of SPARK-1444 assignee andrewor14"), +# ("Andrew Ordall", "Full name of SPARK-1663 assignee andrewor14"), +# (NOT_FOUND, "No assignee found for SPARK-1763") +# ] +NOT_FOUND = "Not found" +def generate_candidates(author, issues): + candidates = [] + # First check for full name of Github user + github_name = get_github_name(new_author, github_client) + if github_name: + candidates.append((github_name, "Full name of Github user %s" % new_author)) + else: + candidates.append((NOT_FOUND, "No full name found for Github user %s" % new_author)) + # Then do the same for JIRA user + jira_name = get_jira_name(new_author, jira_client) + if jira_name: + candidates.append((jira_name, "Full name of JIRA user %s" % new_author)) + else: + candidates.append((NOT_FOUND, "No full name found for JIRA user %s" % new_author)) + # Then do the same for the assignee of each of the associated JIRAs + # Note that a given issue may not have an assignee, or the assignee may not have a full name + for issue in issues: + jira_issue = jira_client.issue(issue) + jira_assignee = jira_issue.fields.assignee + if jira_assignee: + user_name = jira_assignee.name + display_name = jira_assignee.displayName + if display_name: + candidates.append((display_name, "Full name of %s assignee %s" % (issue, user_name))) + else: + candidates.append((NOT_FOUND, "No full name found for %s assignee %" % (issue, user_name))) + else: + candidates.append((NOT_FOUND, "No assignee found for %s" % issue)) + # Guard against special characters in candidate names + # Note that the candidate name may already be in unicode (JIRA returns this) + for i, (candidate, source) in enumerate(candidates): + try: + candidate = unicode(candidate, "UTF-8") + except TypeError: + # already in unicode + pass + candidate = unidecode.unidecode(candidate).strip() + candidates[i] = (candidate, 
source) + return candidates + +# Translate each invalid author by searching for possible candidates from Github and JIRA +# In interactive mode, this script presents the user with a list of choices and have the user +# select from this list. Additionally, the user may also choose to enter a custom name. +# In non-interactive mode, this script picks the first valid author name from the candidates +# If no such name exists, the original name is used (without the JIRA numbers). +print "\n========================== Translating contributor list ==========================" +for line in contributors_file: + author = line.split(" - ")[0] + print "Processing author %s" % author + if not author: + print " ERROR: Expected the following format - " + print " ERROR: Actual = %s" % line + if not is_valid_author(author): + new_author = author.split("/")[0] + issues = author.split("/")[1:] + candidates = generate_candidates(new_author, issues) + # Print out potential replacement candidates along with the sources, e.g. 
+ # [X] No full name found for Github user andrewor14 + # [0] Andrew Or - Full name of JIRA user andrewor14 + # [1] Andrew Orso - Full name of SPARK-1444 assignee andrewor14 + # [2] Andrew Ordall - Full name of SPARK-1663 assignee andrewor14 + # [X] No assignee found for SPARK-1763 + # [3] Custom + candidate_names = [] + for candidate, source in candidates: + if candidate == NOT_FOUND: + print " [X] %s" % source + else: + index = len(candidate_names) + candidate_names.append(candidate) + print " [%d] %s - %s" % (index, candidate, source) + custom_index = len(candidate_names) + # In interactive mode, additionally provide "custom" option and await user response + if INTERACTIVE_MODE: + print " [%d] Custom" % custom_index + response = raw_input(" Your choice: ") + while not response.isdigit() or int(response) > custom_index: + response = raw_input(" Please enter an integer between 0 and %d: " % custom_index) + response = int(response) + if response == custom_index: + new_author = raw_input(" Please type a custom name for this author: ") + else: + new_author = candidate_names[response] + # In non-interactive mode, just pick the first candidate + else: + valid_candidate_names = [name for name, _ in candidates\ + if is_valid_author(name) and name != NOT_FOUND] + if valid_candidate_names: + new_author = valid_candidate_names[0] + # Finally, capitalize the author and replace the original one with it + # If the final replacement is still invalid, log a warning + if is_valid_author(new_author): + new_author = capitalize_author(new_author) + else: + warnings.append("Unable to find a valid name %s for author %s" % (new_author, author)) + print " * Replacing %s with %s" % (author, new_author) + line = line.replace(author, new_author) + new_contributors_file.write(line) +print "==================================================================================\n" +contributors_file.close() +new_contributors_file.close() + +print "Translated contributors list successfully written 
to %s!" % new_contributors_file_name + +# Log any warnings encountered in the process +if warnings: + print "\n========== Warnings encountered while translating the contributor list ===========" + for w in warnings: print w + print "Please manually correct these in the final contributors list at %s." % new_contributors_file_name + print "==================================================================================\n" + From 3cdae038f1c233647e074858a6b7b710edf15d78 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Wed, 3 Dec 2014 22:15:46 -0800 Subject: [PATCH 58/82] MAINTENANCE: Automated closing of pull requests. This commit exists to close the following pull requests on Github: Closes #1875 (close requested by 'marmbrus') Closes #3566 (close requested by 'andrewor14') Closes #3487 (close requested by 'pwendell') From ed88db4cb21d029ca14ebc428fae122adf5128f0 Mon Sep 17 00:00:00 2001 From: Jacky Li Date: Thu, 4 Dec 2014 00:43:55 -0800 Subject: [PATCH 59/82] [SQL] remove unnecessary import Author: Jacky Li Closes #3585 from jackylk/remove and squashes the following commits: 045423d [Jacky Li] remove unnecessary import --- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f83e647014193..bcebce7603f13 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -22,7 +22,6 @@ import java.util.TimeZone import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.catalyst.errors.TreeNodeException -import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan /* Implicits */ From c3ad48603632a039a51be3d33e917105149fdd7a Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 4 Dec 2014 00:45:57 -0800 Subject: [PATCH 
60/82] [SPARK-4719][API] Consolidate various narrow dep RDD classes with MapPartitionsRDD MappedRDD, MappedValuesRDD, FlatMappedValuesRDD, FilteredRDD, GlommedRDD, FlatMappedRDD are not necessary. They can be implemented trivially using MapPartitionsRDD. Author: Reynold Xin Closes #3578 from rxin/SPARK-4719 and squashes the following commits: eed9853 [Reynold Xin] Preserve partitioning for filter. eb1a89b [Reynold Xin] [SPARK-4719][API] Consolidate various narrow dep RDD classes with MapPartitionsRDD. --- .../org/apache/spark/rdd/BinaryFileRDD.scala | 12 ++--- .../org/apache/spark/rdd/FilteredRDD.scala | 35 ------------- .../org/apache/spark/rdd/FlatMappedRDD.scala | 34 ------------- .../spark/rdd/FlatMappedValuesRDD.scala | 35 ------------- .../org/apache/spark/rdd/GlommedRDD.scala | 31 ------------ .../org/apache/spark/rdd/MappedRDD.scala | 32 ------------ .../apache/spark/rdd/MappedValuesRDD.scala | 33 ------------ .../apache/spark/rdd/PairRDDFunctions.scala | 10 +++- .../main/scala/org/apache/spark/rdd/RDD.scala | 28 ++++++++--- .../scala/org/apache/spark/rdd/RDDSuite.scala | 50 ++++++++----------- 10 files changed, 55 insertions(+), 245 deletions(-) delete mode 100644 core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala delete mode 100644 core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala delete mode 100644 core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala delete mode 100644 core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala delete mode 100644 core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala delete mode 100644 core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala diff --git a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala index 6e66ddbdef788..1f755db485812 100644 --- a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala @@ -24,12 +24,12 @@ import 
org.apache.spark.input.StreamFileInputFormat import org.apache.spark.{ Partition, SparkContext } private[spark] class BinaryFileRDD[T]( - sc: SparkContext, - inputFormatClass: Class[_ <: StreamFileInputFormat[T]], - keyClass: Class[String], - valueClass: Class[T], - @transient conf: Configuration, - minPartitions: Int) + sc: SparkContext, + inputFormatClass: Class[_ <: StreamFileInputFormat[T]], + keyClass: Class[String], + valueClass: Class[T], + @transient conf: Configuration, + minPartitions: Int) extends NewHadoopRDD[String, T](sc, inputFormatClass, keyClass, valueClass, conf) { override def getPartitions: Array[Partition] = { diff --git a/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala deleted file mode 100644 index 9e41b3d1e2d4f..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.spark.{Partition, TaskContext} - -private[spark] class FilteredRDD[T: ClassTag]( - prev: RDD[T], - f: T => Boolean) - extends RDD[T](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override val partitioner = prev.partitioner // Since filter cannot change a partition's keys - - override def compute(split: Partition, context: TaskContext) = - firstParent[T].iterator(split, context).filter(f) -} diff --git a/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala deleted file mode 100644 index d8f87d4e3690e..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.spark.{Partition, TaskContext} - -private[spark] -class FlatMappedRDD[U: ClassTag, T: ClassTag]( - prev: RDD[T], - f: T => TraversableOnce[U]) - extends RDD[U](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override def compute(split: Partition, context: TaskContext) = - firstParent[T].iterator(split, context).flatMap(f) -} diff --git a/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala deleted file mode 100644 index 7c9023f62d3b6..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.rdd - -import org.apache.spark.{Partition, TaskContext} - -private[spark] -class FlatMappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => TraversableOnce[U]) - extends RDD[(K, U)](prev) { - - override def getPartitions = firstParent[Product2[K, V]].partitions - - override val partitioner = firstParent[Product2[K, V]].partitioner - - override def compute(split: Partition, context: TaskContext) = { - firstParent[Product2[K, V]].iterator(split, context).flatMap { case Product2(k, v) => - f(v).map(x => (k, x)) - } - } -} diff --git a/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala deleted file mode 100644 index f6463fa715a71..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.spark.{Partition, TaskContext} - -private[spark] class GlommedRDD[T: ClassTag](prev: RDD[T]) - extends RDD[Array[T]](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override def compute(split: Partition, context: TaskContext) = - Array(firstParent[T].iterator(split, context).toArray).iterator -} diff --git a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala deleted file mode 100644 index 8d7c288593665..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.spark.{Partition, TaskContext} - -private[spark] -class MappedRDD[U: ClassTag, T: ClassTag](prev: RDD[T], f: T => U) - extends RDD[U](prev) { - - override def getPartitions: Array[Partition] = firstParent[T].partitions - - override def compute(split: Partition, context: TaskContext) = - firstParent[T].iterator(split, context).map(f) -} diff --git a/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala deleted file mode 100644 index a60952eee5901..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.rdd - -import org.apache.spark.{Partition, TaskContext} - -private[spark] -class MappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => U) - extends RDD[(K, U)](prev) { - - override def getPartitions = firstParent[Product2[K, U]].partitions - - override val partitioner = firstParent[Product2[K, U]].partitioner - - override def compute(split: Partition, context: TaskContext): Iterator[(K, U)] = { - firstParent[Product2[K, V]].iterator(split, context).map { pair => (pair._1, f(pair._2)) } - } -} diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index e78e57678852f..c43e1f2fe135e 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -660,7 +660,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) */ def mapValues[U](f: V => U): RDD[(K, U)] = { val cleanF = self.context.clean(f) - new MappedValuesRDD(self, cleanF) + new MapPartitionsRDD[(K, U), (K, V)](self, + (context, pid, iter) => iter.map { case (k, v) => (k, cleanF(v)) }, + preservesPartitioning = true) } /** @@ -669,7 +671,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) */ def flatMapValues[U](f: V => TraversableOnce[U]): RDD[(K, U)] = { val cleanF = self.context.clean(f) - new FlatMappedValuesRDD(self, cleanF) + new MapPartitionsRDD[(K, U), (K, V)](self, + (context, pid, iter) => iter.flatMap { case (k, v) => + cleanF(v).map(x => (k, x)) + }, + preservesPartitioning = true) } /** diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 8dfd952298f30..0bd616ec24fcb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -17,7 +17,7 @@ package org.apache.spark.rdd -import java.util.{Properties, Random} +import java.util.Random import 
scala.collection.{mutable, Map} import scala.collection.mutable.ArrayBuffer @@ -36,13 +36,12 @@ import org.apache.spark._ import org.apache.spark.Partitioner._ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaRDD -import org.apache.spark.broadcast.Broadcast import org.apache.spark.partial.BoundedDouble import org.apache.spark.partial.CountEvaluator import org.apache.spark.partial.GroupedCountEvaluator import org.apache.spark.partial.PartialResult import org.apache.spark.storage.StorageLevel -import org.apache.spark.util.{BoundedPriorityQueue, Utils, CallSite} +import org.apache.spark.util.{BoundedPriorityQueue, Utils} import org.apache.spark.util.collection.OpenHashMap import org.apache.spark.util.random.{BernoulliSampler, PoissonSampler, BernoulliCellSampler, SamplingUtils} @@ -270,19 +269,30 @@ abstract class RDD[T: ClassTag]( /** * Return a new RDD by applying a function to all elements of this RDD. */ - def map[U: ClassTag](f: T => U): RDD[U] = new MappedRDD(this, sc.clean(f)) + def map[U: ClassTag](f: T => U): RDD[U] = { + val cleanF = sc.clean(f) + new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.map(cleanF)) + } /** * Return a new RDD by first applying a function to all elements of this * RDD, and then flattening the results. */ - def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = - new FlatMappedRDD(this, sc.clean(f)) + def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = { + val cleanF = sc.clean(f) + new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.flatMap(cleanF)) + } /** * Return a new RDD containing only the elements that satisfy a predicate. 
*/ - def filter(f: T => Boolean): RDD[T] = new FilteredRDD(this, sc.clean(f)) + def filter(f: T => Boolean): RDD[T] = { + val cleanF = sc.clean(f) + new MapPartitionsRDD[T, T]( + this, + (context, pid, iter) => iter.filter(cleanF), + preservesPartitioning = true) + } /** * Return a new RDD containing the distinct elements in this RDD. @@ -503,7 +513,9 @@ abstract class RDD[T: ClassTag]( /** * Return an RDD created by coalescing all elements within each partition into an array. */ - def glom(): RDD[Array[T]] = new GlommedRDD(this) + def glom(): RDD[Array[T]] = { + new MapPartitionsRDD[Array[T], T](this, (context, pid, iter) => Iterator(iter.toArray)) + } /** * Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 6d9be796144b6..46fcb80fa1845 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -24,10 +24,9 @@ import scala.reflect.ClassTag import org.scalatest.FunSuite import org.apache.spark._ -import org.apache.spark.util.Utils - import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.rdd.RDDSuiteUtils._ +import org.apache.spark.util.Utils class RDDSuite extends FunSuite with SharedSparkContext { @@ -37,8 +36,8 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(nums.toLocalIterator.toList === List(1, 2, 3, 4)) val dups = sc.makeRDD(Array(1, 1, 2, 2, 3, 3, 4, 4), 2) assert(dups.distinct().count() === 4) - assert(dups.distinct.count === 4) // Can distinct and count be called without parentheses? - assert(dups.distinct.collect === dups.distinct().collect) + assert(dups.distinct().count === 4) // Can distinct and count be called without parentheses? 
+ assert(dups.distinct().collect === dups.distinct().collect) assert(dups.distinct(2).collect === dups.distinct().collect) assert(nums.reduce(_ + _) === 10) assert(nums.fold(0)(_ + _) === 10) @@ -617,9 +616,9 @@ class RDDSuite extends FunSuite with SharedSparkContext { for(seed <- 1 to 5) { val splits = data.randomSplit(Array(1.0, 2.0, 3.0), seed) assert(splits.size == 3, "wrong number of splits") - assert(splits.flatMap(_.collect).sorted.toList == data.collect.toList, + assert(splits.flatMap(_.collect()).sorted.toList == data.collect().toList, "incomplete or wrong split") - val s = splits.map(_.count) + val s = splits.map(_.count()) assert(math.abs(s(0) - 100) < 50) // std = 9.13 assert(math.abs(s(1) - 200) < 50) // std = 11.55 assert(math.abs(s(2) - 300) < 50) // std = 12.25 @@ -762,8 +761,8 @@ class RDDSuite extends FunSuite with SharedSparkContext { val rdd3 = rdd2.map(_ + 1) val rdd4 = new UnionRDD(sc, List(rdd1, rdd2, rdd3)) assert(rdd4.parent(0).isInstanceOf[ParallelCollectionRDD[_]]) - assert(rdd4.parent(1).isInstanceOf[FilteredRDD[_]]) - assert(rdd4.parent(2).isInstanceOf[MappedRDD[_, _]]) + assert(rdd4.parent[Int](1) === rdd2) + assert(rdd4.parent[Int](2) === rdd3) } test("getNarrowAncestors") { @@ -781,20 +780,18 @@ class RDDSuite extends FunSuite with SharedSparkContext { // Simple dependency tree with a single branch assert(ancestors1.size === 0) assert(ancestors2.size === 2) - assert(ancestors2.count(_.isInstanceOf[ParallelCollectionRDD[_]]) === 1) - assert(ancestors2.count(_.isInstanceOf[FilteredRDD[_]]) === 1) + assert(ancestors2.count(_ === rdd1) === 1) + assert(ancestors2.count(_.isInstanceOf[MapPartitionsRDD[_, _]]) === 1) assert(ancestors3.size === 5) - assert(ancestors3.count(_.isInstanceOf[ParallelCollectionRDD[_]]) === 1) - assert(ancestors3.count(_.isInstanceOf[FilteredRDD[_]]) === 2) - assert(ancestors3.count(_.isInstanceOf[MappedRDD[_, _]]) === 2) + assert(ancestors3.count(_.isInstanceOf[MapPartitionsRDD[_, _]]) === 4) // Any ancestors 
before the shuffle are not considered assert(ancestors4.size === 0) assert(ancestors4.count(_.isInstanceOf[ShuffledRDD[_, _, _]]) === 0) assert(ancestors5.size === 3) assert(ancestors5.count(_.isInstanceOf[ShuffledRDD[_, _, _]]) === 1) - assert(ancestors5.count(_.isInstanceOf[MapPartitionsRDD[_, _]]) === 0) - assert(ancestors5.count(_.isInstanceOf[MappedValuesRDD[_, _, _]]) === 2) + assert(ancestors5.count(_ === rdd3) === 0) + assert(ancestors5.count(_.isInstanceOf[MapPartitionsRDD[_, _]]) === 2) } test("getNarrowAncestors with multiple parents") { @@ -815,16 +812,16 @@ class RDDSuite extends FunSuite with SharedSparkContext { // Simple dependency tree with multiple branches assert(ancestors6.size === 3) assert(ancestors6.count(_.isInstanceOf[ParallelCollectionRDD[_]]) === 2) - assert(ancestors6.count(_.isInstanceOf[MappedRDD[_, _]]) === 1) + assert(ancestors6.count(_ === rdd2) === 1) assert(ancestors7.size === 5) assert(ancestors7.count(_.isInstanceOf[ParallelCollectionRDD[_]]) === 3) - assert(ancestors7.count(_.isInstanceOf[MappedRDD[_, _]]) === 1) - assert(ancestors7.count(_.isInstanceOf[FilteredRDD[_]]) === 1) + assert(ancestors7.count(_ === rdd2) === 1) + assert(ancestors7.count(_ === rdd3) === 1) // Dependency tree with duplicate nodes (e.g. 
rdd1 should not be reported twice) assert(ancestors8.size === 7) - assert(ancestors8.count(_.isInstanceOf[MappedRDD[_, _]]) === 1) - assert(ancestors8.count(_.isInstanceOf[FilteredRDD[_]]) === 1) + assert(ancestors8.count(_ === rdd2) === 1) + assert(ancestors8.count(_ === rdd3) === 1) assert(ancestors8.count(_.isInstanceOf[UnionRDD[_]]) === 2) assert(ancestors8.count(_.isInstanceOf[ParallelCollectionRDD[_]]) === 3) assert(ancestors8.count(_ == rdd1) === 1) @@ -834,7 +831,6 @@ class RDDSuite extends FunSuite with SharedSparkContext { // Any ancestors before the shuffle are not considered assert(ancestors9.size === 2) assert(ancestors9.count(_.isInstanceOf[CoGroupedRDD[_]]) === 1) - assert(ancestors9.count(_.isInstanceOf[MappedValuesRDD[_, _, _]]) === 1) } /** @@ -868,12 +864,10 @@ class RDDSuite extends FunSuite with SharedSparkContext { val ancestors3 = rdd3.getNarrowAncestors val ancestors4 = rdd4.getNarrowAncestors assert(ancestors3.size === 4) - assert(ancestors3.count(_.isInstanceOf[MappedRDD[_, _]]) === 2) - assert(ancestors3.count(_.isInstanceOf[FilteredRDD[_]]) === 2) + assert(ancestors3.count(_.isInstanceOf[MapPartitionsRDD[_, _]]) === 4) assert(ancestors3.count(_ == rdd3) === 0) assert(ancestors4.size === 4) - assert(ancestors4.count(_.isInstanceOf[MappedRDD[_, _]]) === 2) - assert(ancestors4.count(_.isInstanceOf[FilteredRDD[_]]) === 1) + assert(ancestors4.count(_.isInstanceOf[MapPartitionsRDD[_, _]]) === 3) assert(ancestors4.count(_.isInstanceOf[CyclicalDependencyRDD[_]]) === 1) assert(ancestors4.count(_ == rdd3) === 1) assert(ancestors4.count(_ == rdd4) === 0) @@ -881,8 +875,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { // Cycles that do not involve the root val ancestors5 = rdd5.getNarrowAncestors assert(ancestors5.size === 6) - assert(ancestors5.count(_.isInstanceOf[MappedRDD[_, _]]) === 3) - assert(ancestors5.count(_.isInstanceOf[FilteredRDD[_]]) === 2) + assert(ancestors5.count(_.isInstanceOf[MapPartitionsRDD[_, _]]) === 5) 
assert(ancestors5.count(_.isInstanceOf[CyclicalDependencyRDD[_]]) === 1) assert(ancestors4.count(_ == rdd3) === 1) @@ -890,8 +883,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { val ancestors6 = rdd6.getNarrowAncestors assert(ancestors6.size === 12) assert(ancestors6.count(_.isInstanceOf[UnionRDD[_]]) === 2) - assert(ancestors6.count(_.isInstanceOf[MappedRDD[_, _]]) === 4) - assert(ancestors6.count(_.isInstanceOf[FilteredRDD[_]]) === 3) + assert(ancestors6.count(_.isInstanceOf[MapPartitionsRDD[_, _]]) === 7) assert(ancestors6.count(_.isInstanceOf[CyclicalDependencyRDD[_]]) === 3) } From 20bfea4ab7c0923e8d3f039d0c5098669db4d5b0 Mon Sep 17 00:00:00 2001 From: lewuathe Date: Thu, 4 Dec 2014 16:51:41 +0800 Subject: [PATCH 61/82] [SPARK-4685] Include all spark.ml and spark.mllib packages in JavaDoc's MLlib group This is #3554 from Lewuathe except that I put both `spark.ml` and `spark.mllib` in the group 'MLlib`. Closes #3554 jkbradley Author: lewuathe Author: Xiangrui Meng Closes #3598 from mengxr/Lewuathe-modify-javadoc-setting and squashes the following commits: 184609a [Xiangrui Meng] merge spark.ml and spark.mllib into the same group in javadoc f7535e6 [lewuathe] [SPARK-4685] Update JavaDoc settings to include spark.ml and all spark.mllib subpackages in the right sections --- project/SparkBuild.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b16ed66aeb3c3..6ff08723772aa 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -365,7 +365,10 @@ object Unidoc { "mllib.classification", "mllib.clustering", "mllib.evaluation.binary", "mllib.linalg", "mllib.linalg.distributed", "mllib.optimization", "mllib.rdd", "mllib.recommendation", "mllib.regression", "mllib.stat", "mllib.tree", "mllib.tree.configuration", - "mllib.tree.impurity", "mllib.tree.model", "mllib.util" + "mllib.tree.impurity", "mllib.tree.model", "mllib.util", + "mllib.evaluation", 
"mllib.feature", "mllib.random", "mllib.stat.correlation", + "mllib.stat.test", "mllib.tree.impl", "mllib.tree.loss", + "ml", "ml.classification", "ml.evaluation", "ml.feature", "ml.param", "ml.tuning" ), "-group", "Spark SQL", packageList("sql.api.java", "sql.api.java.types", "sql.hive.api.java"), "-noqualifier", "java.lang" From c6c7165e7ecf1690027d6bd4e0620012cd0d2310 Mon Sep 17 00:00:00 2001 From: Aaron Davidson Date: Thu, 4 Dec 2014 00:58:42 -0800 Subject: [PATCH 62/82] [SQL] Minor: Avoid calling Seq#size in a loop Just found this instance while doing some jstack-based profiling of a Spark SQL job. It is very unlikely that this is causing much of a perf issue anywhere, but it is unnecessarily suboptimal. Author: Aaron Davidson Closes #3593 from aarondav/seq-opt and squashes the following commits: 962cdfc [Aaron Davidson] [SQL] Minor: Avoid calling Seq#size in a loop --- .../spark/sql/catalyst/expressions/nullFunctions.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullFunctions.scala index 84a3567895175..08b982bc671e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullFunctions.scala @@ -45,9 +45,9 @@ case class Coalesce(children: Seq[Expression]) extends Expression { override def eval(input: Row): Any = { var i = 0 var result: Any = null - while(i < children.size && result == null) { - result = children(i).eval(input) - i += 1 + val childIterator = children.iterator + while (childIterator.hasNext && result == null) { + result = childIterator.next().eval(input) } result } From 529439bd506949f272a2b6f099ea549b097428f3 Mon Sep 17 00:00:00 2001 From: "Joseph K. 
Bradley" Date: Thu, 4 Dec 2014 00:59:32 -0800 Subject: [PATCH 63/82] [docs] Fix outdated comment in tuning guide When you use the SPARK_JAVA_OPTS env variable, Spark complains: ``` SPARK_JAVA_OPTS was detected (set to ' -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps '). This is deprecated in Spark 1.0+. Please instead use: - ./spark-submit with conf/spark-defaults.conf to set defaults for an application - ./spark-submit with --driver-java-options to set -X options for a driver - spark.executor.extraJavaOptions to set -X options for executors - SPARK_DAEMON_JAVA_OPTS to set java options for standalone daemons (master or worker) ``` This updates the docs to redirect the user to the relevant part of the configuration docs. CC: mengxr but please CC someone else as needed Author: Joseph K. Bradley Closes #3592 from jkbradley/tuning-doc and squashes the following commits: 0760ce1 [Joseph K. Bradley] fixed outdated comment in tuning guide --- docs/tuning.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/tuning.md b/docs/tuning.md index 9b5c9adac6a4f..0e2447dd46394 100644 --- a/docs/tuning.md +++ b/docs/tuning.md @@ -143,8 +143,7 @@ the space allocated to the RDD cache to mitigate this. **Measuring the Impact of GC** The first step in GC tuning is to collect statistics on how frequently garbage collection occurs and the amount of -time spent GC. This can be done by adding `-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps` to your -`SPARK_JAVA_OPTS` environment variable. Next time your Spark job is run, you will see messages printed in the worker's logs +time spent GC. This can be done by adding `-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps` to the Java options. (See the [configuration guide](configuration.html#Dynamically-Loading-Spark-Properties) for info on passing Java options to Spark jobs.) Next time your Spark job is run, you will see messages printed in the worker's logs each time a garbage collection occurs. 
Note these logs will be on your cluster's worker nodes (in the `stdout` files in their work directories), *not* on your driver program. From 469a6e5f3bdd5593b3254bc916be8236e7c6cb74 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Thu, 4 Dec 2014 17:00:06 +0800 Subject: [PATCH 64/82] [SPARK-4575] [mllib] [docs] spark.ml pipelines doc + bug fixes Documentation: * Added ml-guide.md, linked from mllib-guide.md * Updated mllib-guide.md with small section pointing to ml-guide.md Examples: * CrossValidatorExample * SimpleParamsExample * (I copied these + the SimpleTextClassificationPipeline example into the ml-guide.md) Bug fixes: * PipelineModel: did not use ParamMaps correctly * UnaryTransformer: issues with TypeTag serialization (Thanks to mengxr for that fix!) CC: mengxr shivaram etrain Documentation for Pipelines: I know the docs are not complete, but the goal is to have enough to let interested people get started using spark.ml and to add more docs once the package is more established/complete. Author: Joseph K. Bradley Author: jkbradley Author: Xiangrui Meng Closes #3588 from jkbradley/ml-package-docs and squashes the following commits: d393b5c [Joseph K. Bradley] fixed bug in Pipeline (typo from last commit). updated examples for CV and Params for spark.ml c38469c [Joseph K. Bradley] Updated ml-guide with CV examples 99f88c2 [Joseph K. Bradley] Fixed bug in PipelineModel.transform* with usage of params. Updated CrossValidatorExample to use more training examples so it is less likely to get a 0-size fold. ea34dc6 [jkbradley] Merge pull request #4 from mengxr/ml-package-docs 3b83ec0 [Xiangrui Meng] replace TypeTag with explicit datatype 41ad9b1 [Joseph K. Bradley] Added examples for spark.ml: SimpleParamsExample + Java version, CrossValidatorExample + Java version. CrossValidatorExample not working yet. Added programming guide for spark.ml, but need to add CrossValidatorExample to it once CrossValidatorExample works. 
--- docs/img/ml-Pipeline.png | Bin 0 -> 74030 bytes docs/img/ml-PipelineModel.png | Bin 0 -> 76019 bytes docs/img/ml-Pipelines.pptx | Bin 0 -> 56777 bytes docs/ml-guide.md | 702 ++++++++++++++++++ docs/mllib-guide.md | 13 +- .../ml/JavaCrossValidatorExample.java | 127 ++++ .../examples/ml/JavaSimpleParamsExample.java | 111 +++ .../JavaSimpleTextClassificationPipeline.java | 6 +- .../examples/ml/CrossValidatorExample.scala | 110 +++ .../examples/ml/SimpleParamsExample.scala | 101 +++ .../ml/SimpleTextClassificationPipeline.scala | 7 +- .../scala/org/apache/spark/ml/Pipeline.scala | 10 +- .../org/apache/spark/ml/Transformer.scala | 18 +- .../apache/spark/ml/feature/HashingTF.scala | 5 +- .../apache/spark/ml/feature/Tokenizer.scala | 4 +- .../org/apache/spark/ml/param/params.scala | 11 +- .../org/apache/spark/mllib/linalg/BLAS.scala | 4 +- 17 files changed, 1205 insertions(+), 24 deletions(-) create mode 100644 docs/img/ml-Pipeline.png create mode 100644 docs/img/ml-PipelineModel.png create mode 100644 docs/img/ml-Pipelines.pptx create mode 100644 docs/ml-guide.md create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java create mode 100644 examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala create mode 100644 examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala diff --git a/docs/img/ml-Pipeline.png b/docs/img/ml-Pipeline.png new file mode 100644 index 0000000000000000000000000000000000000000..607928906bedd224cbac4e2ff4069ee3cf2da261 GIT binary patch literal 74030 zcmZ^LV|Zm8jqpOp%8LFY-*fcFQz}Z7NGBQ@#u&=-WNB=a@l;MgoYGq@dFX zmtdQuGJ9qZ=nw+{))^|A5VYxep7@-W$Q?EG9b$+9%p0uo5KhvMJ9Byn)RW?)|so;J4zTimGBcLrYz=$L_-pp%o4#t zKLV)1!6ykrGo={82f+xSpgyW#nAt?2pg{eg!u?!82J00a&dF4Ac&Nxkg6x>c@KiF>T>Jj=O9Qh3LW->AUtv`D6L3HW9HWt#O$|LD}n;m 
z99?p|^B&NgLsHKi$?>=r@+$Nc^&2VroynSP5S#|9kcS{8UgsIn&Ap?BHbI zh{dN)Tn*L?WF2uS7YYm!n2NdR-U|j2(QQy1OphFfV0KD1S{MkI3FSl@8PEUR>p!az z5y517e=pj)N9X71$T07P8p0qYr=$oXl2r*nQ+g(c$!j+>&X!bvCI@vMc{wm6F8eo& z7#5=-yThK>fAsy#;Dfv)9~5OxI41@Rk|N@SVEytzTV6+$DkH9A zseaVZf|lV--f-K&jVa)vs=Jts(((oV_mhA_l8E|1n7p4vU~XftiB$5xkzF9n3aswy z??C&*NCI0vvso2sh%H}QbKfgiS%YVga@hpcYGE&+eZ?jQK2MnGR`Wy#ygz8^dVQA4 zSPj6h;sqEaB(8X7+*c;Tz()WT9U&68_HPUm1jSLL@aQIB@Q96(=NraXJMQ9J|M0PZ zSJ2?Pt{E#n8lVyPw?pvrywSW!-#Xr>+{r^K$W8ah`6Liikquc90Dmw@%w)zHSBda% zR0sUxc5a~1D^EuJN@6Kwy*F(c+ zBmH;l5s^E#ej0Fg@qRg+eJv9c98(?Yx#bAmEKkl*LIg@}Pkj5763g-OKa+ALKHqMKZh!OtN|$?xwv zf`MqBeZ#RP0jzs}BFtU$O=Rr(p`oC`e^V(Jz)&H>7$@Y`BSD*QA%}XkgWwnd1~zbn zGBLlDD&{nSMfC^H#XcYwo^`vycUc3-E~GR_wsC^aR`Rl$!iQ&!Of{R3rDZ=4MWUa9+lc(TIrrIjn3t1#CZvWOvT-9`rHF3ZaB7FfKp7U*9|_~TVn3q9E_Kr0$3e;q|8&?RH0Eas zusUIFtNG)fheB$g*f%^H#EJk7eu_hP5_sm$touM zrVBxb@-YA0m!!>CIFI(Urz@P z%XSTWB4TOf<^EdC1uRh@DAxjDDrz{3L;MS(5$AKFTK$TpGDodu3$NYOy%!ARxDz=Dk+Px_wt$~MTvWF11SI-}|e-VAY zn3Et=IKspS-zZ66pUjE zLil*y(6g{A%>a$5Ss-B0qF=x#&_@0Z!_Fd*?b8XPZ{3Xmlj0XVYryxy*Uv{>|CS3} zh*7S;p??h4cOKf8;|jOU9*Uh;(8YE?bcAx$cf4@rCN+|g>QIFOXf_`bh7++=Vr5eW zv;n;sw7_?cLV@;udi^&YmF<`{7W5(Rl|r3$gq>9=UpOkFfy*L;8lZW;Rm>1Hwv&S* zeaiy7*9b=D$;nmdHCMQOO9)JY^m$qLF@Gx>i46&0nZ$2?QGkpXQS1K5IYSOW^P_m% z0rYxb;{yB-heBr1Wu-yV8DIzAbb^ujz>&cS=7b<9a6yTgHu_$ImRQZ-?n`l|zd6H& zMaU2LweYt*CAjS;rlH}!;@@GgyJ_vfWKXcuSA+nC7f6JR&aI}T0MzvG1kiJIk*}u} zG(;G=eO`*t$JjYJ@qdw!B4c2HdgS3~BktZ1zGHrSKi81R#Wm|{Cqeejeo6SW_ZR}y zR)mj5L08*3%TPrC&l*6f_&mWDGbmMk1kVolQ1wRLQ2_ao$r6Syugvet?*tp#$Y%sO zx}l8u_AqyqeY0nvJIE7&HH&-oibYG?!fjd+K7VAw`-yvsH%&#S|B{z1L+>!eGC+kSSncC{P8F8}j+#&VTjJEUFP6){` zwCq3{a+t-w@9~gC-Qkq0;4!oz#x4b(+25kQxL}xBt>Dp8idg>>nm@!0hx#nL6c#+U zBQau^imFXXRhtKHj7#+8e>i>_y@=PexuNF>ojiNiiwFJQZik1?bg&SUM0L*wU2EqL zor6vSL>`2oi@shc;s)q|sd!NTz8w5UUNmV1dmwmWg%*Ey&!EQXO7AZBb%k%n7*l?0 zLCj0|5|qcND>6%I{QP4I#J|X@egkzu{qYZa2ytBQINT5_gT(`kPjsEwZ1Clxa7s(Za1_cudv0 zQi}mW<9B}jumZ&689mSkFJL`S#ThoEU}qS#!eAt(SrC?8F1+dX@~ 
zYZcX)zh~PjL|D%)n2u}<3gA9q$aq>c?kUD_GSXo%KDrfkcH&Ii>0xsw!m(#fekSCr zFF0$Lmu*btA&`SFvUifFiU4CEA)03%BxSOA?5BWjz{)ySGU-wRBVHAIJVbCv$O`|2 zbH&-2155#Vr-~!KS{}MypZ8cSt;SIK=KBE=dx%%4fW|P%Ni2*kHrQ6T1>jPL;v*i* zA6m7K!TqN^cF1^QAi3iy2NE%{SeRH$y5V@Sm}1#fh`EPJ|C-6q9&(oMd5#ObRN- zj2GO?lk?*bF2pF97%`>~fKrhF^-*?1M|`V03m*9nRid?#Dg~*3bS`+Yew$M-_bD$Q z3)10Qy32{GSqcc=zV>-_Ks-8o{-Rl&9&ppZE!S0AOUaX%(9K~9(n#}LbQGBps z@YD05`@qP|^4N7st_LowYJxu{vC4TS5Ktul=RPT!EF9(k8^QoqM}|{6{mjQYmvVXfokpTq$7KS_DLyi#uu@MeSWFb}@!}56E0D?7Jpr zIgn#7Xl9wQ#}_BK$)KU3vV&6{oI|3V%VL%R@|fK+oxMEkxvOPYmjhuj@978lCRQ1d z2zkI5z5NIgE6yq}g^6FO+)qLqV=?vALtILPf(;F}rODtv0-(t?t?}vg#PjnXIdz74 z{p0_qv7>z*E4VHQ8#WIG6bBegEX>7`Kb&>n1gm;}#g=;F4x07@lf|b%!LhI86_jZB z6R%XhKa+}J6BzWOxb;)3;r--t_H{^`xQUbfO6{hLNz?q$+nl|DRj%PVY^>#XAq9(O z$-Uv~9MPlV6sw~1Eck#>XG(GW-?`z5*N&mh8b3uC2l~^rQz&EzigA$;Uv<+Vt0 zAFl>Z3z4Uyb0W~w05pewl94guic9ABXPBoQ{IV~Y-G5ve=7k5iaA-;!%Fvog29w%e zq!!vHTN|a>cF_gcG^stYe$BPo)m=o#%OJ0}zF1KW3#4dsv2>NrALsrPt$sQoHqiqx z8{hYjfjMD6Ul30(*=KEw2PZz>CAnf;5~`(r+TF*R9_dRKw{lt~9|{aMm_e5ce|X-9 z70ud3TDW>LTe|gWZhAV~-jWR8hGfhrTg+H0l7wX^{ zH)1Pn_>Ov0BdQFv@w22!4p@Is&;V%=lZ11=*GH(jeB5?Q?t-{ApVLxHu7KTKM$WbD z?~3ie8dnV1KF!Ap*xZWpYg{@X$=tu`NAucGYDWmBSFcBvIaw%fd4S-^FIWq76Wpm| zMrKPXxUQknnbhBg^Y53-Krr_xsB>voF2WrsRQ_HYSsxa^+uWO~Gn=`X$23A&hbx}! 
z#+Zmf3q)BzV2V-(<}DB~K?*PX9C{q1;|?=mL-@)mFtOA@Ic6T*bTC!^vTB)s^71ay zNhR|sXG`#b?zj(;?2s0)RiN@DJFcVrw=^0gMGl{eXfI0dc{_!Pk+_TpKR?jWPGSKr zPKAse#HxrF=}C>^fUmwU_ZXiN`I1`!A@${L-tW59WrpQl^Jr!si;Vzjo9F%#GHX)WzLdyl8K_S5=4f!>4yzX+m6Tf~_7Q*kOQB{Z~Dhl!DYOx$E zLtAHk&P^UkX#UTMQYHpkOecrfq!~m6=|o!`@voxx3Zx7u$SC5-Ts}CZ;&15We6ZZ@ zt;Nspn7NFA`N*PP?44PwpEVIOu8N^1j5aa*!MD1e-SMC%XefQDpx$>~)>kRVg<4 zk1l6S<2E$d7c>k_gJG=0?M*Sn*>99W4e%CQ?VJ*;@(@3{DCSs-xPG4c$JRH?)k2zZ z+q9D!Wtp_u=mlY3d}a|l7oXxiO5!)tq!RiObY>w^UYFD7+!$Xv+g3KXxkD8+;5C!Y zd{zm(CRw)CD_=8sv~ji5;;n@%`|{~EOopQQ_y>~X(VipQY}%X6thl~*CSIrMkrcTK z1M@LKWG33*E|5utq(pTD)F*@xr3F}Jv#9}=+{~>^PCp`SF`5S2DP>wPzP)R5|`1Y2gbx1b{ZWuPfGh2=+oZL`diAy z!EH65HOA1_yV;VqKW`V=ERO|1o_(p&XwbLgdqr2`Qyb9}#0E>vbcQh1np>Y>hi} z0!>I<&~PQkX*}4Ytb)eB|Gwg3Pe*2Ghu5W=gEZ@v$p;mpW^Z3ACW}O(_B%R3h);v{ z7z(Xw1~hBCh2eRSY+?Os-cV56q09RxeNfYu4{*cWd-`#pCUcuAFks64;=u+N5eOm zOm3S-Zr&f@619aTn0OlHL}%k6fJV!eZ!SkhreYPICy^NYr`KA2_fn%~JF3o_6ywpi zvXy=>`jx*qjl(D~yk-l>QLD{CK6yz|S(;K9z*llvR_A<-kp}kCJ6wkVan;dFB$v?8 zeA!{NXQ3=WPQp>zaAii*>befk9zJNUFhYM;zh}eJs&=@JQiD{xRlVgm;VBDmb9{jC zffjPDN}L)hsdJ%^3EVDMMO~{fgsN6IhdX<@lm5aC_-shPSgOi9V2NxzuhKT2-a7tH zA+l7cyVTo)I4OtCW>HMrf*DmzuhE&(J*rIvIaF*YlKrags>AZa_&8u|l^j)u@}H6p zhkEg4A&{J>r!qcdSpzl#5kituI4O?bW%uTQsC`Me}?!O_hcyXCuX1&!+NZ^U%v z_oxj`_5G8nb;;Fz#xREPG;+7BOzuaMrR2E$N$dBDVwH6lP*ork21*@d#A7o2qbo2k z8$DD)oHYBw2bGT&kvc{bxf*rcv5H3g4ljoYaCjtzOsCubnw$n>b}t}NJ=S=_WOuFd zp)^kpAD2-p%;Ep|^(vZvD5cvZ^V<>gANSp%A6Pev+F&b)(D9^{cyOK;HKGn_DcgE= z;J)J*G;YQjjU}MnJl<@5M|F%B&YH4+lkER-;(s3YBm?Q!Kj)Mw<25my0WnJ2+As*~ zGm0la4>5K#Ho&Kj-Gcvu}MR%=m|mJ1bS6J7|ME0M{7%Cmx@! 
zg85YpqH-dv+-qx!1}bp_w(Bp;{+b)z#F-~vSkBM(F7TD!*56XJV=eso*Oi69Sd}$v zWT1gDrk9LqYBu$&XNFIjvvljzbGlk&vihP&anz)YOw7?tcyck323^91PjU)z%DyEB zX;PUTX|0i^B3e2rzC;rJx^fo;o_c|}X@I~BqrPaKB@#pgXC+`xy-mCuA*5HW)FVB4 z^U^kDt*mP3F>hsY*(qHErjdh_zgFY~D8ZqbxAHaTyBwRrUg`?P9_`JUEJP zEASF2hkkOEv`~k9BkuW2!{CDrx{Tlgg8;g>I;=u`I^BT{^k0Xw$?V}g>!t1(pV$0ttlw^()JKti$T;G)3w&d|P`=D_1~O7W z(D`3YnOORuLy^8#g=XhJxpCn=xui?V$lSPzt8#p1kIp@J!)1Nsav5s?$K_s?K8X1yoH7E^td%|LJwg>2rG?aBis z*pUFKE%6p>e)XxSIDYqIb$u>_6)^H80^^(XA}nB zN=-{k%ZcgP^o=5$qd6b**$Ol7WnnhSaTc_Kc-Cj6&cAyUv37-KjpZt3yOOPp>Tj(9 zDUtsVYJ~-YR&1s#9Y^Vo z*c%Zw>g=<$VsNiB2+jM<3puTDZoFkUBx>7Q_E}NuyYs#UF`oZE!Mj+tD&H0eFRttz zt_qU}w5V-xutDdMhZc^Q?=#wTws1_jBDmWVK)9xfaCE~wq($srqUpKnpln z6mbLjcAqPf0*U%6b3yUk1{L zg266zYRNJ?(LVWKsx@8i*;LG!nu@eHHpH3V=2cbiMi}@aFbTc}uK?IA-ktL)9AvV`N)_ zDfT#m5Rs@OY*!gQp!kW1ncywxbXW`~88I}2$t-Hl$n(bK$wBLyly}Y4`cD^JI<2fY zTr74=8{ge(kMP?S1)`^(41LH8?07DUHsv1jL9^^oyG##s*Ynl=DVhM(QoB8dzUElV zl;r;f2tiRIf{tSVc{t}W`CJ?6bl`VEj_1yZTe|99h`R8aFpu0RJU%z(Rpwn8|S%s+AoD-l!`=Dc%rW?CP6w% z{!~21n9oH++1Vj%{&Gp{k(73mz7uq?BCxt#yPdWQP%oPMB zQxk$g@4G1CYLuJQI4;nHu|ao?(6~$~mkWev`*L~cp`p_zP?_y4&1;Je)KYIh;5m)~ zHEt_yBujGi8pB3oo}PG9;Ch_edQfpOQB8R5y(8}JZY~WNy}kUg1zi`5%X-PYBupo; zo)@tg!5%lPYHTM}B0f#tv-8bFn!bf}OA&Pt)Hpdx>zCK1xLtTRe|qkQCSkv(J^51z zm~nu`sNW{^w`hJh!dk-^u`VgXs<3|C8;@{yQY;F7^qvIwl?w5oh6HIwO@qFQM*B8# zou{{HAY?7{UpWH=Drm`^B+wea8o0VP_G=|5RQZ0Fae!!?#Agh<#1h8$`4-L`n zK60vc&ES7LsqOoVX)zJHK^ZYvjXa}%J<9B^7$vn8w??CanAaX9x?@E*5K4lU@Gw|c zORUt_rNlD;+wRMu7}QSDw1Ua`W4xf+J$^AOod0Je8?p^5Q2 zZxZvux5;h2-WC~SioE|$7|`PTTWyvF5m#Y7w8n}4>ITLHkINMNeAdu95*i?}E#pyj zaPMv}VOP(KH-gFAr}QHr(VdhI&1ALbo+nwW@pS&E!D3c_fj!HJUB{pm`oY-d5|?Z= z(p-qzl{2^2aHyhwAiobE<&}c2_;Bs9&5a?;u5-Klr<@czW`iA{>F7+JXR}T^95&C* zuKA&h*Gt1IP8VRBItFN!M-W428+psk8NP3iN`TE)(;`b5P??BiK`nQ3+bC{&G81ar z@sXr>tVA5Mnte~Cc)g(N3H(ZSO;W5i>1uaOA)_(#VVcTAY4+3fDj$IMn~b?0!*-ejd{;-#fYAK;%MO)oqFc*5gvbfgWQ^i;Kj3 ze16HAUoy?DZTu55%=5wO#_s!^4*-XB-;rpQO_&0n!E1vp57Q2OS8pkm3{)axHP7C^ 
zH$I}f^A!g_$#2ngu{`Q);uD61bP%1?)fu8R$saWLqN1>y(3H#PMMwTr43#g4Uu{Nn zx}%4UOh|fyAY(~Yi1Vc8z&|@{R(gL6*!h?;or^X`gA*K8aBiOHh?E)7Wtx#8Eug9y zj*r$ppXiK%e6b(kA=DK*td{L$6w{)n>-K;oV_nsp=B|zPfzjSz?cg9s{6PWk8n6~K z%I2khYwNt)zl1^Zo)=kR`!`u2>-U@DvoaF(;W_$Ht;hDiX81+!JK*NEl}vVNOGOQv zs^(h?_k6?6`!Oc1DmiMhL_meWT*liNbx}3I(zHWF zbA#ICwT&c6)_bpY#zhc=BSlRcql&bn2n*}1E2cIk&r0Uecb_-J##|39xA8RRLzrOl z4fHUU7{3|AvfauM_V4D97+MNM#fkBl(HF`~1P^5t?L*Z~=!94E>bPK7duKNUZxzj{ zmtKEKy8!_x1)^$cVXV)p|5VwD>6xxb`^TV18korj+YIjNyKQ?A5DkjEBXv_N(?;p{p{NhkzpuL{rCd zA$JH%Ig7eWna)uI(N5HG7nx~yJH;TPJoNS88;$5)lwHF+djWDA?yCP0r9zR$ld&kw zt_oH&bUEo$em=6|7>ou`0cX)OqEvWcjR_-JQP&a7J9b17=MbGkGREI@;E15M>ggnY z6vst}pR8DGQQB*&8^n=zHFsXXl_cH`@b6FS)KE=omB(|%XIOtmX4gSIo6=jvnLfL% zX!XBirloJfMx|9z> z&kV&`Gx2=eBm8t+16CAcO=g*Sb_5-sC#XIhvtK3@a)Ls=r0hqD(eUnol-wwJ+ix~e z&rAUqhQr}e&Va@KQ?>sanR zfpp{2%Rvf^+|{0G^G0SrYJhyuOmT{`A@k&&W7iaNqsd}^eeTaxg5d!35otik&HYKtMP4a(Q|W%w6@TpWZd{~Fui+$B5lU^1FAGTK8+eIZ?l*t^ohScPf8{_ z3pz%_KlAfM8Zk+dk0~V)Ib{FL5rm5s;?0VW7FlZA4aYUn<}^O;m^zgw%#_oN-H_Ra z3bXc2u^ewEa~VU5)oj`VsW#QV1+ONWR@W2dT!Dbi`l)s<)2q&-FK^GY0r_8j#`0gL zMHKDO>yy+n*b3cMzTce<;wKA4$|qe&X)gUtkUNo%k;;p~jX^bt_Y2~hiJkFKos9k)BJU}WNsXK0Dd z`8HG60%D2e{vl7$aszjGaed8;J^6!XhvcDziU~gvL(nYbU2+vC7-3+Nvfpozb1{J< zo0+_KYjO|$3n)5cQxM&W7IA>z#Jr)X$I2)%90vt-5^G2j9nZlp; zgb=kpJ7}1vlGaRtlLst6n5)J%+ORr@84KHM;4EeXf@ifF7d#`qjl6s%;NWv-#w(P1iXP;lCv->hj?y zg}UiHHZ0c{{?mxUx;g3Nw_u(Wz6s03W3q6^H!l7yP!uJpAJ!vher~M8rf;aj1|U_G z9Q%5RoFAsyHeCE3IX(n>M-)2(3Xd4xz)dF~9OH*rqO4)KzKKD#wh>cTclWff5@R~m zVFYD9WBlE#nRmGkf*dHo>(IbvCm>Lj=u}i-$mt^eQ&gn>_w=7FgmAf&z>r={CLH*j8Kq9-bhxG0ju{ur*rsV5-=CrQM|ijx1H?Ed?K5i zbdig*6T!oDE2oVkunW;otkK@JL*$eW5iblxb4?ZryF0Pvzj=4lhipa#NEA|JAH{aNiuu41gM^nAHNV-@fcZv{gF?T5vp?vYa&ijx_7i3=wANXBh)$#P`+=i6y%du*$2Ys1^GvL18L}2T|(3WcL z-hHdD)u#RDGFlLug>W-m`=K}lCN>r1Dlt#iXDmK-eaL|@a&KzIPofI0wQ|iD`jy{g zGJ-q+K4gI9(5e|lMjjtFtC8=3pvle6O;SODPzSQI-H8G@JnUqiKrKu9JD^r?clFE9 z1G8+YE^gyo#xHIP5ES^JeQqOAqot%CaDGDa9etcEe+PwNC5U3bWke?==V0V<%H&7c 
zl5OE8U^_TShE*O-S7<@pHFIt~W~td*QXNi72@B8D^T(LlvmDR;S{B%+LKGrfkD;9y z*4GhEyui^$?JFW*;o%kSMmGwv<3$vG+)%Qu&pp z4ez_tdHpE8dqlW6DHkc)i4-6}7x1E(?f{!8veeES7q0Kx;vajN4`&O17Sb?m1Z+i% z&gM;yL_bR!7X-9NFd?@6t4Y)j&sk*K9X0ZMNQe1ziHu#nGmWU)aKU4DKPrX)i-qQb zy+MLq?vgmWcuQMb$yx4~fn!cTZ*=qFA>&KAxm!MxkU^B;2Oj*CE2?rCQ(qF}EC$yn z4p8T@m&bb?yNl$$SC{?f{y3h9IVJyA#LlWOv5SL=a(}Tc(--1x35VyI0a8o$Zv_af zZt+PG$xbNR7%|De9pR-l2&Q zlI`aU1}+H?i0g}I#0ag+f7w%V<6P&|OPG)@^I8}{Kh(nhh*98=Mj+P~>#WXo#(z^| z2oFAVc7ykHk|Se}%#Lz%@NxoIW~Vw&8<@Z;n7B)gQ?qK#tD zoi0we%4?V#_1gT%q=-0|tdB;iKYQGW`%TtE2mG9+-X6p)w#~7F9c%jJUev&b@ z0gx3S)$z$3{;T(sffNMNc9GGD_v(RZXqR;B$&~CAe$jNi*Jw8;?g!_NArRf2_q&jq z-C!qn*#648zgqKPouI&1;-*JmJ$+20s3icID55q%H^OO-gbs)2Wxc`kNTFeK9GhxU zCQUDvK)?%L>yM*mbodOA0l@|ZJusjoMV6HNPdel&IIyZ2(*`wNXy3qbX3U!}u!iz*wI66X5r*ArE!<=)&=*p_{P>{PYSPkCAvap5 zb$1vntDsdp7_ca*S&0l?nmn%B2*sAl$RIx&s2XH59FmY@fS_&R`$Y^$jktSfjnfNW*pC#o=)=hkMlSDlgBI@; z{ZXt-ZaOr1$A2r6x%vjUSCZrg`!jvt*C;(po_7@fmkxr@bN+~Yg$REsxx8qtW_#lr z%;cnQM^NJ)J@@Tm8yesM6mT{hCXsbXJb3e8(gTiS9w&J1M7mY#X%znuaXED|R0${j zWmk6I_F3_e<?(Yt)Jrh8NIgElK27_`=V-nl4h zZn?3Rwr3-Z8VI8(HW!;jw%gJQF8IsA$|PX>i5NSXu9QEFlqBV68@eg-ZcP7<!s3F+KfC^f?JM$FPOa78G&9)Jk;dgReR0LWyzvsxuIM1f9F zv0Lh_F(aQ4yYXED6pHfz>#TuuIt=MY0x=y8tn9PLMm?tkiSdc8BAtBt2_>#Z3013O zQ;TOKPeYRBuh{Jjx&wJ#3gzTe`!sR+7hv8o7l36D%K=L8TwF9FEA|}ldB6{=o9Cl~ zz0RY2ILUWDHSL^s7%MF;Ng0`xzR>WHq#4FQ5gwV?0FYzWiGmNp`Oa%_O+MG-gKSG~y`c*=^>H{^9!!~Idok+w|} zHLAoo1Z&kVR)P`|c-9klX&9&cWgro5x=f+SlFn!le|mZ)p)cH9p-hUJ#I{e0@ll@R^D^gZh}5>vAFEcQZlrgqj(_Z+h)t^r{qQdK z$I{F%8tHEzs)2+L=SE6uOtTSj{b8xNz66v_B7YDpL4Ywoj%o5#Zj&r3 z-H6dxwW3V#G5W=;Mub8oKPm1dU%Pfv(iqMr1SKssY~$YS)|yU&|4P0QHv=a&G7{zz zBRs#PPbfdwqe}5LmdbK56=1 zF)0l{0c~c8auJpi-`TijJS$*Bt0{chVoWtktlIc@4&4Wy>`V5WOyTD|r(9LLXMUJa znh%0k$r%s78&t&YXj0&Iq~AsBGBPeMcf7XZ^1rN^X%&cq?oE15O&Nbl=~!zNTC@0%A|qjqBb-JU_w-gW zO!1H}-h7WN*eLvuF-_m0+LPMCwAvVnL{PJ6NiOUURvVkXbWbsTEfyTd`Bo*mLo-^d z$&_5;z*ES;A8c2X-2FAUIsp>Yc?dDZ*%904sR%v)2Sm#FseG$K@WJ=Wc8FOtmH{V| zV&Bu3nfn;zx$y|U)0J5NsJxeE`egi1-n8e%ca>?WG7K(b 
z%Ebc;lYd~|Urg`>tJr{Aa^hcR)4xw*sHlnA#23REh*J(QWvAU?t>F_-(yR*^-NSpS1+i_hP5JcOJPswO+y$ zgS#&#Z*J3p2<;^Tmp@J2;AR?7Frkk5v_5y$y7zmFE>rU*k<{H-PoW7oj0j4CYzg7} zAv3(N9?bFaJF=Hn1351>s{U_@I$I>zC0kmL3Myhjinw^<2w5blw3PsV^I}#;kqW%J z1DtwRj9wye?@3OXx0e0eC{iyof?7vgS~odzux_-jX;M*?Vstp@s+-M497tuXsxpG@ zNcm=3m#Bd4PCp}7gVD^kaZ-ahu3pP-UkDqv|A7JOAMz zmoi^j0i5K)#KsENJUG?P;sI=bJb2=p%Eb7+Li4e_SF)i12N6iOCvYrrpJa$lUHHFPM3n=MB)Ac*$M4ybZfw zu@$t?Qmbzp7~j;M^q=?}4s;+&xT)2QPAhAqRs5@19Npx5L=Vs*8B z!=0>Di_U&xV5PHS1FQ^I-p@xUedku?ym7(;cDYKOo`fnpeaP3_m%~W6JTxWeVPJsP z0dZaV4=))A^AsDX5KxW(dO(UOL8sFAl6EzL-$((x2cRXYm4GP3qrzl9JSyTeNuLlJ z+kmR`(QG3jn3XT1OurtD&X+;M-oC_9zN+O|7u`MQWp_WFl$4NE)u z?*$}Zp~54B;8&4VA#TpfeO#%buNkZ+E{>`mg#NYjE8-|!*?3rKhR@sVtJ800J_r&d zUA0i3W%*$;^K$S6YPD5~IGTy4_9NLXbH};M>Acm)L~D=2i0HtwD>aj+iB(AAD5gTZQUGhEW5h?cDnso%IZcQUhK;3>QZYN+err2id|y7 zOEMweaB+t-8pK>`T8vSOeVlB$-YApN9ak;H-@ck5Xzm%m<-)YR4nx9#kOQys#l)13 zo^U@O(RFoy@o^d6n}|KMEtKYl;+i9-!^}EkZ(J@pw)F*HXSdvM7HO4mT)S>a35<~6 zf3a%=t$(-dv5Wz8j|j@mfYrwqkB(;owjW7g zvJJc?1*F0QVLHgRZ`BvgWR!}&eBOy*8ut5}me{J4zfYwf`#m!1^B9MrEXXj*u@=V+k72+`|`>em!edj30pjZ%Y1hTHAeO7M}{jOr4_ zg*ax!?|Wiflf%X+{34lC~aLUaB+iLxNUn8HbWglN-e>%kW<)7c1q6&az+ zzPiVCzDg3S;WH{me!9RF)OFR1R}k>gc?T#_yd9@3DD3Q9{bu_28=@HhLNUWw*r%bv zRiAT6+kJn!gY#EdWsEPf1%7=(w)xU#S*ypl>7w^ky{E=)@tTFNtR}CMne9VPtlGL; z5#Ml>Vu_!Cih;qI~=AVdOuc+Rq~&#OHPIxqYR ziKBpOHN=KEWDE#wP3AONeQr8XDDEvs#t~77j_s*vI73O>Ro$apy^Z&^g!53aBotP#CTB>oa-{k5>?q~7!5Kw!Hd z3Tb;)PRrgcnR;h1c7)H1rJ|XLziXR6)YYSS<97SkoROy~zHx+k7UcH}#F`O-`v`$I zqp37b-zO&ZI^B~d1E7e8_Ckc|H|^(1^IoS{{XKaw8@E+wkVy3>5Jq2KY*ht54LSr^ zwOZZ-qqtThC?cFm7B&9xwy@Fm;~ zKodHBuTErNUPNkRs`V*?#`$zhf(%0VF%?QCyKpnf!+<~FBo}Tn17;7U_e+3Yh^=@; zjq)$Y7nFn=Dcwg{HpQh)bkA}u-9T0cLDLEq#W_eZD@_Z3Ut2ZQUb?>Dp-4k=zon$o zfk!JtcAHt!x;>n>^6FS!<|aGR$J}#k>b;m&J6wy3(R6nuV!eyM{p-%sp;7`;piARj z44v8H{3%W$Z)H}1?Z8v@n?Wf)j=oxWZ@C<19{9TZjHWaVDv?AjNji#|{$&Qotj;Z4 zZoMxqtpn7imzmFgB@f7d{4W4Xz>{6W{-PEl943V)shgT+UrRLk!Wg3;NQ^^tV9`4n zJLW}dyK)7okdbg`w=u}RC>cr2YnH{Y4q%t@+$ryytfz(*;O6a;#B@X+9l6^r^&uRD 
z%XkAgvE1Ga_fD|`8-I&}j~|<8l@u8nDJYtWqzgTzmgWdAp)5B+9#7l&jc6U3gCT*n(b85J7r4wcUhDG`RXyK7O+o~1O>gA~$ zv1iJ`vc;dzJW%z=e+y%BuBECLW_PJn=VGC+szzAVtg-K}_B3*JD$77r>XG?mytAwe^k$PL<)&kVL*;Uwxh)PGgI(dvQZoIEN@9cBRw2VCFi*i37 z-Al?2(B<>2g?xM{y%EGYj5P%z{gE`=LG^z};?c0<#xdcRDWJ!lOPTQenyxlz-+azD z1g0x%^zOOBaLt267zaVPFPri$nlGrPu9K!zXO?KZ!vBZ7w`z!UTed)JpHz3)9|uXTRGedB>|O4S%OYtE8pqcgYh zXv+tGb8oI)m}dr87UX1);NBxCUUPN&9)G`1BOe>r4#kF4CrNh$5w$fHT4w0~3gnHg z{Yt>M*m=J|GOXJCpoxr3H%r5US121*2v`zCA-!$$IC;?Q7(F@2rY{;L$8VX7#5t4G zPjM58ftqp~Nr1`wc~Prh0GoB61VLXGn9l_{9gQ@*KFUzzi?cCvp)qNkTmzMXpeC=G z>_tvmW2{CwSsN+An^rH?1uZvQ{kBB@^WQSM8tfZx+MmvuqH(ugs2=Sdmsb+F!@Nwm7y&Iq%rfu=WDjw{c_l%9%`l8-`W(m>vy zQ0Oinb}Xn^k9mn*%RDhS@nS%flkiLxX8ZY5i(O!#X+}MlJfKv z@Ch}VV%$;~i5ProOZDGMP38dpHlJf>6T-G?@sZ|IwC(B>&CsHGEt6-=n@tY z_LpLeMn^sY<;I21i1689!f4onKYHxvB(n0n955X>LnGaHU7fRhx;R8jnG+~VIW{`6 z@6V>Ec>58tj@LdrADrZcd4eXd8>^3?{K@reQ{x`t0IF{4;Nv@y4k5G|#iDLyBWyB* zFYZJE88P2I^kF8K#}je?iqV@YQ+{4nEGbwsuj*&5bPwM;8ZzHEsg@1D?6qanqWYnl z-+YPa4S&ZaTRvN>-Z(LC`-3hrmG0p?amEd7eW)`i(ps~m=xw3r_Z5iVCnskf_`*3QfUawr^z(4C9%CRkby3fA z%HG~2F>bNr!78{98EFhZ**WJYVy*j0KaYeV@tRn?mx6ysbtgd)msCeotAy^RM3lbN zS*0^#;Ud>N{vF@8t-Qmogw0)azJ%vtabY?XhQ+h;p61wG5A*k)oW9{)joZn)>{r6h z!s@qq>MMbjaeVbP-5(|@WNXyJGn)Ouceiqwjv|x-q;ePW&A#_QY53SjFW$ny}h1Bh#r8G z(G7kY({SlcL6E8P3$J_(nhDtt3i5Xe2oU$7Angv%Yv$^WJ$B~{mw zUkr?WpG_7510&_lfJ)bn2zi71*_1QqQN{hUtZbs(#_>?YaKp&n{+Q$LrtNq5&F>l| z^7)&0+9i>+~8)q$5#S>jOTLO?HJ?9kEa`5hOTRWR`_sfW-&L! 
z0zTna9j9OqJh}M|@&KlxXS=@X1gf@zowJIWM7HJl)h+Z8QB#4M_~~;@E4=Dybv}(cfQ38Fj+DN|VZnwG~NSUR8~Fds#wLq{Z?xx+BcRvmEEoF`|5gDyzK5XKK;r z3{Daz1vC=X^p=tf5kgmqfzVA1{N{scFsE)0UBrC7xRl^&nE2K&oh`RMjgsh#{x~iZ z;yX>=t4TJOEw$Ak1-a}C%<7Fgs#pn!ETwMU@^$0f>dE>#rv?bOBg#L;K}|&5pecO# z1e0NwV~;o3n8$tCb`Bm5h-i@6eiHs3Tv>n}8~W+J2N%*$e&YC~Ei6Q-{l1ltV+O8} zOYmk6Y}jec>1!GO^H;s*SDQ%(y~3V0ZOTOG4)P8d+qITm+1bra%D27wSU9wuXnYM6 zn6j;!DS4-{tnX_z#@4=U&%SHFem2lE@XnzzanlyM#i;fJ^}=5a?zerf5&r5jJ*@?g z*G3Ru)21lFr{{#gzy{MXFQV%FZyNzy)P6ajp9jXead5`H4n+MDoI&Z8^eCeX6&~FqTQJT z=(3J8+~Mcinw2}Ps6_eVWp!Nm-%>76HDb1y#Rs*_F}6EQ={W@g(k?r#@=^t`YR!iD z%&(RH{hn=9ivxLr`V2-vWOWGO)KiReKzgfA61T?SM@h$8h^`%{?{7 z`&#d?>5v#3JMvSF$~>RqeoYs-?9;;(d<<7`apT_P_f$5O^ zeXFq;Q>%cRnqv#;9>fQSLoP}rAc?4WZHN5qWCM6rwpeXs+)U1`heR?oA7dFF&`yrl z^Yjk3_xr0@$Xd7(y)uFw{I4?nYj~jp@L;?$0DRov_y@Aj1XWpD!S(*PnGRU|i^be2 zd+*O=%yJAHTH4=vi`#-`(+_R1dCj5rl(1RzXx4kT zh`DcSJRqtJiDBX_lt54EWeWWq-)5Agnz0X}p^N*bOWR5?;_o+>nuCO$tZ5y`R8nP| z(9c(>{8^2LlhJB{3I7U?NWjw32C@5JK$;uq*Yoxiv_C`JxL$e$pdV~a>18AvR!G^G zUV=;)Sby+*MP$*bN3`A0=5taewdDS4i-@b=S4qGsTVGZIY1lMfOA(!?FeRC2+dNBZ$dgH% z$L9N)!adCIp86+f{?Q5<_c8G>`3^N*x~D{T7QdMV-$8I^ye=jEud+;N8x*D!uSp!8 zG2SunOG;fZv-sz74+qnp!JTT!R+AK)(mzpsACK=TWBG5eQc+>U_F{P5c_K2Mm&PE~ zm$uoE7ou@ULpF_yXVn^B@B^InlJjkU42x12x(yEBrek#Wm`fIqd%eIna9yd#n%II2 z433!4ClG_h<2uWes0TDiVX&U3ZFs3?ns~obJydWhg9~v=Nh`IKY%v51g=eJ)dPybZSHU%%qeKx<4slf4wd$>Hu60CgC8_pVK`I zy}rJLn*1)bxq;_CzlHsh+GwYO3Jl@-!q&?7g*d?yZ{Gq0O@?;miJcCc=AuO9+(}`* zZN96xx-#)KeYQsV_WjVYxByZ<*UN+ib#H_xgf{?}AlZ-ebJzi~B zd`2S-3EG=}iZ7nreLu5X7+;T9uHu|CR+;iwSt!nf+?U3)e;0=K>hp%R0&c6tDy!XaGUVl%4`~t{oe1tljVO1ZG<9r6Kk4p@){sVV+L3O%_-#*l*lQtGNo1 zk1|!CUB_sKU2D6dD`rB0>rJ#vZOBY1H$KyPJvzDJY{oT|Y3Yufyb)prRH)+oq_>( zg2wp&)qMPM4ZyJfXOhOg@4u5Ye3a>EytV-St*fpx;0ao^$nJX_%(mm0N2T^xmS`E( zo;8w{r11^JhsP_sqja#%kxw}#{NCzn5yk#jpVY|z}1 zm%R-tSKws=iXgbnqViDqlQZ*Q9?OI$nIx8|zwgsWq8_j%)-^94-RW42$~Z?t;Z4Nd z5*A*6`3arjIMfEoC7ALm3*VHYGiVM#84l-kek#eGmpulo$iJ$3zsOJi&nUm~wB1MRb^3iZL5*PWnB 
zsvIZ!b*6v9RR$m1T<{0Od;ZJ;C;M&{o%JdT2hK2S+TPSUc$R9^K4|lW4gS@Cjo|ocDSSN_nSG(1a^J2%d*Q zrf1Lkc*_5cPUTi6)k1{AZT+RLT=mkVVQVCF;hTRE45C&ENUH#8N=#Dx(A{g)F&pYd zDLzQ+C)(VJv3Htw)YNIF)nG-kqh=qInn`O?FOwEayy72EA4FI`2{I`LFGF%3Y;0AU zOc#tmf#^SbH88bUAqfwc7ml@ak>0m!JKJZApCBm%<*Pz<^nc1>wdcH!2hf*BwMgAQ z1H!oOyH}P5<~&b2TPy?G;OT2{trUwuCq~fl#|3pJb0rJ*QiETs1jHDTh?wWMA}xrW z^2oF~k}zTlD!9i69{#G#^~Ea3<1st*^upP;ODt)`{qFu0qOIBe#Pg-}Kb5%zDEpJ7 z<#Q#K0nm3A{ieAu415#1beBHb>6SL<@l*?wVK`E9)ErbusHPbJ7$ttkkfUl$1}ATTsU;D#I+CcTOuX7QJ+l5|5S_<)=Ub^`%g;|e1Y(#|>mL$NVja&!Ulrs+ zlL{=A5abdjYS)Iudp_6s>@|q)^)}jJ3fA!sH_r{o5U68io@-<(9kGn3@Ce?Ki?e^@ zd`^IHmQZZT`5L(TgV3nCGF*9SeNY9by$5QmS$xiv`!^kUZFa>@C#BK+yTbU zSU%cPDY5HYuQ>i1pB?Fz(P%tWgoiI43egxk!iaxEFwE7SRBWxNbN=?v8fxaOj8)!* z#%$<6NgYc!CMxFB)hRChI?|z3HVU-b9Ii{>{r2qk`P@A|=XVz4A9E!TZWu|^ni+fe zrJ2xNwA&6aL3)|dp#~WXm_ALr3>tSGi6z?xXvs4A6#ZO(^Z1qBfW^c1@@B1&CV{Kw zG?0i4O=njD;Y%w(#OGt{25RDPuzJ?+^HA*`I6NqciC7QGr$*O)QN2zz?S|(p^bYNj{JCQK7^#e+O8oc zKBprI{F%4Z<{@tmLeIV8IZ!d5R0w4?s>oAh^>nADI2`jjXPZP#(UT?#tV}Q>DvFh723n^%a^;)r6dgm(@9<&D9$6cH}0cD4}%;&xlA6tKy zd0t5NC#9o9yZtl~3PXMdQK_*nIUo(K)j z?7o%Es=2ij{a=28F1HRel_=lJS-sS8ZBag=ukWD|%$SL};8zannLa6KGp0A_L_Mj= z1n}C9y3(f$H2j>!l}dNw?H?%;?_e6epDbmMoc#R-QIQX@pq$vobq!9p#T_=F<`4no z+-5kPrZ=Bn9}gV3^0mVD;+@UG!w-`PxBilQl%~q9=kiI<(6GfBaIfuaf>pQOx6zd) zalPSXnEW<%^raFuy0ZbL19>fq4#be;li0A?Y1_C`em{-RBIT)r@9d&Q!@vdid|K3C zF7_>&CMpun(W3HO-_vYdNz8}oeFfGnT;q|b;D%HJvf$J3rJ|Zu&VOmeUM;BuaEN5OfHsvuYSI!am=Xb&b^(I3;$W>`T;3f(>y<*+jS>I710Z#+|& zq9;m{>ODNHvA^*W7AQwGYwBA_uTV-$kKPD^XRInENw@RxioPyczlYSqo;We1ttU55 zr)j|RT=lg5^4OuH-^r*sPRdwnn=M4HxCbw_k=&P*3Oj6erH${32Eq0zK*7ATb)>ET zNKpSaLqh)$r6#H@baC{Sv1cCOP7&GjwrMf+ZAH5}WB=)P9c+0ZTM$#dZbX!ZHc9#S zWClq|RCc5{ZxU1F%(<>IUFZ|X$2$@g6XdwqyH=)L&oX_^S`K{KTh}>NSZBsBb{q;G;jo zw@n$N)WHxbf_i$uxhf!$5wjCOz*Y+O$q`Y1p|QJ1uG#BEj+weAo&7n>X|+sRzWjiH z%Qlbr<{L}jU>~s*%%eA^!-HPukZkcrh;N0F{wE}46#Xs)cYIC9L#G0d3*Ihn3-PL! 
zdMRl#PKO;N(hZgAQ^~7?^_bIQy{^qWvkiNU;xtKSiQ{ogJA22<*7g8E1x1J+mt!&j z_ndnA!01H333#PipV>F!cUf7R`aV_Kv7xoKHCn4;teK`^F`b6o>V0MuD}vGwZ&RHz zWs|*j(k7#m^4~b>H%=5#?T8KmACHGkD`g%=5}OaPPk6nQ)(bfYD-L$`)ABoD0TT!3 zabp3@wfn0_H!8kUBW=GrJkSWmmu|$vXgyEp+l1lA8|SkJ&XSi>TT4C;8mtjajI=>* z=%e0;uN1c>w;TomGMf*{zjk;YL6OUkyMmJ2JlV;oZczsSIWKd5fOC>1pXZKJY`pX|>Lr;qJt6JO1sye0z%xx3gZ?tdIKVk4hElg^NVyaqdw zMU;)T$Q*{vGKq}>w7|(J=fzT!pBE7w^l7SleRHG36tD@@#jtA zHV`7X2>EUrPvFhz3f$k-mq*{wpY3{JAI4I1c6;WpvBfC@u74TN)cXts4$#XE@J$&9 z_m`&}WM@>3%P$$^-|`E3X;}zQ&)qkj2}dcLKN~+`r~G1JX>Ga^Uu7d{mJh1Np8oCH zyb~=%Dr60^^oS)a+B`pY_cd)48lH)dWX45wfv#JMOEElsZi^?~o+E8Dv#0&rOE&V$^_CUuLj<@UbGirCp-= zrp58V-M;Su!T7Cr6g}--mM5;lN|)5wFNWAxIa7xb>A`|*c)1Ur50RnvWR<(sKG6h_ zy3`#R1}pGn8f|B|`NTxe%bmZJ;_k`b#ZQCk$0{;%1fbGD44lHiF@iwAC2zU@n@)Wn z{{TqhCx#+M8U4)BSSR_MqyH~tp(-)$FpZ_K2_)Po`yoCOAnph`IJ zb#Bd=hb`h>VXxBJW2{wr&}I6BX^6rCn`!yeef@TVUb7YN<<-?((Bi5cq|ubD3s+5~ z%PRdt0)i{N$)5Kk+GLQ@`2C^&LhhmK>S4spsBPSU>GqHD-|O_CQtU*J76U_v436GU zQA0kg>%u3O6at)*^Omt3Uao^7+bUQ<{eiyM${_6ric&IJ^zW1kyEfwYr)j1l9;#wb zXVkUyihcW1HtKsxcm&2j>D(y!TxtX;3d_D0&7jYiE!$mIR-$SkwBHM5Gr7v))d@|i zd$hZabVF<>Fi>;)Mr+0Pgp5QC47)gMfOxWKDm$lfM+QnQ?W=wR&$X)Yc$e6d+ZSVX zw%_2kO@HWj7IgVq0jE$bagNmPJRCQwaCY-L#D0iLaqxP#n;in7OIvr$>R#!E?%w)X zw&?C`G&giwQ`r!nc8XzvV5e=0EPtE0KGT(XMF8B>msMZQ5&KeETmTjm`K~G3c z&?7?xdGdV}cO(cmH_!nfiZM8d+Ec-1X4ta4dpY9gw#cfshU5E?pQa(U^?@9bE%gca5gTG)xcHsLBF@dm?F+oUD0QcI1LOL98j%%1QH%gHd zZ^DlTq{@w@PBGDre5P#X1+kZ5S}Vb#LP6k80*h9K)_J4?3*TM#Z*oaXi&x9>4M^=F z+!1(sgtW!RebaS9MI=*{m_TTBY(O7Jgl07E($~XjS2?2&nYUObw<+3!nakyyDd@t* zGoVd?LMKT3lR5?}ne5p!IKOLcGlr8n(ubl$mm6xM1z~{!Qw&gNam3NPGZg7U5LhfD z@eKuzAs3&as(NRkO9SjD!jmJ3+Mo$89tVdlzbt^Q`sRq?KR+4f$RfTAdW09wp1F5; z_A^uiv%;A^X&*pgy`Shc)iE53p5Fl;RFEZ;3bVGEc9<5jP+*j=h96DZl4Th2ZJk8+ zEuL$Wldm|}yOg|*hSL&qVYH!rP>X+=HeIYXMrpL&h&yF(4_s^WFx?1(5pdteowWH- zw5<|WMe#C^j#avzRUBe}DYfWVzDnzOS`yh%GWgglcWJ6?lb`0`#~uc8+h`JOHpr;F+49DU)->D?O>xu>fm!4>%$ zafc`e*<%H^GNL&Rt&fHqF?3Ere1mi*+Lwq0Z@iEkt;xo$dk03De(^`Y#B#nT6(eRu 
z%Eppn9^W^wX~LSBF&M>nc0{6)jApQSCaX-gaVAIs$Fu>$^w3%v%Q!Ul4I;%Ki@S~OPJF1 zsct$alU)%s{qRm28QL>kBccjpGU71J1sf0R&bD=}g+(c^0-py)x+@Q;1@wZ1QN%-A z0!gzs4gGEVNI=a2$t1LcUjuf!XpC?>CR$MAx-B-^@xB3-%h1Ja=QQ7l-A6F_RbwmJ z1t6x8caXX8MT2e?<~B~tfOg&IX%D5P5RT%=YK>beR}F@Z^u=#1HI2{)&^f0BkR=WR zVUO5$Y=1l9wxZUBmrs-kw$}UkC+pabC=J$Pr>yyDPmD8m*Xe4RzAHWU@We5|tcjL6 ztwE9H6lAE_s3J$s-64Fn4)-XG)43cY?)<&qQR=OE?}o6^Ov z>-W(Qb}(o5(M>WdnGyF;KKJ;2-hMXc`!-$$RMn$qA$$s^5II=WtgKJG9v8BCRK$!w zthVREdr^_zf553?i{^-cvf;15a*M?8(J;ghK|Fl?ra{X8J!7Ypbo%Sp;A>PMnN-P2B}%;;4UAk_XS|H%PhKy1*WK@QXzVJQHmv#?JA zA+RgpaJ`p)eworj_thyIQoPWW+j4xa){NAC$1i-TS^xbp-Dk|~`Q`^6YJY1P>MA`E zRDkmj@@R4wu;g!c5RAg5-Qs0ZHqZcL_4j5xKwBct?d4wWNTR&_5(juh6oIzsLQt-u z(6S@O5>Ci8oexd>5GwF69CO=kj!eE(gZ7}7;<=kZG4`=uj$3{un}5_S=M9U$Kl`^e z-!286yE3zZV~)CeNJ0`OYv2kGt~mj>~Z8&HApj08_>9;yJ6D@?mSpW zapX82HB~d>fL#Z7&y<1h0#w1QYXhuE8D+_@>YeGcow{#bxPNJ6y%Ba_d!l1*b+8*Oha{922q$JuU6qT zvOS}j6As%$+?i{GkF{0#kZwCGGn01fR6I?)+wb>mhUf3u&LA+AxrM-#qEn4B3D$dt z8=uf`&E ztx7;pexO#GHiOFXB0sZ{AQwZ z&`zQI^8>qkz1Q;xf#c_iHaG3t0K$GpZ`A!KHRfvpzoHNAt2R{O?4Cw)B(m=|p(!ssa_A=R}LJ41jjy#X6-%FF=$f?J%VTYMKv z^{RaIn!Z7ga$&8(V(aJhjZUZ0CNQ4hSk%^Sf7hbBRQXIx+hw+pDI<% z0$0xfMOCE`tiE@jqhMOhi7|joZF3tB-=|I`c<@ikq52E7kF|U}zPePF>%0i;D?7oP zn4v+`vbu5SUOYLBb8xKm?7jof@=5EB$GiboUG;uVj**-_pDIrm|Gocs_Ali&t`|rk zdJdTYruF@T!Rm`4204{0oHwU=c{fMSTLVS&AS?W0j%oF>)2d!=HSpo41*%skmAP(Cu=B#ef3 zVJfRys@V)bx-iveE+KF#uGw~{v_izh+G-6w#zjO6;=7AQg@+}T4 z0D;{;wmVhj*@+r+^0^S?OSC`Buc1NI`WY|5zy4jaSP9bUm1jiSyM8?y69%yf$_yK{ zy=Nv(?Ojr~cUWJNrKa3kfT?$TCbVpZol(cJ74nkt@lJ%)-hdcFvA5&=dFXm6UFVo| zY|a?SZ0jmvrx?o46%nQ{&tivzUg#EILv~CQwHGE$;=J@o-EM{0V_IEPlCqon%q(iG zWhxw-oll96D*5y9;tA88Vcx}Ix-2$-c;4%GraEmIZ}?%@xKloX|^ zP#NWZEdrR(&^Xflv&RP?)(8@Y=`0CW12?4pw~EUe-4eDQma9q-=8Sn9)&zv311oL& zh-UI*Aa2qo8zy6?tk?w&=eyr9Cm;6NxbJgR!+uwL_`v!%&xSS00L?1~=l~iA;x6s= zjoh(-Ez0xImB3RHYD%zqGi;bdKblDbQ|y5AdVX8iyKHD>>zpAeGVB{o9Ko(A--p)l z%sX0rFE8d9(Y2UF*;SaD(cAdtB08*9b#@w#{*JoJ2%<4BJ>QDDR_P_R28+{kmcGC@ zoZwjB8{gkAouc+l_jg|t=q6J 
zBu0%%gv0{wB=_kD;zJImZrJWpr{@D6n;BL2x7=BgXMlKO8d}_aFzl03QvUqsusQ3{ zAC1KH7!I;=gO2dStuXly8>k-Y{zo-|O)@gEk)x_H-)W$SLv}Vc}SI8IVW5z!B^nr`imJEd1`U^^Y>tdb3}FhhMfm}z7s)9 zS-%54&Yqu8^GN`I>Xu<-gc+F!&>LAz)PGb!8lf`AcjG|ZI41dB- z(G@Ytp$h>g#WPOvxNWkrmU#Dri3R7zKVp z08u<6j>ck>ab;-akCd>m>FKBU=jDXqm9yw-G}EJ7=5II4Ymu@6-dM#EIHI;;8&Hab zRedE`sCu5na4QFz(Ww!q0jHzWaC_TGy;E#UGnAFa>vZNQjkh(OxQ=*VO{vCgv`Z0N z%_EA#ik0V#_j$5Br85_}w2CEk9oh`?VV#W#On-Q?Na2e|?wX0;z2B4NhjtMpk4}r2 zB$8Tfbbl~@sN}NM%jImeP4RJ}fm?-BL~!4>?(tZ~SrL!*z7WXcF1E5arH1pS?l!Vj zu1`$?;O0WXF-So61CjopKc_>x2SN?sju~!f?oDG5h1CQIVfd9FZjTSN1^#AncX;Gy z(W~I}FCR2WYk{=)+8~#47xL-1KIiy>0B~<3BLF=Cbi{oP8&Wdcj|UUv4+U&WM#GRq z2_S+yFkcP%+kCl(gyiE_!6|4|nRCT`#!R=NbeqWn_%+ z&pA`SW(UQr*vnXBBSg-@el`KZwI1PR+-D7Ouz=IZQ%1-ba%sDb%D*B1{GU7e*XBAf zt9gIz{QShS3i466K5y@myieA=7v_?--gT9Bv@cB4`;`f;u$L5 zfbLLj&jJ9~iZ$&~mz4oIhW}qV{rk^z6$y%*h<-Sr^+98O-cildjAlO8`OJFA58PY{ zag;@!@sh_2aT^4aftsDOPGHcYzZk-nrijmA*b0$_&wme?YY1msB&2u$cwp4#E^{Ri zk5g>48}N*UB^7REZ$S;mz`+5Sl(4-zMrgpWjOk6?R^ur8Fugvhj(q(#P}I~XXi)#} zJ70&rMv~&N4ZXXd)m6^vr{$c{vKc^Fk1`U1XnP9aY#BTTJ}5gUj=&qDczmHJj5s~G z!i*c>CM1co$g(T?-|&zK^2dt+4+8rAXcmq+Sq{Yd2N3LK*WwI&>>Io%hR`6lZ!jD& zIzFRme`BWan9Y_-P8W{UmH)PzF#pHgCID$sZjpqh6c71Ui^Gx9iR7aeqDx+Z@!0<+=CO(&OK^*?VnQ)rA_tBz z=9HTX<-d3Y!G|Zo$H5{!5rl8R(RPb%eB_Tt({P`Gi}9i1L=j}y+*r$ao2fP5{rs1N z{u@< z5-$oXAFsx7s`-DpWyR|#&3|H*|fj z|GkXdZYadXQ}=p*ZTPFme9MQGwrbx?s&Xx{kd7HxAB8KZUxDGd5{Srz`ME#^i7$k8 zO9HPh5u^7adBtWlM~y3jq>_l)XGYf8AvHa&joMm&8~@+SrnH2+meh68%>0`u|3i2Q zOe79M5^4Hqjl#ce_1~5=SSzq+aZ}WzT=0L{)BlVB`3zGUfI0|2Gh(9g-@N~~?Ue}H zgd8w@o>OEtN`JF7|Hc<=JVa@duC>}4|L1NzA&5jhzR3wd_yRubamwO2 z(2I>dKQMsuK@yk|vC#JNI8`KWsXQv-!d)eO{YZgI3*wSGS^6wN(~k2&|ESCEE`WI* zS%7C>99Zm4aCJaU{@nJ)>Mz##E(Z(77us%ea zAO)=x29{c55a@LD9L6}dXHYE{1CTEZ0AJdPcKQL)IcNMptdJonl;A!7ktIQIKKf0> zSv0PkUzWLR3%uG;W)aWR4#Opc1H3SP*BnZc#e}URm=VRGT?QbQi~SRgs+Ud}yqDsN z1588Y6o>c5xSrt!HH*+|4-DUCnsU*=SP)!>#(4$nC5d?~G0?H^7w;i+MJ}`cLGfG=ei&b?8%`Uoon)(&9YGalN0pyM9j~ 
zh2{7i3eKDUe9(y`d&yjC$?EhaoEDnhHmci>sPiR@ld$Y`SpA=@3T8E6>a2Q8i&~DJZGymJpZkE|4`dOsF|d5E8RUi;bAD6pmB8)_h|iz}u(NY7e`!DiKji5r z+gVaqgB}nIfLG*S%{Vj%24Vkk!=3=M;?q=cz_L+I>jEnk`8}{$kR_-MSzIMI01i3Y zR+~P!HhZkOrn?7B5Ogna8psmBc7eNN@sEQU0dCOTMOsxM>x;z#z7rpluno*&)1*g* z#XmT@_!*!Pt~sR-rTO}ntiUayV7R@A=rh0l)y)k&F9kzubA810ntI(%mJ#^=Jp=zoZq(b;esVa#r6`% zuA7f3h?XyCc%$OWc-)R@2l>!DJ^QlG_% zk=d=bJS}nph^uoBmu*lXdxUuB{YwwQ|AFLYgAHuGmUSQbPgV*xrklBH`QpOs?g$l& zyAk|^gZjI<>>LLUUfJjnlWZQp1WXs8Wjhoqf45$9q!*&)K%Vwb<9y83&#jn<+jvfC zwOEUyIE;tH__cYqAVItyocl{M?_u)eN(EgKNXG2b21iIuZ! zJYsY*hlrvM3W?q;1xaAPC&}+?Z(9S4>9Tf1ry$)!j_U&M!Pfs=!TyKKAOTJF(QPiS zY26KDFz$U2)3g)|wpkY>g)bZF z`5`@<>&I*-MvF$su+825NJ5I4n+794EduKf$iyo-2#@Xil>dog5V!U9x$CUM#4PkI zZH!|wH-}I&5Fm!Q`ow#)akO$Ox;Cfl%kcHn-xu&_v`5))^SYgWpR=AtmA;^yLzt1j?=zGm+WbMd)7{^{3B77$J2SA9pK^a5G zj#9NgcJgC{F>VA~5CZ)DYfLD>WDTu;8J8Kv0jwm~E0)Mb@qC@oG=w=KIoafZ3-aIB z7XOD2d*wHGu8#JLfTkRpe_~Crl@gBvZfm<6d?7W~1V#TalSGgtPP5~TU%U3F_zZi} z`lIu1h~;62wQp5OX!+370;8 z)`#2w4r6!53j(BW3IHc@K+}Xdyy=Va7}r1PFK`3))#Qmx`P8mc!KdA?(7CdFVu=&~rfry}(}l@&=l6}! zp$rcr^;?8|N#Oeju6LAls_l8GqYC?5Zy~mdvK)4NPT=N@QC;_QS?v1^SK(PTo>nz< z%BYq{mUU4#@;D~6fr6D{qpxK=ffW>~Qk;as9Fro*koz$d;1VUU;e#(UCO??Z>x$j~ z!?!NVF_M72eZ$qwv5bKALyYc`O)7{*AV(6Y{A6OT;niWnktgdu4hdM?8%nGz%>8Q0 zt-=4Y3-#|_0HuvY3r7uSh&WIHfV)ZfnOHiqeezc=Xui7 zOH&DQ68B-}bBHj)%Wc;wt*<&kc-gDmZQc72<rL>Ncm0FtJBr&hvNfL=e;Qgxv>HzXXS@`4g8sAP^vm^;P_{Ve@R@l?K|qVrMByGG+O}4dYvj5ETn7pr#I1eq9vQdlG8b zOEW3h<$j=?`=PYBcE^2rh+azc&YDwU;twtvc7F~nXC_b{>`b&~=k>O%8%+8UjZrCo zuL(bcl|1Ju*aEY|T9$6eL{UD|b6{sM{$jMP-M|?l`nS59zZ@vu?kAWuy)(CC8<;6n zTb&L59rEJ@O@mT%zUxrgLYl*UHkW5_oZ`-s)Pw9*uZGx)C=5r>^gS&%b*_8QQ+ZA3+k})Kd)0eqtYZ_IXLMTplt7t`W`oF-78&4R7-CJaXJl@ujNaI1u_>8bCX)C! zm0EcSOA*FgMpQ6(P7wq>eG~`XGYA{RB*y)m|6UIT;K+L6S)y{6{n57*YHO&sE`)pP! 
zC!@v{<{(1@_gBWS<)eo>!|Ra#j@cE+3b{ z;X2=yHG+U2tn%&xPU4RyQlRn=gQyI-HkS>fQGXacHPPfCnA{VCh}5r3oiQ- z>$2sL;`%aYy$7$)r{MG#2?KElf}pTD2Cd=5WZ-CC2ury;fE44I(bx;_7WVHZo)qGD)8{+wubrbA z!eh3!#i7NsjlmjGw2)KyJH2DYWQ?lQmEofbFWUkSGg?y60-+yw7|8$u)C>%bPCK;> zlxa0S5eF7Ve`Llh-`t0;@xSD1P*dN z(lg8nTFEmV9aLTa7ypA*?dIS!PIr)J!kqr!`MZFIN^2$?7|d4YK^eU30Tks|#f<!?|^&SVChB(Q{mV-~j0LJw5SIzdYt=8Eu_%k7m>*;~5 zkFf2JaYW&90i}97`$}zCePJ9Cm@x-wZbcF$DAL6n(fl5`o5dsx-wPBQ`EXinE^97r ztX^$eXCh{@3uA%U8$JywF$e*1cA$)EH#JUDjnUq84P{gFD0ly&Tcz%d0FeBS!5^B5 zz)nW2SIH`CG68!>K5zwc_FBGuE1h2}Mkt92WO{s7xgCv-GF(xkV;TM3j{w$DUSoF@`)Vz7g@8D@^{7!;$*P^U*J5{Q?#KL&_Ghb!|$F2EIr2iy|8+iA>5Kv>Jc zgRJnGewuDD*iQr2c+W5O8CqmOJLGfUjpWPLNiCCs4yAc+5rG@nr{?jlLc7?Aooe8$he&e=SE0 zsiE@HSzg=-m zj7qKaocqR*#u2T;#4|+a7}M8%B=|t)+$%E`@!+-G)hzQ<07b?g1otq^MtF%C_QK~L zoc*d~_8h-Vq<7~eiLTs?bl1Fih*Ceqmi9V6@#<-}l_4sJE-a<3lmwW<+Tc^nW8^4R`~2IxWyqMuwLhzHJ@ z-~oFy7$p~9Nsk+vZm#eAxu!Y$r*qrKP0E)PD*9v`4@@($OjG?1a_l8xhmvmi1;_FO z4Pljh9pTs`DS*`dr-#}Xdw!PEe$&^1rznA>_~z2PytLX*9jsUcWnnvMDeFTpOZ?2H ztha@>X9^IcRCa?l1gl_89Aa8ao{dDB8;IKT$0#O--sx**4|;=Hk_>w5y8a)!-ZCn# zXUiK7?g=iz9fG?<2PXsz?hXm=?(Psgc(5eUxVzK12G`*3uJ0lLJNKEH_s&}Vq3G3| zv#V;?_TMgEvy*q-lz<%+$YLNI>#aS#_Vmzcvlky|9qQxX%tx?%K9y`dm_}+aM^lHP z*|M!(Ym!@EiGBXbly@WuZ;RGQsbZh`r2r|G-lnTx%YLt&qi?w16_t23ZOPR8bc=Gf znN#~Cwq=h$#5k_1-!yOELPofkdaJ3>sU@okWmo_&%wa0>b`9x)-Iq-bk3R72PmZv} zMJ~^ZRs-Qg8p|!jHt`W|ET8QRrb18~a$i4rEuQZyU2P5;ao;FH@#9K6#BPsV$w(yc zUMb&@@vkYTGZDpZx5Em8RVN%kg!mP-1NZ5YZxJY?^u12iNW^maAB-^Pf)pU@?=pBL zi?Bf^9J}wYNCN{fps! zdhR*i?X<2u`?p#Q8ATVPdD%xe{qtX=RGfd{B_TYmWk@udBi3?U(5+6#+Bau

qaP{fO1$hq`(*Y7&Uset(>B?GRp_!>8BDql>?eZd3ZW>G|~uA zTg0b^s9*;cg`83>un^}%-v8+YtWiXHe_UBYEk>Sck96#lz|~pvn_{3z7cCw=)>m2U&oL3Z62O#P_L7+dK)5; ziA1VV;2Ja!-S-wer{1y+U5ou3Jm&5R%FSLprcOE0Y1Kjfwd(&Z%m$Kq`N=?<#uL6y ztn#ZBHeiU^L37(DEW9%-=c?(UAP>;r+7$A&UO?Z3l_q>KJq?ihL~0&kL=%IwWj(F3Pb@JA!ljSdBq#eJ2EgjbWwg*$T<0W}C`vLr&=xOY?OtvIAsy z=EN=}FQ>XW`X_qFSUO(goB77|f)T+YXih8(`@;NgV_Do7yt1C^D+5a{5B|boO+SdQ z4<>E*>}=h;-SO16JOVE54OqQn3aJZ37Y1rep%EMwiAo5p9z0uv#&Ae-{a06@c-5bS&h1-Vf(&^$f@QXR#t#+^qPe7A5p^YKBkX_HDUB$1XyPoH1?c1J{3%=rO5 z0#Vi8W6Axc8XMc4a>Y>6m8e%K#Ke$#KDb<7xm5W$@!sQQ9RoB#7w}1u!$!SR%Fofo zz9N1hy)<6$e8_~C1fi0cD7)YERFLDP)`YvHgY9p?ruh>iqvGg|9V(%FmEJ!wgr=?3 zVwR3?QaYZ0fa;M_?e>^Pvr0i`9TXpT!*m+csK})R&C2wKie7z|%BnPrm8t8V{EGK* z3ZsZB8GY@xs88fH+4+mrY}@d9JmDvo>+jC%B#&cF0ja9XWT^!XOK)ZxK2Q9nLkrMd z;+NFK!tldnjtt@QeKFol>)Iq}7%Brx%lA{Bp6Dt31ogYJKWzy^XAwE#DN2pQQm1Cn z1F$~yK~~eO*m7rNC`uy@kxcBmbR1vLjJCcO8&d2pTx18`38Df;HpPDD7!T8Tvh`Qj zE5GHc=E|2TR()a>6O5wj+ z+1W_}C9(>=Rg63FgUPPjhn;5&a0@pl!u7@YUYQ>+I)Vnz8w~`0?}> zc$`6w&9ph+E*N#6yjz3VVi?rEzu>B@qZbL{(P0#`Try|&d;u#VszW8nES&{A(*D9) zP>zR~|ExILAm(im(KvXSQg`{h<$Ru483+=*`azlhp&GRodj7D4g~-?FZE1XJ7< zy#AOWw;|JF+JuBAkugQeI7>IglWnB-o%1!CfxZ&Zaa%?`{EWSugupeui$!~&holj$+0vNkmM z3h@-Kz0eDa)!PLVcha;#f*pVGMXgvtX@Z*8{Eb<4e)G}{I8wl@LtL&!=J9FfqR8XR%XYP7p`LAAFG8+3t$vYF& zaC@qiyGtDtW(F6+yBuC$Tq74#!_y0KY}_KxHRwS$fvW8mv4lbrSka`0H9I9gLRYZ_T8X_sxT} znG}gB2eI=s9#iu@O&WMYHl_&L?yK>}tSn`DW1dCDz${eR*nlYYKWzPuBX$ zY55_NECjW6faO&stz_}=?2~GzKT}(Gbr@t>Ywumpx^0q?SC8xfULYAi4)`4$9T{wf zaj%78-z!t)sV|$4yci|Z;6LOaW7$y zu>9tg@*dfj=_NL$!V7~NlM~#mM&APIc9wtSu3KODTnKD7Q( zIyoHeaH3^BT6&E-S^^lm+wSJY^sGD3)-+K1kGA)OSkq!cmH{Rflx@e)(2G%?1-(CIzy3B}*AyANAn zu;dZNQ{{3$=e&oZ7mVB50jb_U?IplpbDO(fZl^Cp}Z7#B+St7unOv+ncfPN{sXy#nsBsjD_EvvfXPcjOhaN3 zsTi{kg9f20l)G_`5V45Lc*b!u(Bf%2I@&$AtbimVuE{ZeBkWO7^W$*fGUAkxu0Hh7 zl)EU!G}79fVt5&N%nojt9Gc>_#eTpL7~WB>0q1hWH^$tSH3yQ8rp~M@*-udiOCbkV z{ISoP`+yAF>WAo}@lic7#oW|+O;Ut{U9Wj9F~#AW#r9zn@73uectXoW$0ci9D_}3EIF8qKHQev?Uu}4%WGo(`MM_+W4cfL*S#I@2=6D8s!iQa@{$ 
z2CBt@=*^u3NGE&4JKmw}Hw%Kn_m{7{u*1%#zd)RyfvXrs7<1~oxs*Oe)Idq8kGyw@ z@MIs_baRpUDwTV~>yDimQ?cdf2FX*mn!fu{;oXD%^~(>BHXj)_GV1|BE8j=N7~0V@ zpriRA{=)SA_ntVEWgeF*YNR?dq)|+TQMsGR_4Ay)S60HYc*x0yzGs*ZHd7g;o(C?D z8;bKoKzT`~Tx~Ek`8xx=7HaSrb+5yVcce%b2WPUXqF3zJg1k}H-S}ULKy+m2o>0B+ zBSWJwoM?IhEp2T!hrJm#^H0b&07UF_snm(0R~DBibqwjd_G%T#`sLQ1^6c5;M{JAL zgTAJP@%>4o4xHG-7jT-lMFa1&oA{e>Q56+zObW^#WtCHWHX}is<}XAOpFGYH+X$M@ zjml5lfX=1&R7aU#|KJW)PycpUvmA6IT7NB>Ix?o@zI`7eP+IKd|WL~A)W^}wS#0bZl_YJ<~; zcRS8~LjxF;ojw%&qaJ3TSs2X`jvR$2vsp>z-V$5OMsWY?Y_U=8{}%$wLZKJF93&`+ zoQPxi=i{6<56a+-h~EN)s@>^Imr=8&{}Mgs5=@W3ZI4*OYBye~NvE~7D1;SSvkNeI_s^3ET5c zcf_lmuO$u4%D{DCr{|t6$(z<<-w=sLiG0uWUbRPCLh_ zU|=A^o7bxt?sP9?!%&_qI*ziD39$Zt$s?|s`%96y!~RgnzH{PZcOO9#7L5ny8rCA<;Ml5G4I` z4~41XgN~orVOnb+=nskcKXH6zt894&OB72g!hKRU1B2#DH;Jj8>Hy8B9Xmgv>)z3t z*)eM%5(pOg%PX)2engPH*83x45I5Z@2?MaWPPFZQR<5D|!~q#Lw|#Rkcb9#%CulLCGJbjTL~-0(Pu zd-|My0Wdf3Mpbh}Vb2w^nY+?MT?{JJf+LsPzr}n8185sJv$wIcG4B1j6$a*TE+v|a zrVAVZT-Ejq^%YCzcRp-Ca%+-31w6``bHx$04WBpqA01&cuzG!<_(m?Su7{Ai<^dPV zHnrM-(qiEC_jbz;Kkz~+55oyWbuP`>k%i4mxFPhq>&KE6{XeGzs5@XZf__C1=Y6}G zayo45<2bh8^_Ry^Qrf|qRb()g4(?ZT=8vk3gMbT3H+BrIs;c?{bagqS59toax_@*N z1hByL6mfU0LZfvyAFL6ZYOvYHFDP-a(-`)4yJ5#PtuPTA-IdYIU8;tkN*X_1T^r6>NmkwH8F0SE zAU?!+kduZ4s4n#E^ZnKXcPClSD7v;N4Qb+rf2Zd$*a#b!mN}b+tcRT$i}ng0c&)@R zO>Nt6ItZnSr;Xz?1ax$LP5Zsfpl0^*oByK=3IIseV3hm{A=}#!a!Td=@_PCxBXCN) zSV_~JKz% zvGDSf!|f8RZQe5*W+rrD2VL%(um7#FGbo3f25Sa>U4xGi!$rONwt5PD?Tfr~>A|Sm zh+Snqss>$|@p~}4kuegeDO2#z&!-Z+{H;t3r5QuMkUig_?qULN90|S^>1f5;{K@sr zutv+UsHC^QYvF4iTLx6+QTq-FEZ>xr@dr9Ypi&pv{Hyg}A?u%mgU|0o_w=~(oC!6T zBNPbc1x~jd=qkz-G@btXC3^tEO=UHNhN<-T3V<)M8LzjF*bXboWj773_q`LRqV1&< zL!+X31Nq$`tI|c#XJNaL(G?DcKYV~2@drv3GaNh8Uc_DwC>0sVB%=0(?Dm^i(W9?H z1H&66vvWH1&X?gy|Do60z~IKC-{PTKb;b^q zd0oa2I*tncXcoCYs2UJ>eksrgk)iOyR2Hk1tmW(Zd*#Ac+92&}lv~g~-JUZ2yji96 zx;WL;X}lpQ+S4Mc35)1F`D4sVE(AkRw-oW4>NTc7+z(p(m>wYEmO(xJ_2qf^$jvuv zk%*f}Sj%qfXv0+leqb6DEP8Zy=%l2ixUvPCCMJ{ljd^r;w1i01Ru5)}Pb}~zLaI04 
zpgX8fE+!T7T3dzd-z^2sX9;<#aqz%YrZF03e-4uu?FH_@V>n<#9HRupyk~dM&|d}1 zstE0Qbdii>cEeLBkpytpA}gq+B6WfIQaoE!P9ROO>BG(f8er+@>UUJkc+`{ z%jsuqL2V4n8Oj3D`19sTdWSOMbT#gSZ;Msh3jR943GdMzVyocggQ} z@?tH_O#mkk$eUr=!J7~g5+=#Ch`MJ5zRELqrbEXBiqu28znD<|wNS_s;qc?oHme3} zmB^Z3hR5fl)5>J2FT<@WNTafW9Z+pftm8|P32ZU&s?`L5UST#ipv?<<&Kv1Ht&`2h+rM+eH2`t@ zDZDP0^nZm76xF{{Kra60Pylf7c7ix!HX~Tt%RS_X1>i9`SiDX<=mUd;Y>rzWy^cLs zP#W*5IsulwL3RV!qvi!N2!djQ3RI)`H?#~VMQ62kbZ7~(bkgJw4KbDS&z4N4>by_XczYSR1@zcYC zf?%n{Ibb(&-i>i<44!HXiCdlhoLP=A8&WU_O$Qc0NN-cdjM@Lo9K{!eA_&uMaG(ZW z=8EiQ0@RL;LIY4ig2bMe#3?g+mcK@h*Yz7Vd&BRe%is7SPXiZyJ49AE&}5<{Zx6Wf z3FJ&|e?kLy@k#jdQwCHdz~_A+cm5bdK=|oLLiCIB1{_^n12XQ!QRW3nAbS7|1xE$6 zJej+Hmnv?otq6d~^K4Yj*zZjicXKYxXSUlYsYqXE8xFT9clAQ&X} zY=FRgF{2IaVDZCXfidhPB4_{BmT3kNO*=v^$7OhTH6A<<0mZE zUYgH%B>l|8^Q=O$rmVH*dP6S;?2AQf2mCKWMg~GYz?PbR0EP?R>=a;*HWXI$1@$!} z!MnjcZ>FI8>jaR~U5lHe6jUX9CPJV1QJSvCZ4IY6^vXS1dz=gDNj<|qgypos0Vpe5 z!(;`2f4rZ?#a)pst-L1)B<++w_X{gm9-Qz-EfaP?2?shAu*e=FN_^?E6~4RJQt-tD zh4UXC3>Vtgo~C+=G6$v)+xr8oKAuh>Dc8VY!7GxzxU_sJwW+k5G8PFp z7JD=nNH+7HEAa^n-qXL;BGz6{u3xA-^6(*yU2o(;&vgq^)`xIOWL zYWVF<0kIm^$baR=12Dq>Ut(P9lX=pV^UiRqQTATZTTXh~{;+P0t{yfOts_W0gzphb z-q!&t#WKHIgMyOH!GyoJU?nJh^3(6-7@dJ<&l+6q;VJV|n_bU{x*^7A;>D|;g|A=V zdW}EWVB?@X0TK4U%%uoxVPO%MlEOm=A^912G*^Yd#>Q6ErQ7I)q#%4t1>C-5cXML6 z=scraB>3?~vn#A7sq6XoOf0F7u3Dy4^`13ermsBo-i{w;`fKpK_Q?T_Atxp$hg>|) z%~lv!IPa?sp99HSn#ecd@E+jD&7B6S$;UCwlUmq zC+DPJ#so>`5^1lL(@k1$MTrYv8~8LNn?%lm<#S(S!e~F|ZGXxq=snm}Z@KnJ=eWRy zXF4pv)=y2hm+hkQuT=OoZ`MLbh1)rXm0sE*M$XY;D@`-22u9>0Kj(H%wNIOig4wzI z!9peO^)_Ck`iDz;sf;RpH$L;X)1IS-_g<8G8n-ZvTH9*hV&XaQY`gro&*E^!F-FLW zEJ8Wd|IsKw**l9~RGH6ZG5Nv-;Dfz;uT}y-KvIF)ZPCvf4c)~G5`-wH>A zeoP-xYp}agye<(5@Mfq~6=E*gezSgL@AwB(YU-UhWeJe}Rs$jRLMw$IX(Bt~{;8*% z5<+J<56fz;e1G^%f1w}%=Dqv9lv>kxuZGQh&-Y}i#9ip@gnc;J3X1fsN>b3;9t862 zTe0bIhl;<<$$Qo^aMlX~jIhfxLt~0+&6PE6bt4OZAS>nf9?{iTAc@M86`cvQVRl@ig)oOG^zF{BCcPK^7aU{y|MXxlrM5k9O`2Icf;d zmQbSAhna6((xC6tb#A;>b*5JxF;}I5sffWzVRCR54B;hpJ69I4P_VkBN8kS`ovW>| 
z4zybtoJ7tp9#KE6Ab~X^u1S2>uVhAe6D`Qxi)?8LnxXrqiM80mG{cucpxbp_=Uq&&@y$ zji2K+_P3ZOjn2{4Mk@+tUoK3ByYE_Tb8{;(f5aY3lu9N|4?nVC_#EmK#p00bJNGdj z@P3ML+Ymit4CDc+f;PLDP{{8zFUnyI&{}E>!7p&%6J%C&!shLm7latm%UmDy zMNcbrOh}1i#ee5LC1580gw7pdud>@XowE@`(O`K(S_^lIrN;7PkT>x0xUYKtbzV`` zfLg)!&2F^utuLc~OehD@``;+XEr&|4%hxX~jhgZ@{wByZZT0H=nO0&__|NOD?QRvf zCJGn1nof3RAZrKOp(o5{MwN7vzK1`1zG063L8`QCcC&^k!hYG^P31O+B6+@*$>2p< zoW~G58f;~bJ~W#0IOPI6h#v_0oWNbV znWSKnAEhZqB-Zms!S~yPbPbPlM(5bMHL;t%#2Lf&nz2{2?O7F2c9@%xJS0FHl@|}8y(1%QVkaxmFO%2EQ z$eMsPRJT!bR~&ZbQ$XNKN6t$1gLX46o(O%9!O&%J1FxPlHcDXuiat#IL{-J* zjoGk8!}Z|w5GHy!fa~o3VnF?mfYSlAJK7A#sEaaLHC(OjO3XJ7^EKceJ~15~ozFbU zB>)gsRQ|<`5G)wH&aJ%lW=u`out#TjH|o3g72ZTaA$U$Da$YMXr9UdQ;yGu)(qcSK zuSiH+lS8m>T2C?d4fx$}S<&eqp zGTw7D+TTidX)-(pt9}&YcYZg(D0J)YAoiptH*c68LpM`3n7gFQ6(8rgbN_gr`i;Tx zOgF7dwf>l}J{!QKEIM|j{f)(f@Dag!EpGwybHGH;b;(&BzdBV8Fortc1GBK>9kil^ z6IEW_n<{qO`}Kp>YzV*k>24oiz{Tn_w^O+mHFQ`Cmq0f6`U3*<=yTnSt?~$PX9x`? z-`r*re&9Gqit`+_Y-GRjJwM}u9X4@=tTLZZ1MdoDY)17!%DA9oxs7|Rg}zjP$bw;e z6atAN{gX&WM``1A0_&-w=_4uzA4hXZ{f)G?Lbqg5>osd6BOv@=b|xlBH>r0e&6ZI0GD zzS7$-Epm;Q`wbLP+pXB!qoP9_RvVj3FuH5vVC5d46yw_aD5E2|dau1bdpF-&$}jVu zuvxCH!eu48EexL?>-pxw&_a81Dwt*Uubk<7_lwj7^46=Nf5cE2VM={TINVW`X=`|F6b&-!XSWjcYJDShT4`xy9>&T4 zNU~uGw?FVfQRAmivtF{sOW)J`3AX9MKaa@=6R$CxyPKnF?YyMk*Y#|Vwh$~RG`Mg8 z8?rul{-2mW;CJe*IF2`cz*)Foq~If&e8sV&;wA(}tx+ z6CFbEQ8a8s>?+J;*Pcu@(EUh?D}%HqV5Im>vY~42)h++^mG+3Pz)BT6D?ch_0??P? 
zJo^>U?(#3KFf{F;13#s6&|h|X`L3N7YB~G1`}g`s4u_QDUfZN#M$aq7?I5jO9@DME zJ$tz6O}sBLEDNU8LqNY-sj^w{rGoMORVMFUE3r${y0g=>Ja{1e7;xndaQ5n4+^qAV zz9?@Q-tS9s>OfpL;4J}eFHwXR_BBto3^4YdgeRJMi$1FpW1fQj>s)~(nDqc31>Sh- z89VhQVe}8*{EOJXG_Jn!|IH|vT&$!x?(uNiTw~ld`(=)JpP0e)*3Dp3SjE+Be-^e1 z$y|Y`Hw1EaGMk*Y(?>zm*D~=nXyWI5N#me^&9N-HP-oGYlufY0Y z*Q<Snq9^{`Xt5~g^0BuMO(K-zg^yvgiuDJ22s;v#ABHNE5Fq(=wjZC%JR`#n^2 z6m=?)n3C5yxZqUJ?hHYo%~hnlj%jYf2q8A^svLg%BU|R*OZz`2^MoheK7&_=5OF3H z!Q54tvfR;(!;yHA_+_Q{ZpgIn+U=s%Wa$yVl^LsHuq{SrT?*Pc#lH4}7dcWlM7Zw` z0j!sXW;zxablQiT$*s#X2$P6y{V7^^U+eo&2DD(4GnZNZm31FTe^0J-$`x!b*TGwG=RK#c3s(Cuk?7fTtCh>IPp2Kan>Rzw z!GN%S8qWKMg!Z?l#>nf%lyM$QbES?ek{GBR0|6;8%Rf%i|Cky!qLzQ6z)_dOMi+;d zM!bGlLhR6I(sfR_*n`=ShhMUs^O_;Ka(V$CACBl!yyMGnZd){5&L5e-ER`kWlywcQ zsoVdo#CH62kI8(lE+DwC6CjsmeC%PO`CMh|@lCjx<=%rRf8AL95Swiy-Y7b&X(Y4C z&cw1brIG(U=tv-x9&yQWF!{fdi~lhCKbzq1gSq2{-Le3KKXE!{c9~k9+r;?)Zv#j2 zXj9r$II^bv-lRMmOwQJ;*QfU^S@lM@3Ab*wu+Ty!o6 z$0@*B!u8v4(WRtS00^|#h+f-NX^2-FT#DUzjOnJ0H=*h^vsyHk+)-1n7LOcJ&P^AX zL?2Bi9os8*%N3BS24*?&h^^@U03eWA+iGZ@agR78z*jTZ=(yZ4DBL{G#K3B%!HTcM!oVd);3}TvijF zy^*&Y70?==AWdCj%95*T^uED_v{AheO(<*DSy$4l&FHC*LM(3!>dq#6th=95fQF?H z?z=Rrm+g~NOu6Ya#BdYQ2DaG|NwoV*eoNQ`&HXsh5lX?MAy zxK8olCiw~-vxOn51?3g+-Qm=cog$_*Ze$B?z=Vs+q#Kbp% zt?ce8zW(i;s~)k_w>^m*npoURwqJyeTmf@no3upkzl@&h?I}Dh{0t|la3||^H{#;t zJ2#%bm!dLf_+_pS@MF4|`cs`Pm#Mk9Ux&cL!$L`X#0mJU;E*#vBc6}g!|n2(1Mb`0 z?Nh7;rPe^;qd<))xb0%f=nqT6FjKeOI`>=3>XAlK4*B;M+2XTtm&-AfDl8Y7VHXEF z&$NkH_pU4ZS00ON$K)L94|;2pGbKFMSA|yk_m0AQr$W)^?~}M?R@rF`n&=7X`@{Px zeNe=dOBvJPK)y{IPI9DaFNmo+1iGM@lG6d@yWc=sJoyQ10-XE-FhWrnq;7ymXMctj-ZL)SN8OfTq%> z@l9SZkJ=Pb2PKnB!{_p+s{HxE06PwIZ&=AOv* zK3_WUlReN%-?|cF+eZD;Cyv(CA8o3J?;DS6rxsl;M9N#eTjL}ugQBM_?db2iUPHpkSvu*ut1a#7fMvw$ z?Lq9#*(~WQ-zk)rIZRpX>%Nie3S}|uH!VuH^=EtG_}ml-GGbVEkVH1R%tWTOp4W$Y zs%kHbr33qk)jnPqxSHj#whGs=aN6RW;=mdmC0YgpnIh4M8|fnRm2vM?Cb4DwD->3d2RO{qW3GcV?01EwKj5cPZGp?kxV9y!biV~F zQO2L*6|A%RpsrZpX8vfVcsIzG@zjh%q8{n?>Jnqb z%YHE%uML+aj@tf^i_w`gUcwDW)}!>9ydIPA-Q$2^ea+3=#j~kmw&v|u$#_4jtU_Qt 
zqzuNoqYjtKnLmC}z{1r=R+3d>c+D^8tORpGZ;?PIDXY+13zvGO0a{TUeOhTU9qW!pdHr(W|?rU7z-}|JA zX*w}CR?Yrk2r!20;RoJqo6fLsK%4haUABeJkC#6i6S;;{J9(RrJ6HA`zb{0@9eCiH z<=N!g#Qp=94pb#4N9jCg6m*9&WqL6^<+GxQ^}IV+1SAEsF7&|su#}d7ea406&Y%6Rc82$Z#?wFF^9m~aL&7|*NBjN>H zaJx$*^-TiWn-auUYrCis2?#BD*jC=D=(^Xw?*vT+>@Kie4ZacmN&2pHk-hsM?iCY` z@~klK{Q*`H?-s_MFrHJ#EjFC+)t~ptjGgFI`bjX<3Uj5>1QAB|r)6#(Tif~ef71PL zU@8@;#!#S1Yn|7wkDbHZi(&LVpc!(q1kQuqdg{1X&5>BxO-Ih$6|Ot?f|=4-Ip;*o z(V9L9;(ut8WE`U20voE|349*{O7KU!H_A-b?hdL^7T9WEw2uLz*PSL_OxW!a%;^`t zE!;P`jbs<|CtCsx%ed&d;F(E!66pr}tLmGKTTDPVg&!-6f`yp2-Yc|`l+vQgbjEd? zP*D{a{H!m1OA#RST2;9Jm-I(@tO}dSI8H%b!DABKqm;A7oxQJJUtOJxd|wZa5|ERG z(1ylxyoXMKhKi^-y~SxE!Ha3W7hZnOA;_-wj&eE0P4tzOmsFds!OzzB3kc^S(`MWo zh=yI0^E0w)4xb=FiUVaMWt_R4E|ZF2Wf5fOX?h9s%@xdJih|YMvYmb1F_%k6AHF;X z`|hmi>)fE!s4N`$V-rOrYzqFh86J zhn*(vmv+{CQ&;pu>-fWiq$3K8%~BaF(AxbS7cUK`Lv%>fym^nvNj?nIyj75Vi-BA% zES5seO#wkX(s5~Eu`oGWBpLP;)JL&Z9A`k@+T&H=b}$)m682xc0N!9_8H;s$;m(Ip z(*$jUI~ScsG|5>XXrM61(08t~x@c)zyV>lmT^5~@4@EPCP8$$t`Grn0!q3WeblPrL zTrh0~GqFfl=0J3?rmxY40+Iap^U1|n#e;AaMFo)HxKD^pPrLv0uSHW<>`ec}pzr%o zXHb2klgYNo(oOTaWe89A_k+V?12&RkR13DRYRs+!CAv)1CbVA4f<81v78=IS53@;D zlT+%JZef?heEe=VMN#yvm%~|N{Y@J(L_P}HGw(|yBl8{(3)*H)LzQ{s0|~*l_(!cx zcu>R1^$Ow*%R`Tx|Fd>;aIwxt@wGJ(DX_etBw`t5kKFr!^8RSR;F_Sx$=DvC;2n^f8wqzW&<(>Tb@lp# z_a!!*Z-2Jmse-+%UzGRh_QSVg>^GHTUewKf;zaZ9Uev9lNM+guL_vK3*AM6fuVzqh zQJI#WjcTqLj-&tCEpm%RBc_4X_`{XVsv=0n{w;Ju;wN^S2Kg4_=KFDA?xzR;*J-UtuoLvmk%OG8(qe$s(V5l`HEd_(LKUrtFLl zL1IkE^D09`Fp1q9{s;FnQNCOCYaBfFE^&bYTA3inN>nUTduv`nKBeX_i9ywWWTbJj zIWex2B5ihk;f@ks0U8)d2N{q%ddO`Q5D;`=2B@+gQC%-#s_mU;<2|EJ<_`(Ms>vH2 zu*(rH-t&N5JD#q8?4V_-Cxj5hE+6n{nLr+^+Iz=W;gsO8AA=;)flHLWwN(AVj z?Sxbq^_Myo`TBs#pYX3y)eIp@PSMhd2-2%(lsSlL16>50=QYsI4|;3}nb_wlZX=kY z!Ig2}?+0zX{$v+JpC?I$;Vzj6U(BO~YDzRKZk3MdaOs5Ms7J)Is~1G@Sq7#4nCRJW z7|19X(hWEASqEec2>O&JN2km*BJ}+?^>BpQ7m@wX=k6nLptHv-D3IdXZ)xHfo2f-} zrEKRvHM5Q&6^PxEPO5)WpCUo#wxT!KO%08~1iTPuv 
z6)58}ym`#4AgopF@l9-qgHnlxUnT0xO8)zUiV2NvjL`q4#|F^g-SQv|wh%;&K*Et=#Q3wo1@Ss{DF0FkN%>VCU|9*Iee;b(MkOCj3?EXn`*A;ZYQXXr49{~t8`cY6LIDWYOKlJO9j%D*Z2-vd8!KO-<~ zpv?bG5ELe_K0tOBOJ%j}|4k4G8q;5#emFnG504}kPx_;H#Es5TI^?iZ#x;dBkeUl- z_ZK}LYIt29QrqpJPlli)Ji+ax-F3BBUt3s{sfgZZyEW91?_(}39~@Y8R+#Z&xyAqU z%V7Nd)IXc8i4B8oMc{>uhwGhx=P{Ba;YPwG=Tlh;?r<|WYi`sWu0#w>z-mme$0a1J zKbz!hDmJ?@2;v3!)e#IU|^d^xCnf;`V&oXwUc=jg8n2 zIL-fIhx}zTK!n_C$rR+|&btFvNa`>kP_n*eI#s);wG(}H8aq6P`?@Zj%n<9ug zs~o+qu69j}&e41K^;o7`g zy|$Cil$DQn3Fzjq2p%Dul~Gn19Iq9MObi`E*2jGGTa1NQv-9Cr~S_B8^Uus?bN%Ylm8|^#eNA=p2z(&6joplTnwpz;8Ccv zWCbBiWQ+3%yQ?pdWE`)Z+ZZ~M+3${`vw(E-Og87EE2DPbvQ0FTswJw6L`mU2<3z9z zht>UAIj+i^CMa6+BaRge$c?{H?DczmIyn^jv@&2gJoyawX1PKJCNk}U+m;KtLXFT$ z^a2|L%J0-&(Wl@0`H3~TkM`+GDhOX2bIJvbAT&H`aEEzGRadece2)_4Yx&FA7V{Cg zzoKS#rSdi0ZLdQ`?V!VdJV}9pAjlC{Us@}bk-54RUyx}P3I)jw>E~bifz1S859TV0 zy%`Ek-)?fYWTI5EWFTBdBu$itD;O>|CRiEmdexdwEar@|2ho3CsH21@S0kvyaxTVg zPc!XMzSosR7tLJ3!Zm^kEh9B$OD44Av*FcGcrdb7W>qWON1LzcL0$;l_T`mQb_-rd z_d?iH@@)?txpK+S;gKbBT|Z1wsZjaFI>j%k%u>Go{=C)0YNeHUL4Q8FHZ@nc{^S{G zR8y%M&63?@p&b;w|6bY-Js87bVE5$PH69>1kjXcv2)+`vYM(mHf za>GQyp9cKvgu`ThBc_`Kz+S+j8XZV=wrEt*FUQrkLoB9V4N*Rt!vXCNOCMN`LAz}PF73Dvap`hIBFZA};JR8PIw_?o30G||ma`+zRtp(Ln6%Z7q= z@SQlP@Q>e@Fti$2<^Q;UDbW7;CLAk@hkaRIf8w|)JN0C{s)PjS_BKip3sdEwF_3m1 zih_TvlgL0-m4f#DP2)DX%B0+4o?OvrMm3oUoDB|9*@_({4|zUmRhf%eV-u1~w4OZ)d-wptA<4{SnH!^Js=tFDmy#Njy>6%NG{=*~ z(AAH=|IZT>go3OujHzknkJm*7y8J1i9iACxOE!HCc$j|E9Mg{2XM}gDT4AV}huw)P zAJbY;rHH1|C`(9PKVKzfCSF)!V`GQiWCn_d;dr}vZ(u{=|1NRs{Om0C0j;+}W+^u9 z;l^4sh8dE0s?Df%QO_UH<)tT09q3PE61kH5UOf^VB zvtf;WR$HFFR-{#BYP)5?Xwf{6+@#BF4NCjNi$za<0?ktC1b&z8HTxUYD= z)y}tXnAfs@5Z{I)RX^BR{Jco+DTVi4N^1INudbVV41*;DDCR+#aIf~eP5(RkI^@{J zOol^9+(Y@&F#(~3XiNuUmEjsZkU9K};%$fW0j(1( zEayglXY28>-Z*+g3V>Z}>^`O)U$vWPA^FS@F@4) z_ei9sB4;osLhdm}EUrO!=Od7&(~VlP{_t%leP$}iTum*Px1VCJJh$AAsB(glj#)e6 zZcAE0OpuqZ2|k@^nfeaj+iDz=^floJvHIMH)Dqu9Qq$+&g1W*LSL5QF7EDacZd$Z+ zY9`zf>b|pW21u?KXPZ3&_XtYTa67bnbXf?TG;ln+Z0Li)p3f*66vK{JXe;l1R8G}I 
za4U|jx~0DQ7$vL?QT!*(Td#tsQQ7%}UVdL?sCu;H;r1LRyTOsou~!9q%aok{J1U{R#a>4(-hH-aIBX=kehFh3aYq&IkP z^-6*$dwnkMC4VKO@kB=-M90lMzR&6>2<{>_XS)l@cSG{RwQ#qnbv z>KTJ{>s+>@&#}e{5bBM+7@={!*~X>w2dv)TX1Zp9wxFU8frCS^`rN zRg0tsq1|jAmv8KwIjAZ<$4@TSKiL!FE^AXALb6N*4`($isQ$yL)!~QD=u7jWJxaj6 zdmUARyRgCNGuiVcswo=TBt zh3Bx<$E+s7%90S(RAG1c#Odmh_$}2%Iql#llb&3yIpYzqRbZQ~spXz(CIpOse)kcC zY3v;7YWS`+@@J37$%5RxNZ3Stg$1osq(KsHy7kvWHZB>D}xO zAmBh-1v3y8+X>g_s+)!D@tB_);A}bn zwpvrd@1=bGjpt%E87<7Yqw1xPNu#~2s744+g&ilt$IhzkYH$;FRZMTlBB&t)(ftN1 zsi&HU$Ox&?$uVbbDME|NP}u3*9Q|ciHR*6JqB4(@Nk+R+IwgCM6LRg$KTo_-#b+>1 z-F*W*F+hdU(tRT*rMdKI6OX~T#tT9;(LtV}9o?;3GtTTEgXFG@%aD#r3?C>Vjn^s9kQT7@WC` zNLGr+MSHZD=!%lUTDo;^AS`Xdq#~t@el-|KBzlu&m%55b3mHi*SFB-5c6xihaHHsh zvyh%PP#C>1vy2&Z%zrZ*Pes8}Ar!ytklCwn7uXanL5&IDYpxHRctRQ=v9bhm*WO;}SzMMphu;nfx7cpQnygCLN$^0-5mswg>__XAD-Y|CEq2)X$ zS;0z+aproKotvx}X8ip#rtA*lAC`P{cY#2*Mmt@anr{${4JqffrQhOOUBQl;U$nV* zGfYiq@tM^dY+{NKX?0cKOeM(j^YV8t%5=~s+~eNK1uP$`;WnVpqn8i8$IL=>W{VSl z_+{m8*II0FxaaAWR`i9yng9uNq|HW^>e=b8Urvw5=VrrGlfpknEV(ug{1>fc8R~H8 zSWgx#B3v);efU(v7w9&bPg99q)@hqHcjsmT3qKxIyXs*u7unUBHyYu1JG=PIIU0qk z;Vze&7eilBZGRui?U$ZvP8(B}7u@2XnMN!bj5TJN?Yb#idcoD*O~f~&oi7tX`UMu_ zym}?+mw%hC=p080Utl7M6v0T+LoA(_9KJo+%p)TX_7?a*EUxYz!Yux5S!AoN?*DCzMjpYaVBy0*mgYvvjt?W%YC60wB)zIno4bLu%p1J0L};h1YV=$Z3uWnh$vc{e8k z5JE;U`vuweGg7K1gt;7_@X0wg3oLL%yx%aQ5OQ#Tfw>Vb*)FMeE49)?_ zF6)D+{HSA1Wnkd>_5}kivzh7l+Zn9+n;JGf58uah<)&sxC*}$?tg(o&XYwDoMhBq$ zNJ*I;dpR?sZc^WiQsQtP%i)sreE$jQ*sdjv{)~7F&w;{;+IwjH1)bDnM203NZRo|; z7(_pDOo8E(h7Hc_E0~?!;KoCm5{h+Ft)i5&d<&W}%H?9ywQq?=ZC2riQt(w}!;;#= zE$OhrB2SM_UBgg{MmG1SdE@D?5;i?b{3h^31|8;P+bp;++dzZyV{D&a`Lvmk?HwY@ zb#5%mHLE^ZTpSWyCev0NKdP1`r{}2cNlue&goQ$SV$EIpRb${j7+Miu8Lu19dv0YL zyY4B+>D%Z0=iu{hy z*kUJ00JF}(DY24b_>AP~5Byjhsex`0b{PNmk3kj+grRg#U$6U59|62@N|2E37 z>Emde311;fyv`xTy=ao=Qhs1^DE2<|yro`44hVU0*RW8HhrLuggnTH9kZ-K&= zW$E)paHe1#5MjDt=vPoox3mf)?K)`Jf{O6pFrr|p{1O7w z(`PUkzu_a_Ld&WuhVSOC2j^Z5*?^;=KtGJ68r+jznOWkW4LTn6i|)yD#gH&8@`$P1 
zj$$v_l@Pd#r}XzR6JBH6c|TVQH+L#Exu|55pl_N-52@n3Z;Ze047NqcuR!R*kDF7v zO0{Xn!>wF5oXr(@l8XHgEcD+vB&`Z?%iJYu)$Nk$PZ%~;ogbjx55y~-_)nS|Ww_}!3-Hrs2K`0J zszuI1EZYu*Pdvhon&%N8lTR7&D$R2iUQ%2AAhgT%Zz@>}vdB(%wb());g#YR-OD?k zaW$?-DOaSH0)0iXnpgi5{s7{0kA1UYe?!_p{6?NbuVS{R{VJw7)y18QtYahF;9b~H zOOM{x+9tJ&rAr-HHbVs~)v>r7I!YLK{qAyTX_~@ zU3Q*RW=FIzxe7^CF&4jLuJ|zc2n=~hP#&>B-⁣ufA~3NA5OmqX@k|?6I*(5~WdN z7-FGq&q_y=1~&Upt*03D>Y{j9qi4|*z)RX`o>%e=S6ExBht?Rd(v(l*CS8Ni)239^ z#5pl!q}^m5H1v`~yNb2_wz{ijo5h!xUlchp5$Xy=D>HYdO7gJA^5kSF@%A4O#7R(j zySH`oY-wfqahG_5$M{VuC}aJjTl92QWpnGZy~uudo#^^m9M zpdHl2aA6Sk5z^F&WJqNcl$0mBSlq**sBQ%EFy{qM;xL7nEtM($4PcdmfR@+|UHSMJ zq7XE69TRHg@f)U3Cbne-g|sC&CwtDE4Z8C;Zfn$lMS1>KgV7L`Dy5<2B}Zo%LX0MAqr z5k!S{H5!IssSe0CZb19uFb8&Pp0*MOq}+#A$3zeXXjxGr&4B z*AY?tpAW=%sT%5ORSLIv!WPWzEf=gF zH=9Oz1=GasH7Xg01e?t6z~IE{+`VVFqx&HoZ!ERc9%Cs#lEqw@KkqW!0{TR=5eipz zgTQ6Ybmb0dfu^E=^Xj$}Ge)xs8VHysBblX-VnUvgQHX1Vs<-@V`fQU*YBhNuT)3a-m&|H|7)zo<&k`zjkwz0;zyo?!?E;{= zb7T$WZ+%TnKdNjR4vWDH6k`YBJK@|nVSjh(&zi1w;agn}7u{7T+!9N~#9e;Snen5< z+mIUs`wlm|`2Vlo)Qv_E6`gjPKbpIAsA>Y0w_Wq3%2h1w&4_zLt7Y_@?NGI1W}vyR zEMYezr=<~4Gh?7uQ>Ko|_$Hv_zxhxBtmH4&u<27e&G53O=6AAJt6O8(&0*k^KatE_ zsXR{oA~E-yxjfacJYRoYqaDR!v1%waK|)SVITTa^3xLapkT5qCuRKP|)-*pPbhkJF zER9++j*-Hg#dbL4I1bbeMw0Erp;c4pe2sP^X?|xLhnE3a_F8dW2(aPUN5v0bEH|9| zxF|a(Y`UI8?{4kRdEBaj*qcSmX*UNe)Qk%^$YR>t0CC?jmA$C&+9yjcfDBp<@(L3g zsy7H55Jt$o7W9(hc&QI_1{>`+(X?)Fl^WxS?S|4CUpKY$(8#DwrbWA&^@V_wf0@m! 
zgeEoFW>13!{Qdq;^_baT8w-J~B^g+}|ym*6_64nx{fe!NEj{xlrjPExeQrOjowdHcfLg}!v)&<7k3M}Uy z?V4rIB!_!3G)l!OL_cejS(NU+)8SQkOV2L|mQ}erudQ=A5Fs6ZX9tj%SCM$1J@XVP zA^{0zMlmGCwv8a!s~9YJ>@K|GEQMdtSk2ur9sPu0lf-x@v|}2jZbp^kZDkn)Z_5{| z^4EpJpPsdY(>4Mrs+OQ4duH6l>^D|AZJWQo(e8a$MUCN~4NTh)rS99+irIZWMZZc7 z7q#d>-BhB5vGAUVMkxD9=pA1#nXTF(lW6^fZwV%;0;y>JwW)DSui(8-MMXc2>HQ6J3__29BL8}Xx_Tg`41-g zbr~EbNELw7W={@%%U!GdHy2)^u+wC+ZmQxc#oSDBFRjx5m0kiUl+ zL*HN3A8!##c};PS>|8_U)EbYyAFn|tbgXxEaTV~$Le@0%i#iCTrjhiFJE?4;U)^T* z7JVDreakGtdYbD-+~8NEa%#-I8-`O{&JUHZ)qg;%cjAti)m+fh;5e5-<8DS2d9oIE z1;zHcRbHl2)(yHtT`%|~iYnORPB=MfoMq_t(AIi0aASL<*I@z`F6RR>gzR9lHypXZ|C`0Sv6SZd&f|o& zA-o$_zGg2gGxl*j?m?z}=H7LT$lL?<4r?4xl>UF$6McGq@F7~>r=2-?!abn^_lZO< z_*Co9+vassyd&zl8`jAck#GHSCoJb>^vEw)22EvXf;E&W6LfAv#x6xYswYod2t-YD z%I(lN)>?^W$t{WXRg1dl+nk^Z|A0-T(y74|X=B~}u|_Nl%O== zl;a3e$uw9aH%(kNuUk+K846F)UfVD*Gc!z485w5smmH4YS9q_u3V-G{_5;)=5MUg7 z@=sE0=Z@?`TCJ1D@GVzjGw~SEt~;5HlCj`4@m)&wlpuiiNM~?^WLTp(Sz0m=>>XV9Y7jIbdr3%7|Lh@;_ne zfPocrOS}rq@>ClAn1wPT7nMDJo-25iG&6@?gi?KHupc+wH!OUb?#hmsPMTIykHi_M ziJZz-ZQU)E3?IQRT2SsyRYXE^$LZ2ny{`hWZv{Vj-o8(F>Gf512g?yLa9A+SD&(DU z18GUjxa!Xb@*A{_cY42GncO@ps9!DX@Uk;qzYP(W4ggcj1&7Ib(O>YZ9gq(zc(2co zYogrWG2g}COYVotR5jtfUNkK!q+~`)WXMa0lPBlL`WR*nk(3reA+o$K1}3qBE6EmQ ze<+8mc%#LYjtUlNwU6gVrzRwao@?I7IY`Q*?Qarjrs4Xy)JEb;HwR!?Y=)xKaW@L0 z>JQxE3~g;~w1tf4gZ~qJ@8iS2{i(lJomXEywkw*vxPpcbjmSbVp} z)iVTBw?Rmrzy|Y-wd^m2@ zavryNOz0qc3Zq@9c}))9RU3M4|2Ebc|Hac`>4B`zUj<)DI78qW>+eX&$U-i$O!52*U%^4{xR0$Y$pjRN6de$$=yS_85?4RcayC)|2B`mYRXpGl>?E& zaDt^>&y3qEk`?xRDArMezGq9TTXU&Emd`515Llvx9)i1DRENFrEx7KJXyUeYP~*}Q90ofnHF1FAR}rl6@&uhX=2 zl_VT&D_AbWNMjq~9)ojU7~sSb0|QH9?vmOVz}igjmK=vm$xX^NFJ$B=6s10{3tmTs z`>2p+s6An_;E%XYIfk)~j1w9vx^mO5%4W6$lir2kGF>A&1Rx#FBh@Qq*Bb!|%}wto=)1iToQ`h4O*=x(VztynxW9|uJezIXTqb_Fd~^@} z#t6zITMv)@Jy?MKEMN>T1|abV!bx>!t;he$s44-rOiQMQ^WN@zK8w(1qd01^q3_R9!`38{2X3Fp%8^18R>Oz5Inxx*LDD)K~otW$;gr9k19>A!Lu 
zA8llW($bxPYP@h&DP^^*oKYdR^FhWtGK0bmxVC4>j;pTiR)`MWIAZ!_$o207?>~3EV5@xwZH=SG$S8XV4S+Cb==vbPYejI1S}gEKGIukBb$1{FLXdsCM&)G?ZzD zh8{5Cv!+Pl$C(UYZUAi5g6udocz>kg*%Z;*`@a?wFI6ZXu?yCMX(fV?1Xxr#* zR|9!-gr8b4?K3qZfHy=>U%+*T%c~X5G^ppkZN&LCNw=u8-Bi`2+o<}$ zvU|uLu5y;2cfe=|(x3_na2f}&>1_7l_6xV`vPC-iOH$5%*>_j zwg)o7^OqchPln*Ga1HUcK_!ws)1C3OthxdmYaXL6wq_Og4S}ZD=g2hA*e3(ZT`a1` zjO6YwNTC~yWZTX$yQKsY93BrPi?qJJW&1(^p3542RXueErG9RG_G=RvnuPXGCvjM( z%*z_jsv(f5=YDX6Wg&M$+??dQ}gr`VgwK$c5h=8E$_~g z^y@?=uvLulYCtWd8;tN;t_(i%b0#xOtQWQMoc2Mp5X0Ta3+B((fdK6zDCwQ8sZhOB z(3BTSDZazAK@3W_ZP1o<{G!jhezLg?tD+A;qXBM%vt+wGFYMBNaP7zp^f~C@Sf>+d zWowAco98 zirsHj0UjP7z0&7Iau@VMz{IvZkS;TdpD2_P$hsrLz0JM+sujvN6oT5)Ut^KPYwk;4 z@v17?D~&C;nvsk`GwNaX_ag-KrYD=*aq`t-Wz}20zO9DW@y#)Y#Y9IF@L;A6IL}l= zVE*@*5yT6cxmAhk{<;bL zNBqzq*L|9m)wfco9=6p=29!cXF=$?58wDz=+l?TKg8p7(`OzYFjiX4nO(O=W+EqoQ zABWqvB!T{BR2-#=0=V@bQ;$WG=O7I<<9^;NHXAl+NP*z*I`sK=?&Ka{qMhmZvo74?b{n*wOGpJ8_whf;~G>+q|f+VleL+8@H&r<-+n&K%UShL73=L0^9>Av(l zy55z(aAg-v?|gMwb`j1RlAdAuQ}DYN;|BJOTK z8#ah5BZ>*vek~kUPe<0_vZs<(5U#)`WXHeTLkyX&1H4x>G0#-};#}I4;L*Kyi#D%e z$BE;P8=Yb9-2PapsVUXAH2@dXYLVv2`BR#*$IseQM?JK?#HqYAtX!WB+q_1%5LN6h zlb?7+)k;e2fnU`2qK``g?~pdB-(V#$Ss_!58jjallYq^yT#K93h?HqMsx!MeQQAx#A!d>?`9Iw0;EQ1B}iMs$90U+?3K6%V>OSnmI zw^0VRuTtvJPxuYs#mjaWtS zhNnps<|wAZW2Z?rD(g+`5Sd=_{32tc8D=zw*Ed*nHnY$Qjb*0dk8g{kXj0<~ z&YDwJ2Hkn7T%NdAS1DX(tEUthz@tGG_VXeuNNYpUq--;s=z`3y_5s4ldd0s~qY91{ zi#Jj)PBNNL69NX==I-P8D&jU`*Qe=4Mfiq{HewdAne$aaynl>3n0^Rqka&V#ANLJ9 zs6f7s14ID;N;aKOrEr#ukLu0BllM7x`ru>hswby%QqUOa=&DDX^l6yW3(JaMqoop$ zUEKEB`C=j>Dd9sq*t60K8bLE8RbUGgLzGVG~3GKlVcQv0vqoZy? 
zHwqK*Jw1x!YqAk(P~w2E=)|pP9qCy*Sd-70gk3z%_zicVMKcA?vNh>s!24u=7;dkV z=Olik!q(#4Fz=O}u>e<^lSfaJk1dJXxl z=g3s?C@n9NQVTcDrtW8ZUBgUMtELe`Y7$VJp5&`*g-4&Z8G(;snL|+3sP$dNrro@4 z_@AaRAWdtL^(bGgwHt-m+#OCJ^1y8&PTs1W59y zD>UNWURZJI4=0J_WnC?%NQLrfb)91iO7Bm!{OE*ZniR8_x_CF$R@NY;H(m?PkQ*#G zt4cD}7IlvHSq=dW=}!0#Qz*_HT)i>p1m599Td^l2+CJXT`gCkeNw*K zhSfHvOfaXzXiD=4aJoA@sUf85cC4cVZ8@mMfc>sNjPI=yb>~hI5wi@Zl|BD0TE=B6 zaLgYs^(RXu^by4a|BHFos^^)dId{CxB2@Wr-;7S!9B`o?7Ue5>A^8Pmf`fI<95dvD zG#Emms{sGFPFQPFc=FoPYnKxW2%hgAzepRbC{`2YL(N{#fEWm1+0)<%;oAn@? zP&@5IepZ$^a(arPNxfU?oOF>Qda^1ye6PC2ohhra-w`#_$|`eCo^a-{n{$d8P(3rx zQlk?PXeY-JhyW}+ed6ntqazQUtO8U6)E<-EinXVpr^x}+fPp{RgV^>-g$XmfC_lU; zUge{(!Sd{Kq=Tu-T_o0x+1zD-_bomWTReKgPFmyIf$ z@N2Nd9M`KXW-yQ6*OM!$dbN-UeD0(P9Cj~KljoRW!I_{9T{RvhZuG5^wNZj!Q{DP1 zQeSol(9=`hY?<{m{8m4p+(T$RLX#O%wY!1Mun>G{(4-loD9cnSTfHetmEs69cUW*1 zr4?>x_|)5XHJZwqcGr)rRw9N9yeK`3$r;kZcNkL zTs`c~Q`O+vaa|QIk()Jjsit^l%6E;v!=#x<(-5~Gbjn!X{78J1qeVIB5-P`7I{?uL zK&zH7)9<8|Fl*4T0snmSc4ZS?)j{D&2IEswTsjrW60^2MNZwHlT z)n3MySWS6F^HHMaf;9bt=c$5}_QU%ctZzl(7kfSQ9uyTCNGUuxHjRyBMFg|Q;2)xF zH)5u#x2#IcM`v=c(Z0N89$yf`39S-ih?s-Wq^5e0G#+)x0NcZUBB z6a&3Q72IVuaeX5KS!GC$-q*ZZbQAjZwe zcSn_+==D=HNB)mAj=;wS2|!q?JVn|bqq~{eS-Qc8PL$veY7WL=#a3yuQ3#kJw|Y7v z-6`Z8`xT8r<#sJ@04;ts`p|U9-ioeAqwJE$ycpTNUjhxxZD&1jwN}bN#YrE~$lOT? 
zNuca8Cs-Wv9TX1$ZUkckI6cawib^6{)B3QF4dwh^owcc%*G}Hd9?E?jZ^jQkq0!V5 z#-XBfjyZew+}qn%#5=tl*gDXeZ}v@5oiBy;RPEN#wfUZR`~GYFIK&fP0F~|`*hdRD zG^7}ZkkNSqrrbHu#zs*dp3L|MBO551J46_HhsUHMHTv(N1?m%y zjubJ;nImx!3_YP6|=R&VH%hZsXlc48}R>{ipD15T& zp>OzZ5ZFgYx<5pD5v%$ppVv&LU+zB;xh41P;-a-J@48ZP%$*jhw)&yI4N@ZS0dl86 zZ^AQfoxc27%aU?rx=XX3@|Bm|aBz384oXcl0JCRBuviZI?>uQ=Dve@1XwqDF=rXUF zx9iKbdB=vy7VT|5&C}Tyf_v#{vw==E-s<(MveH8JM27;B9SKOPAy%z?LJIL~BKaY_ zs{}S)h4d7E?~LCYiGx#oxC-}CK1hdXq3+^Tl>WvI7d4bC_L~K(?gH=uk^gF`qiW~9 zp5$ow;*&PhNG{;>)omuk9PTMGt3#t^6}*d4hQ(tCOz!z*>8vm`a^oz9*)vA{^1IkT z+MbbbL|=O_Xy<1)!Vce6-z!*>S2lDimA+d3u{oFUE1=P|ToU_B- zq)!(*JZc2SRO_%cDgIHHWh0a3)vev>))jJbJH4sMR@<^CNjiiRm* zok3F|O8GQ%D{SbBj?X*|StG;zY3wpN`|_T#;K`5Mv()>mjHjFC_ia9hHgJ-XL3TyT zd57-|=+I4$JCm;_H*{e~dD8khica&?NLggnD=%1=bh8v3+!`j3fQ8J4AS#YqSyTI> z(uD}0^XhWxJio`2fnZU!`Rf$?ve!R-Mzn#vDJn$t=aqokpyJK*`|Qf@=*?_TZw!nr za2QpfzTzPJ`(0Nia#)=Or~5Iz?UnP?uErDbwoNA^Xv@+gHG&zd0}~y@hj5I?cSdpV1O`M^%rH3F+29S&ReY$B5r{VqBYVZ z9!F?ghSqMwn)S_1kF!0U;PEhCBg*=%RLcYac%tT7;JPvpdA{ro1mrl-oV(1%?g{2q z*5I3{;-N>^cw*Z?ot{H34KRQ7E$BXcw7=_DOB>IYL;7|#tu{VOQ^|G*(99ZC?Wob6 zk56$?A2`z6^67~WsyCu5S7YQiiUN+zUC#PV)Qgs`5v};C64O~GT~>Fw&m&q+(4<0S zc&{>?GYV^s$-ux0;CD|zHcYweq0dV1>&lQFD#m8YRgwURcbc*cfAai zJ(y%O&vOR~7Z~}U!by};Zax9}$?lG{elCJOD)*xPHPOO-jt%E67bV{1F@M!COI&Qf zd$!%??g+!ARvtT2u(rwk|B{CRyKHx@-{} zCXLURcXk>~={3W3Rk*o{=1TphVJSwE!QJHA@7>|%oc|5g)mrtmfGkwo3!uC6c|#EE z-;S@u`fxDenfKu5yVNwcu%@|Hqfexll{6j6g2P?9$xTpYh1{rrFnOGxLf)Ppz`AsZ zvAPCGl!iEKZx&t#rK$ubWMjF(!f1VgqO4JnbaWW%h0TzNYSE zQQ+!V^@6kMKz4n8@@53;LgTmGOkU>F7f8e?BoOrd1Dv8~{ko2<&@>UTVq*g&haD~m z1@LGAZ)jU-pn5DH!d$v4Dmu>R%U_3%u7cL;COV{0t!leS+q=G)Tr+Rr_If_vaqFS0 zFDp->bUOYOMX~sdhcvz6*D`@});g>?V05ZnENC^M7IVr|I_yrQx^Xsspr<~5m2r`) z9Mwj$yGWYHxmf9Hzl2qkNjr%8LBpIw+A;C0D1vX!V~-q~oCu3;LE-mi^`H zzcb@9o@*sw`uD%h|8C6^n`t3}Wx?IEVzG-sO^?8Ro1wj55dGFr$As4@&GkFWM)$GS zjAPH*Qp>vfwV1XO>bl(hW=wb_^Bl71p=O)!I z`c0Sh8AouRcV6c6eufn70Bai%>}Edza1FXvsY*50L&As$mCm}^fws&13H2F}80Gn# 
z4aWAOW+^a&4e}0<_#0Jy`GW5#Cip|~Dp2=zz7t?hf)WB-)Fd;2E_({Vy5O~(0+l$H-31vevtUq1Bv|2WxG32fpFjMoBI$1y)2~--50`=OjK{T zuouaE6meYdw5>W`wz%Mi?sJ#iTVhJG6!*4E_xB+4a7($Z#!E!RDs}Tp_>p+vG~wtU z6W=Y}_pQe#mvk;${FCiC56?ilPgn0-IsI*8L(hNSMUoU6nf9MeD#Wxe{|syapvwp* zz%4GO4q(a?uoHaJ%XgRkhaG9x7qQ*F`R0Dw&FXL=wvsaX54mpCXg8Vo@w?nQ+j}`fEfkmW{X38-9s}&NB%t zcwj3&WO5}-o7pJ}Scx)2v;1Vr!MCH-hh}S05AZUBvCF9}XVUPlH+#zh0$h^7M*zb@ zePFy+ULfzs2+;TYttq`V37&Lr>=t$s{U2v!qawt;Y3Xn#-vPin0Yo^$XNA%@?k?|C zY*0uMrsw)A>CR&f!yh9V0-sC>;=|Qyv0(DkMyRO^hX7{~R>+)3>ZKlS_|B ztwShVZhj5a;4ggNDmzq-kyQ_z1%DN~^E^f^i|*bgp;v#lfA1p_xNbEd z$MdpE_f5uHCa2(GEx1+hVGgRsG5TN__ zGH*jmx^Mc59sG^y7DN{kb#&E1O|%*mElXmrJ|G%NU}HtqgE03_bsJU~Zv8=ENs;pj zjWO!lwyk07)!IZM@rjlbRJxVp;XP)3j|beR__8+o0I2giTAa zhyjDd@|~h~hnqn*KV5=)c!~oD!kn~`m3#fV4_`pu;Q@@7>$mB&wD5C{(RW{yW+KO_ zy^Uy}Z7KfeWZs;0Vrqn*2BUAvEn>lrWBs`BX56dssRxW?VFX`WX^gneIjUd_`p@0T z?oJZnP@ToL^^pMK?~epA@KN#oX`(@3ioHT}bR-xwiZ%i5>@n4;ETUcQD(;r0{7z)Y zhK53Z%ZEe(RZb0vbOuWXNXV19etvaqn8oza=Nv>+%jNdDohu7+4GUh>y&`kZNvu=z z8jlbJrF;JJNwyy~kZiRgRaY`brw88bmT!L%&|6|$quPmi zJ%xjEy)Fqge}vOD>E<#^A9g7;svO-Ef2PWOq!1&XC9vhjViuR)SV0OC$vQg!@b(c)J+`O z^g;%xggVyu+V4cPhXr30c9olg69T0vtjJ*D>uIWf9{{ZWNZ{1v5ldXFjLS6Auw7w%k7ueWj0OJt?HHc~z%CJR9_+`XhEVj(d zD(;6AeM=dCm-N3ptK7Wf5SxEp3dahqCkrwAzRYtVJrI@jlEL12a^ z6YPV+$Y6(*rym3RTefup%A+LyC|wwN!I=D_BV&mCpz_n&|ME>q zM#{#p2n|R9M9BZ~0-V~2Vz4(BxA+Gf{7+YR{Feb$1?utts{zb2;Az-8nf3puC;L|m z3IUcZsrq`P|F>rMCw&=Me!k6T{$Jd~|G`G2C*fmQM1f}fM*+v*FGA1>c-mkRSp^{U z{JZ1+_suT~fKOMUYO>gW?jr#E0Pr-HPA;>*pzHtCO_Bi5DdvG*$^TokgX#Xbf*pui z{Ko+S^wj^y82~tHkxoYapDkbh?kNSp;7Zld$NlG_1;F$Y0H31&pQcjC4YSBUz1o*% zjmNzQ5d6LHUkq-&6XX=b#AttaQ}a0_@pHe^iUPqGz&;Btu1l|z41d_EGmW1T@$Yfd zJOo`|R!t2AgZ%-s=*Z@(`)!uaff5Js`TT1@rx_EyLH^`{$-l6t0Hzym}I-XZ>k`}hC+`(~RE`Pz-^I+TaFFu&g=?hs&5TICZTf86AVrvk1XmDOExX+6V_ zGz;*6kaltlzZlj9FlFlVJ_N~7B+w+VW`#|@ZPa_xac~y2W)yww#o6nNyK&RjH{yvL zQ4|QxyzQ7_q%dQplu$4%!rMwAY6_$+ZTKWA2Y^RAIl|d9DhLEIZP~AhiS+^GlHH~~ zQVsbOlem93KRDzIw}C-#KR=**ZiuEop&bqabN?r0#jUgXiISB%b6ZLRJ`XYhb!)Za 
z({l+RCjajx^94LBHHbJ&C^!v`m}I<<#~EI+eL^)gdY{gwhFI_B(KrrDn`wbJ;E#3D z_gebOY6RsnB@d{Wq_}UZ6a%wGr#s(E%*O2vo)|4;m8t#L%W=9Y`1R=M=)f^^KW0Z0 zify1&&E1R{{_l za~&41%Wkem!q(xyc)*(Wv^7Y!=xsN7H+r(t49~AefM*CSXNtGtDn1En+9Oz`qn)eQ zQe~T*n)sU+A>f@-T+{vUe){9qWyr$C5BS`P0DpARUk$+qnD8Ewl(14mCQK2`az_@q zQiTc_4LrSm(B5}l*@24vx4TOcjt}uf@aeUsKp3e=*PeZ6u#)^?2_3L@o_Qkl6m@2J zFXo*b6?CpSP>gwZ=?i1DTsx8!-UQG^WE-T!0Kewk5v@;O2{+2BMX^>SW&oxtrQnYB zQ&7%1URRX2+h~3`_F7URp-m}n*(sqD-ejOUb+VbzYgRUn29 z*kJ$1!sI|{(&~P?ZY#*yRR+idPhV(zOo?4{3B6gU+JEm@2*k3sN1Cc6Sj;jkf*!Wz zC~Hj*kR|n~TkW3d^;bQM1DMGgdD=7N;1~a9K8P9#&9>kKINOA8e_IP=ZbH*fkVbA; z9hH-lt30XG|EV+)#+E=Wk+xb0<36H}iFK{}wyUAH#7eGCSyjv@)Uu%q?06SL(ymR! zZTh4lyS+%$n=a%y_)1vx@KbbLxf1MRwjrC%w5t2l<_bnPxAT(iw_}S$4=9$cY>-8^ z5%Y3*T@Qw+N_|k;jOEU7t_9VyX{%j0lP$HyMT(g6x#99pds1e;BU@xiR?DH4pRLCH zepXNn%`Vh5X3TMxCh{n%k)ZUN_U@mY&7bZ=8+IZxtGiEIvffvpURNDkZ`1QzZyQ%$ zRV4*rG*YnDqFux?wvx*fFaOeT5F!E2S0lu70&G{rvA^s_B3*&uPy#jEr>|ri=bG<~ zU;+$%V6t9N;Y5$n!t!tnR=!eiI2nnt{P#{nz~xiUuQ+BWe3WI&F5g<@jNxq+kDJKh zK_^j#$;mJsxM6==G!p3b#MG)0)!k}?Zys5%@AMPFsm@T`26YodRkd1of4wZ!1~hqw z{djO8bI#O?JEE-9Y#rmAKY(fB)$<}bGc(X;F@n?;RsnfE*SKPl_0wT7l`aC5^lM}v z5|W@b(n<4NXBpeZYUtwJhW@7Az4K~<-7&dH+cS$ozesPvN%BQTGYdzg-4nCsxp~k< zNWnh&GRe2R<5hn&Rg(vRj090(yrf&hqaEM*lEznlB3i^&tcK* zdt2q#w5i2evI#`}u!4ajNH_>G7Y~}yi)-eGmOhHttriX{$F&Ae#d^Brtvz!&NB>N< zXYbl73cBo54G%xWMyjYB$-hI68zfz0%XwL5PCzh&B+^Y_{fg>5g(cr#06kj{zY&EZ zkS%kz*cSLK5x z{RUkOd8LWY3Nc~seu&{2*xqlVU>Ofb5@6r`uP_gdBwFjeewcG1s}cDVLV=?@%!l;= z2@H+PY7!3aR{N9lQKCSaCejV12&_%%TLlI)>};qNzP?~ZQJ1aJmodw($imx)#dmczLd#3O$fE<)|y zsi$~M>be)vY$5KPTq^8hS((ghC72x%S`t86BcTcXA7jksQy)>BIM(u*+1)Kg!BILv ze+S8NP<0d5T9KBIR57PuFv`I>5k+}1ExhP=mapz}^L2R68#buXrOiq|XGL9`V*f7RKmbI>TM! 
zY_jYD_UO|;?XhiwB;MtVp~97Aa<7v(+o5O8bK-$rR11)@M++==TimQBvLEc<*kn2) z`&Z1-ECNZaXCVYKt<2lxjn1*v6^QG8>&r!2nHl5(rlc^SJYa(Pf10}Ta45Jh%ouyf zE_?QECfgWGmXApDjddat#>kR&R3wDSHg;KusmPKRdq{>bMj4VNOj4F1OSTea$@ooQ z-|wsY-2d-=?tP#4yyx6=&$*Tf429h5JBu~Ok1>>0sq5%$9A%#Bqg?vX66I#!gj^}p zh_VHxW6!mP>^5kV(m>Y>?G4&mqWue(hQ7}frU*eGv$L$FQZZZ;#y1f_5@;l8Ynza2 zd*X8b8c65l{@hw`KUPwPJ6L)8UuPuNT6%BVqE%A8T@4^n5dW?PBp5F7(>?OJ&Z23!*lmhZm! zb{F~u-+Hiw+jn?1xEfHC@MKp`-&G1}L!%sd`$J(z@gMr@EP&BFGoBDM7N$c0YcaDf zq2!G|c!O5up(1W++gWz#i;F-1PwP9e0Pw{R6OEbQbmViowilONbvFu1ZeWxc*&Fl1 zS2&tz->-6<3VnV6T}{5aoL~B^PJ?q+;7n!_L^V{-@^-sy?PAwqV>!kAh9*SxhT{C* zcm+N`zAgKP?u{mZRkze^z>5EazM2YVbmSOtGhF^rX=HQTe!=W|A9iPPlbUh5NX8+% z8>_rBDlv8~O2BY%EK(wv($5@1Iqo?Of2Yp*az{m;AW~brV&AB>;S-bti0AqRQQXgt z%=`)}^rhu8Oja|l%kT&=?zguLN&Cf|!C(6H9T_onyq(c6RTOR5$X4hs8#bB8KJf=& zb=d;;W^Lrw+??PtvWN(ThsGI@E1W#dNSKO$T+h8|LDYw2JBDk>FKUDo;%L5C?r?2L zCr32P`ZfiT9<}?FQ&}#BlLE#S(QaM70tkD_i4>TtnD3LB`Mh4KES}ptq3V%NV%}jV zGozaPFehE$4c9h>f~+4D+$1w`JzUNT0}h_e7xYC zyS+>a%}>axiuG4$rbpJy%?+Z#93%z7v!w{5KsE}^N0CK4Jxeic(u}3y4^Lc!Uwxj& zU^P2eeSVa8vXQf$XfzRCgW$f@iX=~DXferpG{D<&BFBh@bD?ir{A8`U$PkyFdF6x$ z0a<35;8ogd@a=Y)fsc1c=t1RNxy2ILAuW2SAq}o4(afbtNLtwQR@aPT?uJ%CayCCl zrRMUgX+PF6Odh}M(K~Cm@yhCg?sH-Drp=ct-fchN9_SAh>{>g2hboL$D0WBu5Vue; z1yoPrbsZ3_dcxoFk8=u|1y28h&-*t(k+k;8n1lBx*`xG;sB5qI!%I3twSyjI>jV&Y z@25PAJf2;*cun|t@%0G*RHAnbDTvT0@CR*TQ%zW4E4el)ab>D8trEj3Wee+i*u;`% zg{{&yCDs=X@^#}9q}gs(JD(4#fI5rvMVzhM zBs>Zf%TgPk^lw7dI=#WQaU0~Bs3BJ(n0ki$`#&Kpa_?&_b6;&9JD-&kbc*bz$}MG> z(jDp;pk32V%RqKH@d^ZYy>2^!OA#9u`YCErfT#|GxmNGrj;t=LN zJFl7W!rl@zi=Y}ReSJn0LouuTMr?HNYdBh{IA;jy!zda}#`XV6>pN`k+hWsK!c=JK zSOzN9H?!rf->ruVN$Ah){PQP(AWZ-W%v0q7X2y%_$0@=;9uJEz9$5bYJPrF={@OD7 zf=SN^)$}REeMFc_VyId+hdHx2{1rGsAQchjA=@>}*CDU)XzEnJ5%M>o+-G5V>f8DP?=m8P0H@$6SHP6N2n{j_YW z3ElaSE^uY_US#C(_4DA$;#<@QsE|?s0*^o>EkgfX*K7uj#kxi>8m?7hEY_{x1nT>iXvc^{H zUc*C9=zSZlql8!~_g|18!D0*R>$6Mg0S6NlJN(G3uN5YczLBP2*eyR zpsSUP4}!k9uN~C08t zu5qtI>(jP_1w}h9Y3S(bu_u~SIMaq{^dGFU;D~Lwhmb{ijWhpOw{2&oe{D>sDZK|V 
z_#zQXzo~vpq;kftJ+}bk=?g6*WGb2-`qQURaN=UZ3ZFiG27dYk5)29c@yZB3&mPoF5z40LswY#eOujF5G82gj%>q3xU%LP8=GbUQjbyE?}RMs-JZGm^Em zHZc%4dp-eNA6<(Gu+ThO`a2qN$sH76J;75D>BRa?#0|85poWEiLVm zeipFp=M<uMhOz?>!w0E;oF+V>CU8Oycm?#*fx3Yf?!kz8awU)Tg6NZf zU1TC~5;y-m_#J%z#c%l6=uHIjXJRlgu+IZOUUHQJH-~@6&93Tyo@m{9`Q`WWt2b^I z^`}P2vp_QjfDrmCVZNTJnOQazdL9u-7(|F|7FgHT2!#_kVNfMSQ*9yauz?|yu`WTc zXfmUypKq5pB^cNUen7G~Jy;JYE;#sG5hNq45I8u9CwP#j{imJ^IqMVB(%O#*LfMF^ z+kg7>1^Leph)LG;M}P%Bi3@*KasfGML(o!MNO^w;`Pv1E4r@*b4ww2Xp{j4MWJn~f zi=||2{xZ!p9(u{N1teY?ZAByu8z^)YA6KiqI{R%Jy+yFTte$$Pp-Y-NH3-Ed^E&;$$$o!zKl7SLkFClV8!G10 zUoJqXfMPh@M2BvVq<7ucEG9PJBZ-(FWe}jB!*>xwSKJNbZA(&5EdeR$P&GL661~PX z6Ea}R^cV_mM%)J=_Mhv8^kS%h?d1%C?d2|on&Y~;BQH4`Al8ZEdttM=huv*+pUUvE zO{)iqPyn)kQc{&HlXI$eqFf*Pm-h+^^)W2vby*CWzM?*8z=X0RVW@a{tWl@*7%X9T#*9RUDM$Ct*w`I8QSW z6kSWUrwn`?L-Oa8Q-mW$n$Ul47XTHK?$oc++mEi;oUCc@*%?jwyh`z6??p!nyFd!2 zIuNWkY)C+5URamwfefh)st{209p++cp!3lQd)))u<@B7bRm-F&5dPdv(h8;CIC$IF z{gA1PyJ*TYQDkLm&3iWUZ|hFzOtW68G+QxBcOS8@n^rjEwni8@TIr5qiDw4ojsJ~_ zJc<0^9uFs4T)ug)%vRe`!Fw*%^!Og2cW$s1k~Vyv96zM?lsn}_b4#`&__tMwZ6+*O zKJ3kp96ZT&AoqA|u53ULgryxiD`p`oig$X0da7W1(fd80KF^`Gt`+OZq_%loY>*S! 
zg2OdD1&CNvMb_suoELKbC8J=meV1q8{mW`|HQV~U!3W=+8k4mp4a!pCqQ!M90lB)~ zdAn>cC!J^>SD`A)qlcXp?*^c871>)Vk|sC#lcwBm&rzxI|IO9TN<^1eIP9}!hov)b zMeXcbVfTCDEhnA2BYPe)5M)R^-~I+m7XqCP)}cD_&aDD60zE9ahAv{h>NwL{T=loo zjK73hFx3Z@6Lm{|D(_11d(4&?1exG37p?C|5^%9vXhXNfz=GrI%@Gu?Uc*v~(3g-H z5e+RU>lAu`eclwVrs5TsV$suR@F;(Ci&P-z^y11G`2T0|M_x0SL%uxIS8RIgOcl4B z`r3G0Tcx7b8YT;Z3dstjF1@{b^5a;jn8P*5zBj-r&#|n+d(y*t!Z}1U+(f8@{bidX zxIm?8*-ACMF+ElCj*j0e3>i{?O6?#)#dKDp9Mmk^Of*z%=0YMVSGItG2Y^yhsX~{dgr8#Fi>`j z1o4w^fSKMRsTOHr!&MKP)`6T?@SLj)&N8634Qu&H;cp*~z?YEh!Z2li@tQtGFPS?S ze1%X%_L2mV!FR<9D)W%P3*wNQ|#e$paZH+u{mgYYAMS>P|UJY9}LKN?Ja0Yx`86HrVV}5G}3AQrOY&e|7ELQ?f!IHeb?1J+buc*~=X>Y&Y8BFO0)DJkZLZEXzUW z6LR!_O9W$3`d4L(RrMkbJKj;MDz1;e;fD$cG}9)zQTOf0obsx8MIEvv7V{BVK?;~C z{tKB9{D4e%`)0A)bSRl_ge~DItS#1w?ThW$Ucs{LMvUIPPb8?yYA*5@cq3s*2c&BJ z5LpQ@>ntU~0tVM01hvvhLX2l}g-7uf<%^LNtoi&1C?u?ySN)tAMV8LB0vfE5^>rEU z5E7)$qX^e-zoS;{*%N3E&&gU8gAXvRy#+(A`3r$|s*&3~R>N6$lvo-KTb6-{r5Hru ztGiE5FGhqX(LovLlaR?~hAr7EA~I`~QO<)~MrKCy7F%)8E{4}p5}P!hnz>+AT{A0d z?B+CCe~A;NwotYRI%cpk$M~HOs*T@l;c*Sz9l^NAJPTw3X%>atUC*}q+pb9g=$t^} z-X$N|p0OUM9#;6D!`&I&+d;y=(DzOEh3h5rf64bO5Ty;lyUt<-*DPY(_2y##1?0Ma zv35M7@72Mg_E1!{Tzi>- zoMU!YD1uyM7R9SKUOV(e4xVoQwI}q8%~Qtp!;PZu?_SE}1SgclJkleHi1gfDSPX_K z|Mq6VU@9$(--N=M*QGC+QoAN*@9Dj1&<)LQR1C zp8ECOEK3@r>(LVrm-k>Y?hHvp(OO>hZz)U!s*+#mqKi-Pnm{K0M0N`o)r zU*iT=4NX<~S~4^qu<0QpLL^RWHcsNg1Xr;BiXmYqt#1WO@Q!dGud})aw05S#@BN206g5&jK*1V@krs5h{KTZD}B7U8!MjR zCsk~lH{z|I=qQY&e0Vi`qdm9R3c~8*GVjK9KT8S_kk;^j&ZX?VoV+1s@Df!f*L~rN zZ_a4a%3`#&U3O|52Td$zYo&8=Q@l!}o+g(G9f+_nHrVo0%9E*PS|#4Tz*#9!Iz-8 z+v);o*i?pV(&QFA0f$)54JzrLdj*3`yInLB<;xRvd_lj*Fhh{@7ftZOg3O|+{I4s? 
z5NZsr(?#3)Yp5pOZQ5c=R%w9bm%`uKoD$vVsY*5x!>$&HVpZr#dGI2sKyS>ri<`rT z|N3bo&_U-D%}YZwPlLNu%;ibh%2J~vPA7g22!WN+ipC=G^9%Twk&H$}H{)Jw@=wJA zvm8pq8h$-Zt>|9pRVIY}uyTbpIf1_a-PV9A2<|+&?EgI>AS(VDDWi6_q)UZLq1_%g z?(G({!}zASc#{w+02TxbD3niK2>BOIi0MowoOA@WFsVSyU+k<;6+}5(a#tvj!}e-+ zLjz6Gq5d*Gv(GRl6Gwh`E?PX@EOXWqmhm=^N}WyvJ4Tj8VFBcfIFw|6)R}6!=fUcD z{nDLI!eIA-h`5}0r73?Ysl;@pkP*v4+dEeMyXJjPUO-XNc6$sbtvz-VJ<<;pGLJdr zqj(}kKM@Z{&1E+=At{qK4TFvo`y+u1(OIRIU3zbhW}`rkU3Wm2c~nsHQh2*$1mpObSe4ss|MbIzso=Bag*0IL6Oc#A@EG~9 zu)jd|#~JH1CDjGG!*kzgEvAM|y&R8;IdPe<)uYTsZ$zTG9z8a)J`7pQ8mP-O!*p?b zyARc!uTqwH!(%JpcE{N}<;Y`5?;BHP3@5(!XAG#1Kbw3RpDQ$3!jji)f{}cRtRAAI zRI9e*#dAAhkzoUYIXRS9)0__aBYz=vxZhN9$})_%6hfQV-Ta!ENeqdLDMjRq_)#SSL!4mei2ecQ zNpd;w@t~w*OHM7{CGqdyo%B>;Tf8TaK?3q6*x^q{vB zZwllts^~}X!?5*Egz|%nDnY+TbhsjEcbj4_=&0|I%(18lfmj{0FLK?A zH+@TeEoNDE(uw2g;VSDLPhaQj`=U0gplrqZa9dMQ<>J|_e`l^`$E89xVkaU@NGCIk z_KHa|6nENErHF)&5{QATO@z`L**!AMLsTeFdgC5e{ z7~6U9r*u9zsac^_^F2jwRa8ShD>906;YPKM(A)c0W{kNpn=6 z!Pa04g_zzTK5PnMXTIjZDYLT8rT`yqW~OquSxJ4nkRKu#-c}uiFxu}D7)sU9g9ahn zJ+NpL6A>Iic{!ecx$V*D4sL`vXjALsNa?%2Ct~GQp;fvdOfgNl&&12a@DkrIE(of=}gjZil?rWT`G3Hj0|vY}b=#k{KasMf`x8XwRBSN8Uk?cXxy6~On{8+6WVhq>|AKyB)srS?%D?*K@^2JfA!p5ty!lKcHT@OEz-XD&JdKJ?g2>W zpj7_~4nN5*E=jT5n{Juj?$K^BcP~j8-Qp9qB-;6#VmsA{2_DVgyTUybc%##^W+waO9kzN0s+`NE zWfN_VU6`J}iWMw3=sJt8SldTRsQdGv#1!4N>X!SWI!juQRJcJ>f>pl=>)oEk7iuf{ z>Nz({}5`XnkeGro~$@q6C@SxPb`DCU8 zRe7*1_b0&4*a8hcIsg)#o=!5nuUEavgcnw6aP*ngTkI2BUDOrkX}3?Kmzk)It#x8-2f>IT zs_0we`o!I9`-Q8j+K6vi1zAMNkpRKePm%S6YrINPe<-XUA7n^pUnjsG@k3sfbNz8l8T`z-JRKSh!8DeWj#VzGB>gbL1^X`b(0d(f0GKTTQ zZ)bxYw@_AyG@o_Bl#GegDq^9HkmZ-@a1FGGA~8F4pxZ^uxk<60KW1!3Mu2_YZs8$& zK9;mPLphV)8t!|*EMVdcqc*`+JXE5$`Zld^1I(236~_t4VrFuug4c7*i_HVjI8M@b z1uyF!PsOU#Q1KvA!7plP-ROuor_sc%9Qa;`gBLGQh`f0`DtC%`^z9$~jOpAMU%pTZ zoYzew`_07jTmr@LYQOQtZ!eIFFo z*>b)VL+{~O+z^W5oAp9~)A1G!#j;qub>X6g7!m?m^B(km*nm>Gb-etnIx*1MfZrZ5 zl2m8lV+#@ziQ@?%`}Q=+39i#PR5(_%Z>!{N8n0*88J{*>M~?1^ULIPXXxv(wYGgt4 
z#CUhkA9BRZwrCmEaYPJJMQj4RfUSovBZ^6H$9mS`$iFOu8C7`LP)4=Kg>*(Je-&{0 zUD;VUV2|*Y>xN{QWNy~wTV_$YH3qg7$rPzbh5fN^tdakD@qU{T+ru4eLVft9#a4ly@Di>6|aAz>aWxmbtAC$?A z2Jq!=R%~%7;J^7g{R-@V#j2+ThLOL+InzigH}Y0GITahfNSJH)l5U~*l`;Hue2@!W z%bQzF^F%2vGr733$kJarG73?wm3Y_P0#f^CXk?arv{7`}cs;=DctpKZ-^WM)hfK3@ z|5iq@Hf|LvhlQkZC1gnHKXQR_haB~vUJJt+U@p7D0$ylZ<#4pm|3PP~+OU?DvY_!~ zLD@H#P-;zYOkDgnCf?V;5A6d^NBb*ScG~d8N!zlnKPrYyyLhJ79csXgE_HCXCo&@7 z?A4899sRA@`%!23JYoky!w!Pk4EbEIRZcKnz$lvyBU#Hxh>>f;1MFLn^q}Ii59=3w zGX+9e5tO}6S|zSmtich<=X|DB%6no&W(Z6XJqDE}T8rU(Q>h+1d2mI(%+&^z5oEAS z@yTGkAwf$z(LFme@JghnZ_Zmm7W$Jj4$x^%XR)eVi|qLZ^~KP<_O_3SXcQa2X{-d4 zpD|0NZ1CLQA~fy3T*X2F#cJ)6VYzC4kjjgzjsOQU1FvDB0x14;_ft{X8h^l;VI303 zO@(t?#QfbeMWK?BuQ9U0!uRwxTA;g@!Q)wTIeRtxy@18UnHVV`5j1Kle&MY#R@bIq1=F; z3V%A6wC~>N%4AqN)ScufX^g8VMlONp)hYeEg;2$}N6)ydeXk3sn@|GDX;_m-bW%{m z{q+I&KyY=I84ZOFgfo0t%W+hCdHT~S6;_g=fz)0%47+N70i5;(`WFK!=l4|bdNq+ zAU&9$v*{Cw_I?MP~M z|1WWFr*>jngW?wxbsMkZi162MKNah4zY-!+M&LD+Xc69oQj(f{&zb7M_v#wH9xj=D zsMh{xoMN9RP>G>9zDQ!TlGMXs-&_qO4{RYh{07O642cF>)7~gSevs&V8bQ6*-`#7( zd0@ewL&87ePUw&H*=S&L-B}L9QrQbUH%^<}X8*S|7Us7EM^m7cuz@G<(eW6^cjX|< zHmLGl?|#m3B_8A%<3i@qF2iq@brhxiiy>itmK8wuWINl!ax_sk`WZ(d#MoA>wcCV2 z1=W4aZ}SZod4|EZT*mWX^z8EQy|MPB?IHA!WhFH`urv0kcHB{+#oUx}tb4DynH+8v zlzW7bA=N?k^qo+=k%a2q3o-sQq7ew7-W*P!5v)|{XuW9YaZ;Qw)_y-Uf-VksA-c$T zc)uy%zPdVSepKQ+_a+QlV3gENA}+#cN@gEJjE7~_MF?LT!}UvX0`CQ*mu7`Wv8NYr z=?_ixVcLMCJftI|ZY#=L9z1A`%vgBCoBrJ7>mfqPXWX(fm;IOL2Q>gqe0@o|L}#r! 
zbIcFK{khIXn@zmYjl!F?RBhw_H#jqmISqJX%R4URW~jO~(aKzJ{gOHTuN9D1=$M$H zg%r5NRVB4(+Q70#n@b|TmrVmCcOVg8ybY`IxR{_|dV?Ju$X!Nx6uK721{~#g6T=-t zJ&w$%-bODYE%mZWHMgmSFO*lpkZrX!k=p8gQF#SmWo7%9Z^2GQ$!@nwQfYKIVV64e zO|4b6SuNL0RPUm$)}E($Eu=7$|-YZx`1IV%*l>l$d{R2Ss zD<&pg|6MWj!qKqn&_Ke!tgUx6rTMdafUk1PSf|pGqH7u z%tRxAu*am6aYzoE`0b^Hkcte1vgr3j$I0k7(444GI!TeubUEGsVAdU^RyF;cLcPVX zAr{XMVHaW0CjiO&#L;Qoj#%_cCDOs0nC9G^?f;gbgOsy%GGh7 z=Zdg>F$C^#tEOA0F*P&iJ0p7Ele;Yf(vKaxQ^4rp5o=^2PT&A#lMg(&gJbde21(6x5P(4me>dovM&5G9b>e+AFbfr~Qxnb&7uZH!VP-SnYiKYSH9Zy{2#wEwtI z4l$&=+hJ(iB991dPoY#(qgd+NMoGa^UycsJGFb{VTf!&07tdYfDo@|{n>FfL6|8KG zkFd`Id!8lU4 z4OT<6O*I2@rwZBY%vh5TAy6>Rr`;WZ zwq`DIPky;RbEc8piE-GdEZ!+Rs=;7|DN7GWIWJcTrWn5if%-}bIg<3Cd>o3!MvN!mtUhz4VU1f@+-Nb zW-H5Yc4`Kd2^agXI|$t`SP%18Qoi%LEA%4PiB6lWfTDn1T&0CZBcnWT_#-;UEZosd z{DpDd-Wr;}344DufkuL#jLYo?UfN4G{zxJ@+Y`ESnRa+UtyZLhIV+txJppOb2fN|P zz_x>=bMsiIuykCKY`7-4Rpx*dD(_;T??UKSNGd=tKUngP&0X^&{{$k`&UZoa`^>$-t8Q7wWWxM zZB#!jJvV=dws(2P*ANr=|9d_$q5*Fg!Tg&qkJsB(hnCDA!NMSuQ&$iD7>Pt}p;w#z zy8wUEpDfE7sf72iK6%$j6~%-^RN#je&OfV|bZql)*WmlA91zixLS~h=K5>&W=0><+ zeKUg6K2HwZhPbakc)~mbTUsx)NNctJ1_q~pSG%%mE+P~k6%M2BzT3iE;O(m%C?hd0 zyTy>>&2CU=Ts@p{;l2@;=+-plL|KkYlvdWU5VcXvO#J`$cim+h?zxyA%gUAU4~3F~ znoMMV6A}3syBicKE}{Qf93&ISHx4q3;|(-E*&3^xlV@h5DTP3)FZ>T`wluhEwCG~j z>^Z-9TE10=@CXD9607!wMb)EUf;mBXh6tb3Vv5PYPELQQ_Bm~JI^c+yoR~9laWRP7 z>R-h7omsyQ-dsPMrxiFke#E`I?@Z4+so8pgFD86EoRv=KSkl`a;({7O9p;W^og~}N zGRQ-TXEJ=$f zxj~#|iLZmL;c;}Ugl$e9$`jZBFP9`h;o#uZ+C4a<@cE-sm`$i;uR{Lx=OUCKHOaSj zBeV^hfnUCN)|$a4wgSNQqF5Q-5qYo0!ekrAzn@iCFG zh$ykfOW>OQC-bC+ZYJXi;*991sNT1lxXXFJnc#bUX*H63^07XTs`Yi<}GYgVs)FP9>rDP5t*`c?(1Ex zRXwIHjaue;pxZdCj#S{wwFRCzT{o~Q4}$2o!0(kbl~mFkpTx>O+uU{bEK*9+(T#PD zI_*l$qtbgZ&;Sdnyq|d?XAZM+AJhV9HT>Hq((ACFjpkfdfyI12?&yJpUd`#p4#L4k zn^Qf4QMWuYLzeYq^j`8{Qf7#cS88u?~gCRHKf}fgQ3cPh=&nWpcU;; z#h$Dn+N2T1Ryo!GoLd?Is>q7-n+AEwY|HsXR80Y@6<}}lC8DLY4vp1)bz~22K?mma zWk_Xq_{wA27TSG5*x<66HteT7T{f$GCC{N%`b~9M$KkNM0ukE?MRiM{jHK>?xWP#8^E=) 
zXtY#wQ&w4(GryM5H`D_~W9#I>DM~;QXhQpR5JpOWEy3`P8rJ>3nOXT5gkYpW8!@e7 zaomk25Ot=2gg5YQ_k^ql_N*{7IQNPCXrX6Ksl__cGC^c6g$rBS&eURuO|9Zzh?3$y z`+qYVfXvUTs@R~F&Z;T^RE)rfs<*|~%N=s`!A3EikVr9@K;^H4n`8F{pS@|~CmH}u ziKDjcU(sSH4dla~)9ovApz6jwFPn_|>@16ZkYYsqj?6h(Y}X8ZDUc~fIURtLweDM0 zb{)0{N4W|Qlh^uzP**4&4l|j{rciwIJ-48NDmot+=9E%p)(0)!B4uvWyEluV_}6X7fh!pOP(*&FUrfeg3wk&+ zjL|KpleGHM;CLKn^s{~PeR5^Nay+GG>Zu3JJlb$X-<_REa;=S#<57zhB(%YtW48P0 zAj=lr|A0qe6JW`PZQ2NzjxG5CJOOMVhwCCeK+}7qx_`>7Sb|NqWeugTgFNHOfM%IQ zB1J}ZChP`Ur)S1nastH(?g~MsCMX2RVp{BkJ(Zh`A`!5)O0?x}%WQW+pGWr}mSBt=-4I0P<=Y^x#Ow3pm`bGp zZmBx*qY>i|e)D-BE-TS0ExlL>yOF4znu#D{hD0FCDwAyXs^(Ku z5UFbQWd-7}=((H;l}KXI;;DtDkT9)08d0Gn!P90!wTRayC&S7n$^Wq+g8=A<%LWlA z{EtC`1q91VsM);`=TAH%AVHF*RB#KasCmoyW8xJm%4>pc_or#YdQZa2#3F6TA5lh2 z9%m`tLAKo2+AzZ>R~zt4W{|&`G7TiA+wu>iqA=>HVriC1Q^Y{@BZIF9MUuzXNb$C? zgM_1rGZOWt*ty>)n5+9i1(5&|nRI~Hv*rEm{%RR#ma2fnRof)}yK}##8AHeLHG+D{ zwFXHzctrUcMM6;v1d;sNZd*gn@L<(J6P5iO4)Qgx^4k+LbtuO8Tds!7b+2waEZ349 zqd?!fdfXOv8lm)>_`C=tD7BUn*heO_!lG{MBdEc7aGq)jPJ@Qra+EY?1?>eXVJ+zS zb1yvQM?2y+#8P-cbm9ItE$*xQ5AVqbc!QGZ)(Jw}nIUDZemGq|4w(~XjSD0)PgW!(A|Sko+}6Kr#6IL1a7i3pT$oRAWT_5rLtqHyd(dCY^(0 zXh#$qOL}?#`l$t?wVFv5;W3duO~1jhqIR$X1EaT}YX)ywaxwL=NzcnF8_5Wqt5Xpp zgW;fbI5UH~F%ZakMY3aWSwrpRUq`87Kt1V4^~mKFIiOUxj+vZe+gmbj54J~9jJ(fV zOZ{!FJQ>gu4ocp*yca&M{cLivOzIa?1UgEa{^h)o+8mzA5JiFaGcosNFZz$+TIzC7 z$e}v}>U9ofYHniV>SD`!6Yo0BEyZ~yh2Swso^Mo={p=FehF7&gw~70A)DDd3b429Y z%0=3O02RJkO)?lc2tpV%L-9(FTrX<>^>}hD%FpYHoh@us}54 z;OLM&(EVk--85*{l~LZp;>H0Zv7!@t%gQd$CXdtEjRl@PvaZWoOum4LP@>aFLJ6D@ z>@InaIwHII2^(7Oebtyk%Es%nVY83wu8II*YvY}$m?DP28ta|sL*nzNq{#(x_*2)F zi>&Bj>H5b-I(C9^_G@14GMCp~ES@GIv`Q*&7?>Rq^@Mfs94|zsbKl|GQ>v>;I!XJC z?&KzsGZeJ|QW%XE0F}{(G2s}iKLa$IK|^OM^PA}7>3AZ!s8CNiB?Bf|;p z(b?IZTEVJ~`^$svXgcSN!8)UJ_p*Otzp+*;Nq@io>*CIkBBQ%i*u-;;^6K@`23NGC zz5p&IxJG=zSoi7_h<|q@40^7pPpS!@8_Z zcDvBaD=QGxWSdvKyho&gR!DX|r!e%bK4vpYB}{AT1E=46X5Oi^NQxt%znfx6Ex8aJ zr#(ESk`AL~eILG<9Yj1}d{!Gcq?RN$QXGQ`TDQdL7$xR_1>xS?Ia{T!`Mq1lq?)$~ 
z5qqIEN{zAcyxj$f6?y~aH82-Jhz~1#fohH4Aw%&XBfFrXqP}Rwh^pt64y>Tmt7>KG zC5ooYr?(_Wd*CZDNd~dnLaYB0l7v2;{Kydt45w>Q0VP8v82-eQU?%TNfKc#u^ibn% zYYwcSQ_XZ z_8=1EIGV>LSJd*D0W^hwusvPkPj^Sq3t77r`apsNZ+D@oqRtC%C5GEdfD0AxFpYX% z>o6XjPXa$KDrzm>KQrNVS3_8E;=;O``P#?JU|Fnm6I|5@KbO|lO7JCU#5*#pLamMj zilY3ufA40JdGPb~Iy=`nv(Pm*T4B|QZET9}8Sih~xX9mvI7Sfr6Ir4P3JQZ}(miGO zbce@HJG}l!rA^hSX}SD{*~O2PrBY6OhQ&-*Hak!*7(3%BY_2NR_=GcZiVoWheU$B&NxyQEqTegaH2|(d^!yyh)%9ZK`G5w=?oy&%s5EqUUPWR z+Zmc2EnW;Zsy3Tz_lsd&Xs0QGQnSAX?W?-`3E?=94z78+6h&4u;y|AmghgrxMBn(kT>a~+LNfU>rA%Th#>Ie5CM^atAPH3EfovHNJrwS zjpH`JYt!_$?%{=L>jdnSwbIXix`Dpjw)Go&rhM@DPalTh+`}KrQOY&$Hm%o?i9`4i z83m7=zX3$#?u{E#N|?4HT8bZIT_M>{uQ^Tnmo?S~`C^4DZns?iNu{4u4=6E*Yi|UG z@JnPKk1$MM~&5Uqz;o$-KI?7%v)wXTV`#_{M2Vp z=okHCIi=U`bPeN->W=|un&g^RL|S^NL)x;Cl4lCc^K_H zA`u`a*g-@k?QTP~!1NuNiGzF?D-VrB9mh! zWm&{AZ(CChg$WzB^^>3FC)`n|Py)l^c5RLcU~_FwNJ?NI_||y+Mmyza;+ilc#|cmG zCjC8IaaTHIw3FJegK=o5r+x0$A0>10b}C^RA~AO*%RE)$F{UcCw@~@DN-bOKE|*`7 zvvlk0RYG2qGj|le!XQ)Lh&DSQzN&tsZXrsO{_IbshvA!?qDG?>_dX7*))R`CZwsfk z`9L|?_4sgjoNYx3*B~VAax;vdaG!$s9QnIqbx`}AW%X%bokXh_$F@17O5@}b_*Ssf zqSm0uJE-k&>H24 z$lyZ|{8r-fp$~m{JdiKtOt{~gp5u54qJ`Ma=%x&{UM!$s$#&a>xpefb`c?D4HNTVy zo!s7j(-*0}noATye}5MJNW*sy#w31#>4)cj+GUg(>zujg9@}WNLill>ehG}PB(la+ zVX}FMIp#nElLN0JE7qDBt^wrkt1zlQvO=ZPfS%IrdXREpHI$B%{FxeOCW)l)r-`;H zpAPo6$Y?&tom6CRTE6>7rBO-oo5nCz+KFl6opcMq;}-1)vGjB%QurAZWX9uEmohlw zKZeLC`n0*}z&n_=J4O>jLz;Z@e$FcxSMs+jcpUJ-xwm^Qq)i`(F%!wy~n^re0G-yLO(gAd)lZvFdb^jC?*uLud3y z%+(SsK*c&q`|m3CujwAfH6Yi@BQO5*bdDQX)(?9%mv*FD143 zwM~fBDjXByS!6%ZOK0DbA0P$E5|nC%Rypz>3e9IKe#kDa`&iKh}c}Y*yU4;&GgbS*xQ2H!)s}JKd23m!+g5W*ap8_c2mOjMmpdzaKPIMP2YXO|8?^eW7b6|&;zvLkWirE#Wm%-CtF^o4+-9s@t}hJ zfo! 
zIu#lpR&xDi*HHrE4ujX3$h*En5iVB#iAwXhlB?=V$AdW!+fbOXD%+ssDLj7t1qbk3_SHX-?gG)%9WBM)$RDdc2z2&JAskA&CX>t%hDeUdV<>bNUteV ziRd`%EilJd61$%r?xcTBc*foNt%0 z4G#qf^eJ1g909i!Ox~t%>7J+df&;I1oJVijg>*H#UBhBL;$hfXIl84fv8dg}Hgtn> z{Wh>-bkz(rB71a=KXoZMaro3?p7AoJnNd*l;oZ)lnnTAlO0QNvMfm=~Uluf6XG&i$ zm&&Hs9BCou$9-1{I-F2U%|A;pw8fPCZkqgO1P4!6q(tCmI<$Z%{h07>E z4*Bdo=fG>PVad!}t*c5w!;(`Wh30-c$@^>EopgO%u^WQj>^+UBR~_D%@@KBwgX5J2 zvwAd>5%2KXC$RyM3a$i<1fyyMapz?fP4#whQ3TQNjFok)_lK-A&e+Ez)jY!q?^c}m z_S6Z^UM6Ki(@yfL&aZ@N7i_uHj3dVL*xMWlP}3$OEvl#+xQ8t_2UZp_xh;Jj7hMp= zcSS4MZUcIHdiAc?drRx?=e*8BLAur>@Fn>OeC;f?_YC%-cie=D;O^Gi))~^R#S1qE zwi#|X3l_|(2}{+H!&T|m&v_3l#w*1~bGy6dH%p@l-?%mq^6-B9>fFNqa@HBFpQ~$F zhDS49Ih!?OxW{CqsQY}sK0lFNv#yeUe4JUbLkP=WKdiEbov(eM>y-oUa5~t+GWhJx zQ;rMvl7D3vV{u7SURS4loniA`Hv5TkZ3Uf9%hdh-^`4jA(#J$5t*(Re86H;0`=P{% zV8PZI7%PLHW?9E&!1cOee|taUX-UOJmZ?;ysbilYogC(Pbeo}~=WR9L0)fT5Dp|MP zwpp7q;Z)kW;hrSlXU|~N*QTa-9GiT2uc~v+m_{iuu5aYqxKhH7yZ-y0R?;Ybx{Vwo z9LSXq<@mYBiwD^tUe9`mqmV%hT8T94vG{`y>4rYBd4n=hUPEr_8}Gl6YRa$8?iLSC zJ7GnyvwgV=O=3vc@}d+-vEQ(~ZsYTP{bjjFx@AoxO7HiTB@-LjQV{o@qNU&qxuB19jz?+dHhA@I=F1LT*!R< zt@t}ln*+OG4gyM0rVqwQPE{4sND}?P>xXZfiqvK5EI{ zfWf=jexX$6TG)}OHA=a0>%?T7#6SLZhyF`DGoml(va-l;l)I9bKRCg?vTO&T?%Kwd z?NBigi|pR%rg==_Z?^3U$#9cL%U|II`WW3?ulV+6sHR z=C#KJrl$v7O$gLZ^Me5Ql}P1LEj%gpBJ{wLzBhB$S5m)%ii|d*%|%q!K(f`E``m_p zj`7fq20QPdELP1*Nqe{l^a&i2O$6TyI;27gFWh2J?FPh0iJ|0i#7KT1@Di_eNQ0 z&(+y%8C){Y(1Lq|30HO2(EHHU(l^)Bxu!H4_6YS7o;4J(;@~K#Jyt>>sowP}b*0^e z24}!y!|vSRcd6cw(Lp9F=7&}vKKe!qW4wPZ@!pqxPejBK0e0XVxX1IymURsXaziZo zwCLA8N7iTEbnP%zg-?+*{P|R`SKa@_J_bzl-2WO<>IZ+sVDWA3N*N!cFc9H@P@)Va zIyms7y^~+|kt6%Qd@7@_OVjBD=qx)VzYxG= z{rF)I0wt5vB+=&To2t8mY#$xNEJS)Y_Y< zRy4uY7jJV*iIpJtzM0>-6cyo*Tvgnk;#{4f{yhj>6q(T=#i4vMA13AMB+JW`I}#L>$V0{$Zo50J?K*PnttRlc<}63 zX84~Q38$MM-|zHE?Oe2rCOA`Ga=Ry|9MeVd7fZtsUhXAOxOCrq<7FvceKkFwIwRHh ztS|O>HL_SjGe+CvTOrhLgJrv9&__Wa{&@J+;($eg7=^Htrb2#6u$9frb(oKwX2)2k zk~u5V1QXI9Cg61*=#C%g9uRBZ;#TxCUGL)V5bx_g)-^->YCTZ%qH}~KvKLvnEw5|Z 
zvsV{&!~_ofH0#HiZ+JJapmTgi1(OzY;IRS?lFgW{ia`2H#tMxHJ@$UNS8#?7*-bp zw))9n*`MyE{bz%l*wAE_(8yiCC^l2s-}pB3h7py6wOI@lSiq?sZk|u_3VvFs3qHRf zj^;tGjlvu|L&~*HoHDFVkrGLpI(AlFP4|NRN zw-J3W*8a?P)Of47HwDOwi~ryM0$7huzt=cqF8SH5qUpSLxnfF1euV<#eu)b^+7*NPxWyWB`HTH0KM{RPrcbQuy$~QugZP^f->5dS_#{ST z!_$&nj(4zJcBt_ZpP1|M6(1ZaIp)oetxA#Ks#kY+*nge;5~^33<5l>cXW!9O-%a-Y zqVXtk#O4IsGXDfrGeLb|CWPPZd3gJ{RZ;$O2}l=zXf}%#Yvc+sM`xQ;G8o5rsjrq$ ztn>sus{FW=KuLO9yqu;jUorfX=cKfvdN+;dS(rK54j62H>xn=`DlU48_nK_9o3w;O zH+N*T@T<|4iWUqy)?dH$_fPjRYV++6IoyXF)L_Vzo5x>_(FP5Eb~*a+u_{n{BG~l; z{8E7)++DYXT$Ym~$s^6#*?Huxce1HsPNno&E-{bc0bXZ?mtWYm`OZI9Ca27@l^8p` zc);Iz%`s`r9Im}WrwBpxcbC}2MZQ%{#{hIrnM2>nFoF>KUU|2OR_6w92lR}G|ouc3py zhnj+}M4cl1eZlP6PY9;9j^7Qyo>wdN{7^{@8zsKt)4txa)%kzec+04`x}{q4uQtqo#5{7?(%icdCq(9dG9yI{;~h`7`toLDw$PtVnPwgRjmeE zCx54^ea|^uDJ~HI{wq(o?E!Ec5_s6|nSI%g6MmRNvn4t|O4YD1A6o3uhmVNj&B#oY z+=AZvv*N3NnDsgx8Oq9wQ-^n#dVNdq`E#DIj@9hE!aIReea2yF8HtSY z@X~u5hipGWJR)|dzsZjm<^1oI9Plrmlf|A6i05P{QFkIl$r|=$0LX?(96gwXPZ>Y5 z(H2&f;Z)Kd-0CI*NawyR2D&eh+NA*v^-TToE zJT^3noM7vI9T~*@6C7H6=I0jHQ#FTdcqu!b{HCH3kxfSenMbiGd2cqPNjK|)mRP!Q%30We#X;B|UfG|W{~}3+*OrA8 z7R(uqSnC$bA9XL`Bz6j`d=`6`cuYKY+C@wUTZ=+&x0nk_8Q@=axNNV zXjwX^%3n?GQbfo70)GE1g*h;5<4zKmynyA=eK|oiy)Kq-C6k+q6tDCdSxqo}ra-cv zl0^Fzs_kiNg=tdrv*Cf6j5sVr8ID|C#?LjdMp?|52o)r3&gnHc6$4_RzN`xUz zfl-CybBVq_QqWILBEzIbA{r4SZYAYD#lnp7`1znnw9*EGfa_T}#08Q%OxXka=rR|! 
zBIEjV!}2RI<|l4~=};#MU|&Y`ZBRZPzP=;%i@mUHSe(KJXjz~laeK70Cs^UXyeMf% zEcu}2bk8%(jmXUvJ9KDHTCPMUEi^DQf@XT<2BkFp?tI5DU(W>FP<20dVs)-3ZMhN6 z%=l5)-Ej0%gLYc26e1xp;2qg-6>f{6eiNZ-xwsa&6&LJ*Z=LX~zDlQS5k&(YDn(^7 zUMV+>M((-$2PI?hYlpI%M{afl9(hZ3!Z{_c%qO`h+&EHkzh|;pdvzkRiYn1Z(^9Zd zK!#i;X7E|gAf@a8myo4*XcGR}UI|GTxrF}ZZ)4DG&yMaK>HfJ-G7^jS>Z-1D(UO93 zVIpb{t6Yk3!oLz$sMax`Ew)e}hrBFH1%`>}C-B81OvP`zn*|-$7g`tNN53tZqVJOS zbOr54PrKNI~vaiv>_j2jbe$D-7?)5JVf^ z)}77-B2xXMvH{w3!Oapa5DF6PEEzZ^dS++2y`4^Go0@(1uk>hJ6|P~Nt1>3A6ptmF zPIof*G1WZnmC1zS7TdV@Z}Pk&9cDn_P|aaLE>W;P*ZAE z*!dGpQ2SfOL|w`3*5}a8vU=*RgHKil<+aebM_XB)99ZepgyWDkPYWUsaEX1&(ZA6r7mVE5^w}PBL5ZsHw)QGb)t&t?G9+F_gxJqn3BMy z+vOpU=jx^RQTh+In@4t|)!@bVi(g2{IH>nMfwU)*ChFSD!m&PEw}V~Y-X5Jkj$a8W z5>Y?LiG-oya^(Xi-G6VJr+U0ap^Aw*gGTLtA0taxNMF>{=$-wQD~Leps}(w{PAdme zyR((T)Jn8h(buV(Ys!X}OE7(t>D=PFT=wC4CcBFIH5`I0S5(|aXkA~XPpoKi9}e^S zk9`=rPMsz!X8CU8n&00V*il`vzF!aTEt2VqA0*|h6Rd~2IG>Tb3&e$_`>j;6xIpQp zL528#LIOhg5jG;mVB#sqYvwc9u7~$h6Zh>?I#fA1uZkS?Rh%G6$s^3BEC+6Q3#+pB z8n|^xy11ZE%q8U)`EFPLNr2j+!<2FOmuPyLaSEwZj-W%18dmy+M5V8i=i zjfR# zL@v2V;91ua)96UBC@x^`zMoN1gVb~Q`CU7gm1ULW6)%Clco2I((d0x1t%NT2m~q@q zn-+Q?kJ#h`=tR0_L=p!b%N=%hspSb7H=}9tfiS2<(D|j`ISGZhHjtKHljbi6kcJSd z+^70p;OFrf{jc0}7Fmd_*{I~;Ht}pI>gAECeo}w6wQ>&@SfaO@h|VPVq7g>!L8@VEwfbRQW)M z1dNN7UGtEY_YP~!MxGaI6?Tp`%0lB@jWTML{5FS%hDQFw{x6bB`u4PJS#u{oH2CzD z92ns$M=*DhG7@?DYGOEeY#J(UI*(3E&pZ!Vvhlk;{j z@WCc3p@fRsguX;#YC2;-;z%2kfE)g`VMw=E-qSX2=jQW{s? 
zdB^lCk8L?(2dmLe5}+=WM8W5#!#81k;SIfpMPbI<$oniunI1}kDg!y&n58rw-2#H- z|2|zMMuv=-Sk(%x)Q5Sw+;;0V8Awev5%?q_IETP{+^I@G`qKwGN0$$qrYZ9tsSf}8APtG z02w}Fa5A2?Nw8IFEN@(OkK1I@-eBMuFnW3fC)?pq){DJc%J7Rb_=Kf>%CBN|VZH0N>^zJaKSM`^2@a=X-UL7)GsdsHiyfS{YdW~QNHSq&UYskhIi^Ci~6 zDuXUGc9B~5SLbOW9ams?3w-=_m;ssz71)!|P3j>YDS!Em7xBm4{P!D}wNs=13>#Gm z?37VCINMOKVoL)!kf9ywVDxi89h&6h-o~k@L8*sJJP;B3uB)2bKA|wWX3IR`PhJZj zq-D*VKU-Hp5W&S)rt2Io_W9k!?c2A}XYU%S&G$O=UB>wiSezu;gbU^9-Xn0+MJhY&qR|K+{-NRu(OJ+Tv!GZjwbEb8A|{&?;?Dc*Q3Ae{ zfd!sVT$c!J5J~~+=zs2*8|CJQQ8R_e*J+aLW`88I{Anep3Xx(VH1P*vLzhB+=*Ipk zlMmMf6mpej5$N*w-Mh5Gn6QUsAZ&4G9iU*DN!7nqlu9HJ@Ch1$+S38zMxD<6P9Y)8 zO#oskA>JwUAa|{_c)?T3x5#Mks&S$l-;QA-0{rpzfP3UY9aSWF4BrWyWAQoE>AIdb zEy@v{+GaWf?r7t&>T;@btt#vEJI|g#e3LEv^YS>&=wwKD>fw{*ub;L>_WOjTN&b#2Ufxd-odHiyOdmd?gR@AowJ}-h-jmjTaCVN z4H_2IIxntVGmqLr1bKS$FyaCzL2wR#0I8WqYacYFJHG3k0@!htm0S?dykYvabSI&L zKb)cSvbhsMcjS{+zXRPbB;I& zbRk--sPkR1Sc2{q6ga2W;@05U*N_?=H99*C2?~Y8!O5Ru6TcM3Rt){H(r4N2i1aPT zmQWgS<39jho%#5&(VQUAE)M@>#=+A5fkGoHK?4`Q34%(Ti}o*rLM<_P->;nW&e~fEF++J z>d$h7>sMdz*V6pXsunu18S`-wl`k1%B^8pav3wB2<(fD%QXs`kkVxuNb|&vX(pn^s zkoQDQ0o3nec)#8|LEE5)k6v1r^axUU>WJr_{hI~?B$5)5SHByJ!I%RolSC28h<;RJ z3&Wh{F}*-Ux#5Om_Nrd+A+&>g;r?ozw$J=Z>U;ehFKO_xc^_JR7C1BGdeo>3c)TV` z#=`5oi)InX;1~VHrGql-KXl&lVwaGdY|;}nWo39iFCLe_g}@i>8P6v-%m$Ru6b^cnVc3}==vx}@9X51{=%0JC-e(* z$=yBKZQUa~eMiy;z5bpohFDbwY^$di5A5d*_8+#qQ7qA013}B@#X4;o+6^$KlKk=`;>|uMfTC9>N;pF5w?y9<%7+@Z z2PJ%a--ZZRr!#ub+wh5Emj7X?q50!-m7%aI%oh?&P-$-^IW+=#>+QO5{NTz+20ct` zwLPh?(R^?r$Oi5ZK7221=Ds26b3*s^DV|%)JtOU8^!Mg1YvP;)9gFJMtc4|yzEjRK zE(q1ca$tk@Df2$`BLQcZjie+T6_me739fD6RDuJuao*3+kmCty&{4<2>@I zm6>2iYTuZP%H@JRvo{<^p?FJTPNa3qTxi=Q|LmQ^WT)7qHpNOae}sx?`g=Zv z86p)3T)OAuyjv$Gb@dw`sbnFiX0-Dw)Q$pmXAJ0*fT$CQ_}vk=ELM(s-3m69 z6Ew=c4C5nAJ+2}bOihlXPA}kiS6zD!txgN$kiDQj-&fxeHi;QRs{Z3_Ac_J-&G)UI zPqHlha6zB^eR{`{Jbl8(GWp&C&T0vW+^xQkBy1`?GYT-1DY9(1h$dC?wo1bODVP-- z4ycB{mdyBuz7Y0pOF<9I>*6n=**t7=X+cLb)!l6gY6VPsBnanH0e>5l9E#03OV91% 
zhh`*To?gHGc>{qhqdvg$>{ML_{*eY>nl{`AS_V$*v4m;r@>XMPay5S=B#o~g?v`Wu zcn5D})}6Z=ZTlq}Sm^t8XL6ow?u6TtrXkn%AIjsu%OWa~UU?~3ltgm>Tq;yFtkBt8 z%KOgTs>tiV#-jp@p`weMF5(+;q$tk_AKjN(y84_W5&Ng`8U`t=9Xx6zY&*%as#pmUcOVN`6;e>B~Hz$RGK%T1ADgwAKXBDr=3OnU=!VIW5qY(TdL`dlE&i>dl|P1Ksr2jK)q zm>=}VQx^PzAW;{mM(+HME82pYDM2<(Svj(#5o@8lWvC@I*Z(=Qe;|baQJ^N_fpm%n z5>aRM6v?sIl=T{a&=GC52v0lZhb8p zprD9M&4l~PsRBt>wBr%qdM(IlftxZJ9q9Rg{?~ugUPZ}rOzHt*!-4K>SIvt+u6}32 z#5!yXTdu`fUyF&HJ1u|u?0QwTm8$Ut!zzyl>*wF4%(Mc?`ZmIM?~{a0wD zfD0QG8^^{jk~Rka2L_1Z2glC`to)&_{k`E@_LaPm4aY9N(pBidurl6^rH3W*m!g@7 z7bvle$kqu^tqR1b{xTE!V#EzDJuGb}Xe^rFk`DbKWuydIq*uteewRC;n!!b&DrK~a z0TM@J3@E3X+0HFwbE5q-fn`-k=fPM9@s~yCMq%{lsp{*ISFr&6Cfq+xh?@_ocEhr) zl%+{yW6kvW)w=Fq7(UMt9bMtxflvvJY46r_T@H?c7{ALPnx@MU*bYfMV! zOU7x2j+~|>0#n9C$SxT-s80K)CHpl>7xQ+v-0bk6Hm|%A3x`RjjvS3203C<%#`!g} zj_54!RQwjSMtZ$#*{6{0AB3w&9?LYVKKzJh*Ha;43)Dd1Th?_@X8G+HmJtQwrvAb` z$VP3ye>sL|XNxmj|CywbanV6>2VJBezpD&mqX?s;Gr1F|q;-5yuxl#b`Sx+ThUBF- zm&tv(A;%_76)OS`m*NKpiHeC1Nh=l1Wz?$jf@)psB3n_|Afm6ixm3v@AfszrIDOtb z>cTjsULVk4oK4xnG&|SVs8a$(sU^Bj4oQgy4o49`Cb$2T&7Yd;c|du=d~@AOJP3MS zcbt668s1y~5PTr~;DhHQE?5oReL!DcN|vd4ZB#ekn7=+!OPrqudiR7d@`{1MKf;-! 
z-0711mQ?)I*wU(s@){>gBLY_4^(Wq+n%fh5YS>dPhEDzZed#4ztH+h%mF1%k-U zOGhl@o{%usua0@y)x8}S3*VBKx!4JI21>o6dueDT&C9T$ZjyF_Ar5r7N_r9EgyRK1 zOxOxda@VW9px7Grj``i!-qfZfuK)+gMx$k|-|Dn}(V82CI^@S@X2Gj=3)p&GDbagZ zyA&aRNgrpE7W%Q~Ts%^FtyJTNK<9LKP$Cq>IC{_>mlm#^7So!g%19+}y^SVlJ~a6= zjoj`2S3eD`ie2LkT|ySD#s=N+L;|mr#>m5tT!}uUc4j1vi2oG0i6j&d-s^H4j3t70FqXa1SE>Ll|xsN}C%aVI;n)zcktQ zQ~rG*Qy|Lj)VzP4#pi`WjWC$=l$>h074LaIg`GX5EE)%)GGyDq zwWIjjmcHL(@tb&h^H6UjTcbD{<2@xy2(@dz>VWtk75Y3F@fPDfg!a$J2x4T6;hM(P zu0(jomCr=BX%_vv{K;R4#Q@rPG}_U_M=@O(E_Y0t#DrK{+{{=r*cB|d^5urbZKe?C zuLsGiV2dp#i|1>NK9wZd5CMV6gl;Za6-)88rf2sg1oaGb%-z#@UUCNS##1@WeCjX~ zza=uYwPQ09?DYdyeL)LeMaGkST5LD9reaF^6-@Y{@|BYP4qpm?2_YkB*o#jcCkrMf zSM;BP>H&0yN`*vTsczF>l#j+tg`c4KXjnLAf>yT%qVI{mZ|x5H?ya$`%r+Ec@W%@7zE1NC;O=Q7)`l6*o?PlBAv&8qfsll_6Kvn*$E(xL)L{rzRQd2 zZKZsJSMmedTm%QoOLV2>%}s?^grNgxhJ88PV|)+>39SAIL$bczVa`fFr;)H6M+%i| zQ6sp~sJ>AX4CcO~1N&^SFP)6nwWqbwi$HhsBrI8xMSjkf-a(?U$4WgF*f*0$dq7&u zFv6|Ab`u6Ook5UzzxcYKyHJ8XO;5-)N$&o{tbg{>G>Lie^LCk>?OJ$omIq|B<89z7 zSg?PSR*O$#eK3*`k-H18#8aXFD_I5RL3B)IkgqypY8zEeqKgGc#+zS96O*)K+ZbLc zClI`Gk_!*01QGwb?Eo!^NRyH4m)#Yz!I57&+;mBQ-B^${y*29zURHS4ur%=Yw&%Uc ze1QjXJiy5#2?j54)oRYcg-_Js#vWrRfi|K1FLPGFy9ELd-qnmPwOf94O61AS z%;weka%K|ZJ{Yb);Dsd=vV#Ou``!(6nc0tG^u`VYQfwto*JSD)fUyrvN-U6e%gq(N zwi`^hxy7UDNmsAa>#&(Ja1rp({psp^a4!&$8ZuOeh}(E3uUFq%bCSxqcs<6{bgI7C zJdN9<_coKfx$#@f;+Uj9byn9MOV3!tqXwHl`DnDB2mz<+J}K97Tp^>XAz$sdV#^H< zkyArE6#62q1BDX?XtWxD)B%N#x$(O1u(ZJ>KsSLR@JJ)0uf7PB>fBI$9AmDXNLXr} z`FcV96dSOM47q-%d(94t*EE;oWbE79QKJtD%{9eQ{<`__2Q-`5k2i+tS9;P$N|(BZ zH(Bey%5b30A7yf0^3yZE^Ff@H;Qa2_S0=HA65_q+v3G%d_x5_b@9He9VT5Ft0Q=WP zI283y@i#@rEno1iOM)Vueml9 z?)kX$*!e;`Qkyjt5X6+au9{i)yL+FLh*={g-&JbeDA&D-?z-#3v}(!^rv>%LR1HEj z?zuNcC+8~f6t116=C>eXPV_t?YBOz6S}`jgnX<2dRiv13EH*X5pLDWAKwm#1j`=zf zXm~tO5RbU*lo)a&5LH1)Qza}}wa%_UD&Cx8dfl}?96d0RYCP2cIQVwz;U@G$MYCNc_v4(ZtOz^N zDnp_ZwxJkM9D#ufF-!eEs0psS@OnYao&WlMQ(PY={@@rhjLshei|N(Kj6Ln`+-jAt zH;AAL;gvt4tS8j!+j*9Kz}!XK=4NOY`NpdU9Z}q$qGFQte2Uq{PwyyEINeTu5T;{6RGsRfkH-2Vtin5 
zwpy+zvN2J2S=+89KvA=|0A;RxxmwWgUa$rR|fi4vHeWYySrwo z7DOPwk-s;3JHXet8V}oOZL!_O`RUr&cUQUf=5%;;7s5ml!F4eRp(^OFa$(I#{NwT= zJ<=KB4I)M59eLg#zmGmayTs$hR3w-`lIy%b|7OqwyP<7GN!?s;w_3CIL^Gttuzu$u z<$@S%A+DtB+||i{&M(W0U)5Z39shEeR)4vn+f(|!V%!xJqISSh)-5E;HUjaHrVU59 zlXaWjSEBek4*JZVT&095M`lP+m@N=E+okP&6o@T*;_P9bh1+L> zeb-KdWbdAO!oe{`DsN3%AG^&7;o06V%V1tVH|OF&fz0X)77D?Yj>P4D@YUZEXKDf@ z&+$M2?%)5r*u9n95=G@}Rc#?Ch)nZ~NuyUY3_j=Qw!~PhyH-x-sVtPDzXJR#Y(Xp?S9ej=MIffI9rzAaF zJvKjm#yerYNRjD}xGUw*+s%dibS3Iz?Q@vVo2k}q%*_i_Ol;aAfcH#clhEfSOh~U2y}Y9k;dP|DrK$z|#lQ>R6=xU! z1f{bAMf8415r|Gq6B!M66+9*(@=f+B)E*IZc@Ept=} z&7eUY1&2XyzHGSQgMcPhrRq22u1y3meDG-9!(#G;GDcchy&aZ!+|HG6p39dU)@qd@ zF^~A&Qo&hkRcdJMt*O<(OMXEL!|=}=V}p8y+MWv-LAfOr+e{jM<|=$ohn#S=L#;Dm zsh4M54k4C9TDR6%F}d@6^bWV%U{hL7(9ozBxV2atu|{R{-mTiB@AcEEt4I{k6Y!Qu zHV~?V)^)y_ZatdP>lL6t6tY-#!DRA46n4LSNkOX+TT5cJR%m&dyQ5HIp=#xrc1+9OhG)%qua`l!L0^)ig}VoxYr&I2X)KZ^^JUTs)MVg;j;lMSvgE=M(UIoGe^xcG4c zgR&mHQYT+Mh!&j(H7Z{)TN{(`fV3M15sJjVA}wDy7|x6E_=TRPnuHU4!$zNRCArVS zhm}b5AnRi{ntPvf0BBVEE~m^4-%;x{@=6>9cz~1!1*EU(Zef6nMQRjJH20ryVW?_^7_AHDB>ypv?+K z6utnZ@OY28T8ONe<(J#=Uc{S>+)q#~iklr5DSOJso5|@A5RGLhlZ}*qg$944`{ufR zYRxeiG=I-tBg?i92Cy=wR}h6q*>x_ZyokzZ#gWOKmz4zzHx=RteE^bur%{4tLOj?H zo=3lFx94g`#Kh!~ESd;PfYz7&xCn+C{%e}Zo$g62ucP>oZS}7!z0b&ppILT~OK9{% zSC?|9RUwi({qWv#nseh%mh5+F*NuLWzxB4SWr7{S(HR{Nzd@ZwA<#^Xf}>BD^@hGU zgAKnP;o_Y#Gn)>)!jPtiWB^ToY_O?9O4>opiimT}<(`A<-TC!PUQwg6D5>O9qvM&n z&hqSHAi6%#43*9GOI+wx?llS6kgJn~czgh|NTTCPZ;9Z$c*R?Bs@4gPDuZ_GCrZ!B zyt-S_&iNuA?+R=M(He>FvWMAx7-xS=%x}ksTy!#boE0%kBcCixoYI}@krPx~$8C!p zL@Z)EuQi68rKgTwH-x4bCndWZo{3{2sF4Y0y_s3v;CfK5e?lZjoSu^6GL4@N>_+!D z!9mfa#G2UZm#?(2o88;`?2ac367RL6a!1b|s6Jo`tN4MhkzJx7j1P@MeSWoKKdy3; zjgt!kSEh`m&}51cupD@;W-}tN!}8p5!|>^(M2VIW zyq5C&=z-da(_FS71h7{9=8x(!`$Yp{)#p5^@(aSy02%&$Xq)!EPWd$<1|+;1b^7GZ z_IPLX`aS0oVWI{Zx8`CM)5YYB4?HcnN?%=N?u#y%kh1fyM^AJ0`>94ueBdg_Er{AaYPWYNGKAf=+Lhnxg-ZT+o@xULz^$|hh0cwV7 zHKA1}3+2##Rk8f+LJU!rFPT?fIMQN5LDh(pY&nf(Cu&qCQr4 z0<6tOK}q}5{@9PkDk2vo2%@cLPh9Thpk!ZyP|lO|tI%Zqtu*=lNN{g5qeR)}!iTi0 
zH>m094Egbw0_3KdbF>mqANQ1%VIc`uz&E)`Bz;~pQsRw;Bu=&kOWyeOjhDE{Lc*Gmrrxj*Y7Snna%z!Q%0`=^=j-$;ZgbuvBVUP_2z>jiR z`%K+1b4os?0w4zsKh8YFWL(GpI2WU$GT48ajjLcNW%X+CN3E_0?xWw4}E83?~ct*mmh#~51;bMV^& zkjKla2wnlN^MDY_SU$&)?fYaV)!D{vziT>PO>_gXNK{he?v+05)l{%T0kmhb(#CfU30TB= zIM<`)7sNp4(!s?*d_2DKM>gKE>UN3_sc9qbfuKQt=b|yj7rw`ln~-)N&-1Un0v4bOC<|u@&%7t% zWoMUCzahhM>(T*d!UtThGTBpR+Ws>rkfw`dAl4Lv6E2DQvy#gzc^CeOkn;~Xkzag{ zt+i;&fVokgW!G_XE+!Tx@lyLTkT89Sn~fj`q!O>qluZ^!m#yCI-B) zeE?_pV4cR+qnR4)?g7O5Ro$rOP9pi}WoB*5i9@FfAKiKnx<0$^#(i8Y>GpObsRhW) zCD^c*<*9LtwfFY}LVC*!lNRH{D2Ra(SW&}v5K9p@5{D=ngG!ZTNj`!Ugl^WX=GF7! zpVZ6F2!kq&ybeVwK1fV=LLV0iojK73{D}D5{nT-7ZcR-t7~J_LAodl)k^Dq zuZZ;!A1HfW#HMd8Vp_b`{;slj-^QPVr-a8huJ~XHgBa?yiI7iVNa@?4dIL;V_eM3` zhX|%LZf4$~NmWM?8KY!fcHYwZQ_#xj5gJ6pO4-d^sMY=B3R`?dYoQJ8_J~(eTMz!i z4+Nl&sT8}h?eIiGg-RR@{x-nReh#tR_;<9Y68<7d0cK`g{})|ZSE91u@}}@u=8)~ zUV8;ZXEwjU_eF?^6kH|IHWSf!T7bf+W@8N5Zm4zekv{!ra`;=(6*dOnL+!SXJzbqf z_@8MQZtpl7g8>a@bP|z2yjOvlpcdL9wB-bg2R4*KB0__G)`wzoBnF(8IeYs#FSn`3y=ggoaJvw<4Gf-r7ok4cw zxVEDfkcz;)^~jDwGFd%h>paJEe@^hY2K?x*C;mWpeA57h7Im{ZqD7NK{9Lyi0hj2U z7pxodzYWsAu`&=~40@&U_Ywe}c`p9`UYeV$vvhpMQ}+XU0Q#%ZEEn}20FcP zzzvhp)^w$HFo=!~-&S*^yxN>s35ck1f0(R&1lCp-oOJnHEEr!0%ylpPdToCi)kS;e z-Rc@A}2Slu6*{3{_qHnYaCn|O8?->C>FVehrF*avxQyu7%q&Nnn3yjTuZjI zd7F8j?>V&k?d*u&vfA?Ls7O17+a8_q22pV$@W@u$g@!Pv$FouUjb%Q{!_lazBl_FhqQ zk6}LJ+Cxaw$`?%^4PW1nX5=YlS?T72TKhfXwW?LjveiG?SW{hFY=k$gG;R-ByidwB z3V*Z+4oeceH<^XoDU)Kavx;(0g3yJ3TmD~w<-cAjm41#58xH7)Ahyl>;ap2fodh;fU2IIB!-u--yB3s-v6P zusUAb0xlhT8(N;3hSXelq2v@M?a7VGxfo`yx$O06U#kxTDMrOH!#u#)Pp9LQG{+!9 z>6{%aeNTUi4qX>Vqb$Lzc0Lr;ZuCuvKT{q%rgC)6ZWPk?0T*0M3hRM)98#Ha!ehSV z0Fg>pXNR5n>&=fxpDlTE?<4IvBu1M#g+@EAXe@dSqh1&94%7JU@e)1X^efgNKhdEt zFx(B9MRNTEYc^gl_D^a|m0sMoixKFCdg@srM~y%Z21) zovj?@z+jO4nILgA>043lpSo|(6aUB7|KP-@@EkgI4lCT?)rOk3zT{FQD&Pf^0%(4B z(JSz=s}z}8jTU9@2se%vvNS*w)um;!)pjexE{OvfN|M&Rx6-5PJ_vMGgfdxWj&lXE zD3U{umS9SHbCbI)_%(E&0ufvIly5|SC(dTVAKLsVh;`BV_jy|=_Ule>`5!G`?}&-+ z9_6@wU+M+c^lXUZS$t)&T&fV8%nAW&ZRP~Xh7A3#=4Lxl+{ju{x)LwKcFg6jBq;YD 
za}JRfbbSf6Ew^!0o80OzweH%r7QtN)J>uw6LWnQykr1Ctt9e!JF4Lz+Ky6cyqn#%% z4vMUij?R5Z+I3{ow}bT=o+j~oJPO$5U)P=wf53ht5EI|oK{NEuW>BoPo#*ceD7c@< zY%kvwJwbJQpC~*|U}vniLiE#-5dRpF{FBGIsQ*r9cf;%kb6u_2aU1R&!nf@HksR&s z`Zn?^_g{epWO7!*&bMCYATv8pT8*$a5M`E$xhEl2#k7qiZZ$P6jm~z|~ zf~;-|Z{DbKn6XmPDY`i!tRT5GadVeL(LcAePC2jZ<9s#IgJWrNvl6j0V7i(25Y`X& z-5RRVH#p2qqkvkHC2MhOYrg4e6`c)Dkf{@!Jy77;4ju!5qcTQ@!i{&{Zncm8|DG}Z zC!C9z!nL^$1@oUf2jO>q6p&!^O=M=Q?u z{M2!;%9T*{LS^o`gVLdYY$5_ASM27q%e|1DEbif?`RJ9AlZupFYxdUC6+mJuP>L;u zeX(){nOEt!nei78W8DqD$s#)bK}&FvO>xL+_za{Jj7UYY0`i;d=L=V4K1c{ zX_%UVU2GD51J=H~FIiK|ydSc$yq?|hglDH9j`Covga6oVAUp6dyy=fy+Y32IUmo%C z@TQFTZkOi-41*b-R zG@j+88?bnY@aCo=F?_KRQe1mh%WZJO^x#d?XVdAcg|Jv=MysuQWobpp1?>yF+?eXc z_d31r_qtcDXOFm8#<)wn5912y(SAQ*b;n2z-&f%W3D{xvQ1M5}0dh$R(|1%qm3l%d zv^~;S%nL#(3tS+Own86f39=>rgk4k+h~aG+RD&me!Poneofh4+cB1O1kGSn5WFXp>YYZi2eU%W`7MxwxZ1rzxk5A#jELWfc!VM zoTiWZ!;X~|PNi0h))tc5bSOtEw+>`CMPf9Hb0r`Qb%WDs({~{jOMZZioe1ADxU8@# z%tw)#6$G6SqMW_hA0Dkj&)!?7HTt=T(^Oeq`DjBG5^SjY9DvF>s zRn3zB%hB{PSO%f&HdYCnqGXrJV*^$?2o~#{Uv^rR&q}%+Wu4b3t%tF;tCr4)O4iyx z`bd_fj&Yv1=h8gnV;SLDy72khr7?6+#-XLBT4}W2+YGP8V&HiKU)Kh#(7HaaF=AYp zgSjSBM)xLrDeEpSg14uqo_ufNaElvtIUP*0hiYEI@^WT6bMVJ+^D3?~B*pWx-K(hh zyg~2!pQ!auDU?$U_1pk?ar@M?Y(zLF^+6=u$P~-I^RtTya>Ui>Z5e4w-?;^Y7Btaq z&1KcJ@mG=TO(ek+3iiMw5MU@)lhrb(M`u$vv>z+f&hrfqCkz@ZL4S7y(XskX-UsLi z4Sm2|BC*7Nv}{SCy_;S16Bd1Y2d;IM?=`J1()iLsn0D1&rGNx+uBX3jK%Mj>{?Ih= z=U}Mdi)J>o^y`m{IBjD<7TRyvf`LD?^k@H%7Qp{~r%BSFo+)9H&PV3D%=D%e{OiM; z8gXgZZ-bP6Havsb>BQ+YUq-U&X)P3&$(B_ICgi{^orylHcEM7>z|DK2yLtLjVk^SV zPgR`xm3K)0mD)1zv7Qmwa9Z<*X=|Gg5Ch- z8J1Kx4o(Fm@MN$a4hUe~S3G*e$m$NrD>tMY>?O^@7JQpn@#kkIjI=d0rP+MB4wN*@ zsjCHxP9;20B&`|L+1U>lH6H98+%-83`k6eJ!gjEXe)-W#GzLrs{0iQCIQ>J$P#&BL z0yO?t<8fQ%=i$ZYS>ljQO&`U0AGPtLiqXGf5{J}CfbAnXX)XMf1We&$F6xFhg{OV0 zNopLR6*_&~)n7x@U7duR_S&LJ*93hY0;CjDMk2T@^WE%tW@H#+v?jNtKaz)& zvpskwVij&8nV`VeO`3|VTd>gjOos6w@!3#~a% z-!URYUu!=C_8Z=xd)_ASiq537Tkx*)DI=vhF+jJs$Iwr7<+7T5KlZ=VKFF?9 z^xzowu&d+iZ-~p#W%aJo=pEk|(lj&-LYNiLSAU;a#h-VXJc-6J8Ae(ymC~eeqTHBc 
zb)}`oU{Q+B(=Ufq$K2f&q{sQvCqZc=?Gdmzs!>(UJMB&ykbV0$iD`R~b>SCP>PyVw zYi4utwW#ouE3;v+b2`_zHS+Qrw{?b;ic(xo0I~+Cnn!9C@?$cIDpWYcW+*={wDMlRJH1v99wz zQtRETQ=!#@Nv~1eKaXxmv)JyOM~&9uszkE5aWWnc@lnN}!k~Ep0s5wwPnoR+|BrVD z<#$D6$kjaK8(M3xs_q9IQ^&`5itw)lQ0?tZpz2e2y-b9^hXcnNna>iWBbA@b54!-D zeUVo>d%h+b8b z9)h{OC#O=_=Q;#|00>|O97JUxE7+P zIxkl>;#^iEW$Ejeiluw}veQYau6bs*i#1*JeW4Z~DJmnLO_>of=&=MpLN1-DpzCb;9;KtdCnu z`u_UlV{dF?zQ3MTS(;5FX2;k!{@>;o-l@84#n){5NaPO1tOJ zu318^Mf}qmU54o|ytFyaCAD9X@y29B7F5Q?7_X#c;ITL70Dp4pb%F7KGJ1N4{h7({ z4W-h$i`9nE&`87qr&0A>*TpN1&xa*q{Z})O_0Q;PGH=l4MsDXcrUi%BYjKiNPdh6N zoib|Iu2tySGVe_K&Iin-xx~hDP*Wm6N++BjZ zyF+k?Ai>?;-5r7lcXyZI?)Gl3{O6f_XFkC@Yn_$F$~k*?b;+-*tE#)|dO79a3TXbD zUb%{9HZAzmdQAO@`P7TUdE+&F*Q-&#$XIL>4!S{5Y4j6$DV?!?=&6M^x$?fM@vI-; zenJ-n*Yb;GFWf)C6HtQ*%1pJXRVNvp#`P*M>)stTeOWTsPbv)1g5e!eTQpCO#_Mal z&oE%G;e-VW!{p`>?f+6FfIL`GAunU!10+5k%~o*ECm8((sE5=0BtXeLd`Mbgc-(uS z3Z@!Q)r}MBJgHiQ=!ykrvHj|!6t#z`{|V@_nSBtOTT*xDJnsWwr@al@VHoD+B8-E+ z0K@n~b2e9D02=NY7m3fR694|?8FOw^qYvqOJ29=xPfMcYDkuHunyOZt@^!%N1LQXnpM? 
zt3^N3mtfQvZ&In{^Z%PL-vlC9+tC3+^0YLziFnHT&cg$&a@~_u%Poc4im=%?={^X> z`NC=Bt{?*((qanoT?(#l2g_$lX3_kGNtO@LXn#~#{r7jSBj1g!jNGm9z{8Fw=aJ7( zx{Wk?WkDG+s^8rA-2DJup&i$;-`H>%x$7SC95FXk7W>gr^NJgYMS6Bx`MC2)A_U-B zZ<-rWWA_h*5sd9X9%I=mVE8RpX?hUWG4?_X@6rT|4sp4`0R2r-XU&bKRPC^5Ah;t| zLE>)hX)=^ndJpW{ha;-xJKs`&*_Ykaf2uRtcnRpz>v-r417eaFPcj8^yz_dxbz)`V zd}t^i?COo#FB@YyN}KF1f2r4rP&92QAU6kH_K@&$JvNd^4))$ImRa<+lq*{+BCy`9C z=yGr&2}Oq|B4Oy5ZpLg6WA>p`)?~YF0$tAv@ zwMy--p9*7?-AJ!*FT3#)<(?dnB@YmS<)@(mNU2jC^8L5QxN7lUH{D-)nfI1hL!8>z z?Adp4$>KjiOzY@hwwrbFP}A$%aQM|tXm7pqtFMwBqWV|-S2i;c0v^v^_tHA0O5|bP z!t7upjo#&LM`fm3@(}dW278vk{JnD#y~wdF5q7cnt7T2;zZr^)T3BYku`{PJFTdN> za@vAu)=`Z+3K>(t+XD?N31QVX@O{aed1DrXgouPr1*|&SAI}9y~&9VS}CBec6VLF5FL1-`8xWR$k z8x}9JwV-V|{KCWuVLU!d*a@T|{{_N-*@Z5?4!j>H9n8O`@?YePf9}Yn_JaQ3f6*lh zxMXz2utxs>GL8TElz#P3aQ`b2C?5#loT5U8#83Y_rLc<^tp9o4Kh0VeKNK9j7A|K{ zN9i91|Njh1XEL<*FF^Ru_daw;I^!&%6n?5eIm7?*k~c^x^(!Vqv#gJQDsO{2miCt#ZIECP&q6VP|FELbzs#n6`uX z)F?*GEh(2=p>+W^L`du0h^V)ZfSewF;KzC8U`YluE>v|W@}Cf#NeBvTg#|+nfvP2x z-^3sea^qxZ?;Tr^4?cHpD4>0~h%N!Ywwlx=>PeEa^Fbs<12eiE>I(7IDkeDz07V&x zDts598LsKr0?wiqu6U409HJY&75h{G!MeFAsdmN>!NT_DO#GJeS$rT-(6r#4$%E7= ztjo%;|A=;oX?rhDcyMrR2O9&Xgdp_;X42#8 zCeZbDf>9O7|EGXsQ?piKKVYfYm6ImqpQNZ9xXWlAG%KX-6`3nZ1Chu9D95qqq3OC- zTl2T&PL9+juQMDb!LqSXlW2bXbCy_(s4UvY1HG)a37RqvBH1PX5Xft5{ja_)4kA1y zLM^-xMvQY)zt!!}CX}4p=HTuKi5g%9f zeC9$_)9Uo*rs;_RBwG48W}*+ zDigGdbaZBO>r&aK{()jtLDNSnT=9W?bQ(5fAl%tSRZ`l;sekAPTffB5p<5#+iFH1; znPMkV8-8N^%YzjU;+xj z7p>OiLYdf0*6V4XW_rU$mg5BP+F0=8Is(L> z`^cA79`jB*V0CJPH`qjov?d@{1G zJ@Ej03aw36xYL&1l#PHi=0)}UGv_(wxL?m-@w*_VW0Gf>;n)aSD{C)XVGcM?i%-~! 
zXfq|O^q7b4)Ak+|JRAp;QVx*vcZB2j1}E*oB$hNrFSiB9>@5#2()V1-rDl#R*XQ!g zE?bNxo}Gz^Se+EKnXf?X;6-+(V&Zk0$s(%1zxHS!^AYPnZ$zx0sX^UN)2DZG#5x;j(h~LZfzV}TGG$3R2hi82WHvHkL1v_^;=H!R)HYRY zP;6I6wk2hpj@xc6sTDI{$_lOPH-l^NFo_aN!d;$;GU@l5Vck8SWHktV;QkP;8ggMQ zq+sCA;+3EwwRCz5RhgkjA74mwl)v!tZrmfsjE812{+=sE?c&r=%)$c1Y38EG#c7Z9 z0?m{`mvoN3%hWwF<&DLrv}RLgrE;muZK&^!Y-YN|n5E zzgcpb&8YImM8f#C^Z0Eq`AN2nDm*Pte>Tc2HluN!%cv+t>>zPQ#urP8-KZ$uWW#;< zE9pbAkeZ)kYqWkmcEz+C0z29<%vj!Kb!y)Kwi8>0Ud zD-~8UrA2P5-2H$)D~If$|8sN!F9C;qvBm6{<4PSEos(<%Y#hJH)Yb#=>=ksOmCg&r zp3D1iMTrEZlY%@2ZQqj@+bYfMeVW8R!*aAUo1ejbi6dE z4}CKd=1hTWs8OtxaDjDRL^DGCWsHg~Tc%OmKk~JBz#CSXBp>j;mT|wWG*S4O&E_t9 z)_dmVg}=g@p4Z8JDug4>N>6S!!SjY0{^CJ*_!t zZ%BGQ8rF4Q1{Y0QxsR%;6J?azv}JH778NrAr>OC_WDM#1=tz1xD5nuj|A}tMZ_K7v zGC(<9Rfi%5y73mwPGQ@$H$glF)cow#oqnr_#&%|lGWEph()*mn^zg#~gr4lCIYt{S zg!SHR%dK^}xVbnaeAZsh$>7};{G-J1`nKsu+IP*iA!m|$sfLprsjVKDZC)$b0R{+Gb!bN@ zn1=nBMkg1TsVm~-o0;;M2*5J-W0V`c3@|CPDTu)HwMN+$TA7Een(?!4eC|rbgO?1>t#XAVcClCP+3MDLWt6&Gw-;?1pit zo(|<5V6t%ilsd5hB;!5$PzZwF&~7LB@=~FrFbU;y!FU5&UtC`{NyoR+gQZv0`votx zkMcA&tl zro$Nf^$q_?)CV2WS+Q5?2P=D>`IO0Neg)SIJJE(O3*ye}(=}|WnN2)dG60j=g9dVM zrL@R}ZOiztbR4;-YE5eExlqm=b?r7yQJ;YcqC*w>^qJ)%ED_aE?}F8*o5@AMkR)3y zu1BRCx);2I_$}jU})eVTzzkz$!h`SZnr^!BpD{hQ!XxIHbb%Z z(=iYx;UN1ExZ8Ir{0X`dC@w&30k!KcuVUxJs$y{b=uWH*rlVn#YYL;$`t_{y3>l(} zH&Zoi9ACbS@snAtrZlg-J!d)7Z6Ky@65v83E`Sz)oFQMbQDDv+TKi^@kXO^>)I3~h zf$ck(Z|DI+n_k^~ z2C)PUD!906!8fKJdp_AEdfX=iUOA7q0;bTr?9rMzYwi|D`b()3$%cVGjY6c~+xYjO z4#um$;$(PYedHL=qbyOlYYGP7is$bh>xbY9Wl|uwasoKn@o<6`gFQ?NU&t;f7WdP9volQfY$imxz9r3dtKDa}nsYKVV?j4;5m^24WgxGHjJZ zAvrM{;qoX^&j4eXQ(I_`l4ulBh`gF#xcMfVzMXo&*J*KvE{~POHplh3x|8Uy)Zq19 zn1CY}5k@+Il#la3G~1Rt*zM(DV(woT+Q-)9Vlazn$_1iu4)9+uCzcu?1_)rW*}Yn+WM7cPUPz( zOYCo^d~pvnufN;u#Mn)zd%phx76^nGW9}qVKs8ZJic7lu5ni#1nx@tV+vKboVW+K_ zilXM^H_HVLc}xxx6%UUpaB6C~X*EX*pYLc3W9GbDlYpS9=98kWJ@oeoUio8og`cl( zDi6k)8OwrJxD2QXdFtp6)!CqE&&19t7FOe0s5Ol446DM@3MEw!yF?leYPNMv8Mg!Z zyZMxpU3Z$)D#x+H;DDPC9CWu~(c+s4q|JJ*6yrS^v3tVu@wA0QFVsxYnt-GTVIi1- 
z>-q#Zj*2Z&dFh@%ANJR_NoJQ8nK}IGCzQ6R5P)5xkMsv71-v+siDjpYuhpd49~7gm%lbJ8E#^wka`jO; zAXS8`FVYWreyWS}>YcHFKVe}VRI5kd4xqw&@FV47+x>a-#&4~#KPC@Y$UTU#{p==m zqyqPB$wFXBQpo(KiNxqwC#H!qb_}b{{f2LTRmtGA9MQmF&Pd?yE7Hy!$Bx7iX*Shf z3DK~oC6_C7I&Pp$#~@2NWH|ErmjEhFPk`U|gVHYC=~|;{xfL_Q2x?tRs#W-*CH|7L z30IM|V!EyfGk3@$8cgQ9)Pc&vZ}D-^x`T(S;N^ZZrIig){o-bs)$X{#Tf}45s`*Kl zF3;NGH_V{{rGZ*5UO$%WA--{1TZjqy-s{tCQ)TE)#JE#}*(Dk_RWQD(~+mB<0EOyvhF zq`+D^E#(Na46^JTBM-A(tgoHw9v-8gqspC9>vS9+={Pmyk)Io&ckLb>yWQ(Fl&mJp zhf|{fP4S@wcgvx1vn~O8cCoIb^yItjXXMd*4yM_3Z%xoTsk#gkT4npAZ>IW%=x=4U zI7s8o1$PUVib>Fma8%5+D0w6N9V*nCfD+f-g54_gxR&4pNxCD+8NF` zi?;aeeBE=l)1=TxVrnd1rH4`=%zhYyAFA88Jr+jYPQ-&j{!hAMJO}h(0_7SZ*EMMv zl_*y$r|wGjRH{fo4l5&jsa@hwe$toZ_%lIPUR+?as4Sn>7=*9u>F|oI1M8%R4?Y@n zK*)EtG9i%|4Z3+^*(D-0*hNionbB=LF1KfhCSJDi-l4cf%EePP*Wghn=$?3Qm@wu~ zi-(g{${#aQY5GcZMI$=`G_{nGr&_6}VnaA+RJV;{IFw9%;I3(gV13}&55DE)CGR!n z`C=Y48UkDE6a9EiTx6xbG7@$lnR4uL&My~U3HtFvrZHQypV)FE zEDKy0%<8I;u&>y2^7uWy~>6=CeP1yx)$=m?zPGkI}U3x-U+8%SiO9uAu z)*r>@(ldqq9_hvjP7dLFE(3~rx4Vja>if-OmroT^Gt5OC?J+4Ti%nX-!zGuTYa>+5 zxIV|#Ot{hJQzR}66-sWGI1+e(W01FKr24jXMYCaE%Jc8*TO6vXY5e{Ybjtbx^t+}EU>F(@b<^-;fJu$-?7R||DHjPcnErMs{ONETzkgsD{H#KYvh5-4!eYG(;-#)9NMeBWm*&*h%jbKNGXn*-4v^TJC` z2Vt^P0@4Ke7MeQNep}UYsg=1ryL8f()k2y!Hju2bwxU1lNLM~*K6nEt**xO)HR9wK zG0{*-h6a(!&gV}1tjG^FwUD;jME!00`VE4Id*3L9Y*bJ^l`a=-D z8bqSn;iLJkg(5j=H?9lAmk6a^(r+px%Zu~*Xmax<_2%N%2Cw45w2!I=yJmAc(J;Cr z0NTm~1Rm<{;XO<3dVph$nA5OX{8oG|*_1Dq^euhKf>lgrz+>VJRUbtGhY_>Z4pfU5 zu}wO#i0gtRIG*&hRk|f64FF{k9)$ROUuPgfT<>KL#K33fMZfgL9LnF;6>;>LH*sGB zPEU%8kpq|ng#L^a)r?KQeL88z^mWnRqQdCyG!TZ_jpJCeA8Z6mKa|Hce>NwuN#|SQ zMW%U6UyYjsV8I0hOe9$Uh$eKYb~v?0$KU@ETLaMQEqIhI%nPB0Q(S_2y@|pCT2?C$ zpTfyN_>X|@gXH>*vbO)!{!z`dec#_-_N2e1QI+?o=>H1S_JMOh9@XGVq(Hrud;F&K zLDO;uxm~tL0Bi721X-p9vF;n6S$?>Fl?Hz)mr}qh^eI2({s7-v}45_tqsltI2o{^;x1TMKdDjfM7#Z z{WeP}0UXhKW5{ntkc6Q;2rEU%;E<3lsrj6sRH{(j4Jx;R&TJ}4$?!ZgZEhNX zTmHdQJxN{&tC_2*j2EU}@zKq%nNASswa1#)C%_4JfF3eLWKcqpaD#Og60>A~;3x#t 
zvfJrTH6M(|urgO;Q^2v#;nmW@MQ>zI2I^k82s%zay7I@pIjV0@>uKnWBs0sBUL8>o z^m=`u;)ckU1%?X=4z5LQz)q!trc4w9T$V+Y#@CledXI;1)HXxhR!iHP&?vua0pX`OR59yhqzC;V)#$j>J!To z>78QT#%?yatsN=Lr(Ty1y__rw95{ZSV3YAd2hP0R@0|})RtQ=aMc{H_)>^=BY$Y+LUJ+G3IDsoGYB6PLg zv0mjY`*qB9y9`z^weSG>6#w=O3Y0Tw!&HIH+(V5SJuD`~|1X(7BmRw#gKVbLd{ z0K#+xlqNtkj!pm=kQSQGW+`NTS@EAWA3*u1&)?;rzgypdHKa9c#RCs)!_e7a-8=Ln zU6ld4=y5pq^DoWP(*hnye1$>;5-rpK9SX{{Fu*lWwFJQ5SiDckpk*KEOqQVG+yAc2 z&2)NGYaHVfpqk+~7RaYYML*gT0F7SH@m?oblh(D~*mxBM4DA6vwj$gL4EIsnpX#q9 zAU+2;r_WMqDz^LM(pvB37!Mu}gg!ZdB#CIwn?FcxHhZ9`R4U)yuDV$dy8$$z1aSev zxUtW8#P+z#w5e4lCNj>GgZBrppuG4mS8`ebniwS59m~1|lh-;L}p~ ziX}=}_4TNl&5v069mP|_I;@^zF^c`4z zTHKan6-`H`W$7!Zisv7|o5_m@L#MOU_o-tAnCl7+aD=-dhJg{*a00yGw<7R_TU^SQ8~~hD0Q%_4v5lzpz&8m=3Wmx4#ymMffR1SeVi~>owSI0g!A)rm1+aAem!~}tmpu3z&7bgfM13xvV|<^zo8F0HS%8ta^Pqt78X5O z0)f6>7{n5!l8K1bM6mM#xf^IGK`K5yJ%UWk%o|~>t3mS&wehx!^^J{H$a(D>p~y`>-YJY))b zd#-_M9#{Z8R z8y_3A#{1orZmtl`!8HJiF}}I|HA6DQPC%BzHaql(D8C7Ou&f$E4nCDl{R%$WOw+L3arz|$bfNpNNa=uR_ytvhV9|1U2SMO8TB68?}tJD6b}9YxI)JbDbGC= zBrLl3oVFK^esAcHW`=5B;B6LA^T&=yi*+zCs8k(OGCW~`4$On`iaEP;vPA_5rtUN(v7ulLVFcs_06>92SA052A*JS=sUnP-*|GFN|#53dAvD+=`<7@52zhZh)CtR-nuQ_V8HparhhTp;FOQwFm~e9PhYiVD>v{s}rhtBC!hqedO|teH&+fNd&6SpB@7)mp)G zdb_j@9~&Y%CI(Q@zF^(->^Ed7EK@@b{Yzetpg=F*zyX7UXB_+g_W&^$#aWoOyWgG* z7sYT7xef{-1K|kiVRm4d*Rf~gsG?^5VF~i$sMqqM>IyAb07!a5wSwx?-`0+@?t%*x zcKy}_q}{TqsTu4-0;7;YqZ9(64nN~S%xr}P3A!gfXX*q$!y`;GWrtmTEt`M0v`?Ty zBJn3r-WdU5zCt*yn3G+M+PRq6+ov9tn(SL!i&AS$>L8-k!}B^6!=|JT zV}Pc&wS}P1FYXLBm-<90{!LY~_P3E5x3FGQYaVd#Q1(SOa4+TlLch4gh385V!rQ5S zuc|EK5$9YZBSOMFuJ*{$qq3YOHk<7koaV4n(c`Sry1j;+p$~7@y!?&ve-ZUYUr!|O zp1@aTYhA4eNr7>OMs@CB<<-FV32S~(7+Ij6duN9&!IgE(G5i>gYhFAgl2VscABJw_ zPup{bl<6<*pL2$zRB6g&9p2kG9T-^5#mRcOAB|9OvJrM$o4OfUl4;(03~Oy9=xrEH z$I}F~Yq)(qdT}s2>(n}pKvZFZ65x16n>4#wd^xUdW^e!ZX@fUPgYnk*UpCl>MIb@Mjt54)xb3)>Z;)LPrA+=d8;%Bp-EH%F@b&;cQ;KhXZ+co)& zJ@5N_dzU>J{c3Lu9lb)AJ>N_Q09ry5(!6NpLh-b1VKlRLxM65hY_79i++)pgNB)cF z7frGBrT>{@g0;K_e)YzKJWQyZ?TG&VC(52}=agg1T>~(-_$mz*lX#`N6pNv>+t-qf 
z!<8B3!)(PivU4W2m1n}>wJ_m5c6EuV0k6D-&cTwTqf+wJgrAIr4}!8!k)x zcHLn?-@ zSa}DMeiPTA^=E&5Y;%4+LTRBF@gqe>+svN&kock*fCpl1uwXGwqEB-9SW)pQh8*4q!zA`C zWsn34b#9B!MfFJ}ZOeA&2!svGuMRSJxY~hb4ytIbBRM?;itxPy3zQY$#R+EerY^w7 zw6fiUbK<67>a4LwJr~*6S$u)>7<>12Kcgr7xfppO)2l8vt*Yr={)I)I?kZzUY`lM! zexIHN5x52M6tnRlT-C5Iy#S{WeXEFCQ|<8Kp)zR6{iN^WoO3g_i1hd0yIrz&w~{M9 zq;$q*qz8vCPVX&dFl$_knav|xYuoyLf`Plej+r!;1n8G;UL4iNXxoo1eNqXYb@y36 zoDXo4-^`O5YrU6fg-SBDINzk-_yWlmxNNg3f+)Tev*6Z_tQ>io%81m&0#aj)3fc`2{B)||AH+#M@WL8XR z^CBOyj}JRAXkxlg=@%Kd&rz%B7wK|=bHF9q)_mE3Y}TN$$_iRTV!XA|y+?1b>8=p> zN(#i>A4mu{DwR#k;#pi#qbEglPOdbF)aH|Ev5Wm(GQG8QPN)gb@Lp$v_~yJmp&^_G zlq{6La^!QTY$DU&1vCXo{Eny?qz`%1#3s-K;qwb?l=XpE4%nC-X(kOepJpIW6P}jc7hu< z3suHmeDeeTt9Bz_a2`84`|LLTRGUKbT_Aca3wj{8(4N5s{mdXtV7}-SmC7z1->WkB z&JJnK`ZqYckq5%_WZ+gh`<(7{CCHNx0GS_Y=!mE>c%Ej%$eM}KOa(*;?I#=Pz6~}^ zc+DI15DV?CHRi49P|Kz;1I>v!H{cC4?5Ts>IWb?)>dr-@>=oWp+7?g-a0h|E?5%lU zeFT>0jJin%x)mQF!>cx;n8*<3+)uvzp0l&$@OT<8q;xJYyB^lQJ;YZ#eXMyHZwash z7ly?tTvFoZppdKYyq$M{Ni^5B;8FeYv-uROWbIeJ3u9GnPhzN3xdoV!QAXElNDCda zAmJ#o+pO+%>ZesHUk@@`)sTgY#n#u)s4^E4q8Cpmj%!N({##)03CwFW%-lQ{h$z&a z!(%NSe^G$IqcZunkxH2eFHGXWlP47&Z zG&T^w5u8@W>zmY zW=}1V>g}0gw}#nE?Dj^l-YU9tc#KO2EJLwPb?!SKr9_*S)RIV@kLYX>z)60GmdgD2 znQ45GelPd&`S{YQ%;Sa~dz~z3?b(^r{PgFG=Z#U?+Cv%e;Ymibd4EPLg~I^S{1J~z zEo1{1fkxZiL+#PGmHpB$yHpdpW=uThlLByg%9-)#=iwOc)b~JFlPi3kW*>6Zq=>p9 z>%Lc@2Xw|y7UZ9d)-j$Zo_8XU>o~syZX2vOe>%CHzhB!qET70fa*zl=y8KLoFF8lU zx`KpFfHpGLtJa63Qgz0%jD4Z)xxyU&i39{m6w2q9lP98=W;4?3!<2T`b^d$0lC!Nl zP3w|SDKDtk9SL%BeF`JHn0`mby-y*j4WO*XJDWGlk{0BuWEZPfskIWhjv<%sR4I`J zQKvGro(d(4YQYQ^*PvY5L-~@#l)+bz&uJz~8FnOmfyko^QnJeRDTBX)Pg6UK z`jiiCcS@gsTTv}wYNg{fAKPTl2AKRrX)HYvc{uM6KtSaXGtP1w$u}#v}7>agnA!+Td804 zgf|194}XG5RJeg*|RD8_RgU)))4;79J*C61j5sAU0>$IDBGXw$OOx`h$%K-bb3qg zt9!P)`@P@cQmtZy~jZM3;s}dqr5}ai<;dLnEw!u_rqVrBI z;C}1x@S63N0oQ2N@GKyt+dxrKvUutwxpuG{II1y7ItyI}9Yz`2tUNs@hQRwSAIpIk z#|MW1^p4vXTSv8>Frc6x8T;ik#5U&w)!upr^38Z>1MJ@EJ*FiMRvW`r{gvDE+4Bp| 
zoiLBZ`~XxiVOP8p-pS+!5S;MBia#k91=G7pD}_uNI<%8X$VCyo|`*s`pP+eJsy-f8tr=8 z$4t?QDEPMffb2-{BNhUzGEF0_&4x0RlXwbuv%Ccso>AEwieR57phdC(>FQjkU@vF=j#3OhZli|66Ll2 zF6-g;>n=C2U>NzV^VKu!?J2b16;BE8iM0ufS}RP6Y+VqQtd^s>XfsQh+X;fzFXbqkX1gcKi|@XlsaOZh8UjU8Jt3QdkM8w^!Kz{OF~D~rZD7640cFu z$~@*@NiL9yw*6u_%2ziEwmw$3ttnL#&xj%fDw!abJA^19wudGA0{3=0mfzhHR*Prq zXw_RhaL_~qnAO7qEq)~JLH(T!^dE74p*KHi>1aHJus%!>pXfQ{;23nKH9UDUY z7)JaBsji%B9+$q$T~oRK;fq|6RWNb7BNoS~j(wCO6#e^>Yz_@_Nl!8`=Af6`*^+{6 zQk%{w>k%+?K${`Y7iS49L)32a^{+o^n+xYEY^16jWaCc0+YzDK_;v*yZM|U(Cjq&hQ(N znaS4kBKQoCl8kxYdj?+$PdWE|eD~1Xzrx$3q>Q%TB;>r^fe@8Y2aR=0Nt@_^T#a%e zwYwsRPw$L_NC&asV(=sR-sKe`VAu{mE64al+D`$b(auk6o1&D%Vcx@dqV1*|8*w1M z_e~6;w?au+xI~FO#YLQx29-AU&XM>jXUnNcIM`JE5gXJEqw##-pXU8bjijs->m#c1}j_V0e$-D_xX@x9T0MQxAfF3B+_p@Ach zDf-<1D3r9(VsWF|4tJw!_g0qZ*p7a=y%3QUy_|7}dPfUv22U!##6*b7Oj;*ZE2yo1Ve>(u znL7C*@5q`#9rKzG;>}LLEyJSB$g0A)V&lPkW3#~r3UqbaBGw6CIZ5CW86GpNz8~<8}0|2&giN+x8hZvoV_po0xF?44B|^B z1}iUzgp1v;Lm=fjFWGnLBlAF=eTG;9Uv(ndB8E~<;9nX%)q2&bZVh$k1PhCDQGJ%R z#O-~*Su(3()b9r_y55U_)`>9Ok(_r4idXY1-{w{6^>lpd)vcAQzO@gFgaxS?W8~QdkGOUoyKPe`yr+A5 zyghTo)h;0Vi1zpHBWEy#;Lz^)7Ki zJIb^CHf?QaULop|W~A4KHwjJMr-K5_Uh;skFR%KcL&E`4%>@xR<-ktM28mKii6?Ib z8#5(931^uqSWSL4gq3?FmfOBuQe%Y%S!K8QhkMlJN$MqtJFnuwFQxN<+bmy zY`@jA$ptKn*eND%{Q7jN5@0R($-ihzdjI4EBphK8cDy+4j#ct9LMU&iG zN$?blwQ@-Q@N(`wgtYEbuF}IL^{xzLYF-2cL9j$at=)C=FoN8!8UjDn8^tE|2a)Z+-{3CfwkB#N(v8s1FYgDU=0dDz!UpbRyTg zPlermKz!L{>3DV#O4x(wcwTM|F)~8oU+`yJFK9DEeu`bN-e%Ip{ldt$NF%_L-%oZ4 zfkhOIqMv%|kEm-Zu?c@#Px6DXZ#tv8ZyS(&Q(A9u6i_2CT`MzP&*|=?5Xq3*&J9Xu zWfaWr>Z?%&D~<3g)zgr!Ikck+!J>^AnauX+wm}86?+Er?{YS3DFMFuA(kf3}uk>t;gOv zCK6p=Fw99ml>4;a^XCS*e(~??^GsxWaIadu`Jb1<`ibJ%t(bQ><#Cw|IWP6g>6rU+ zlT%5PR*7H`hUv|OoA05*rAFvY4J?vB;iT*ar5X=9*X(qQIbZ+yt*dC6zgERnWAW7x zOQc;zX{VPJ*Gu%08S@8$bkvxTbPwxMFzAk?@1n4#ot9b!8OU{^PmL}oB; zY_s)EX6FeXAqp@ETw%Zc#Nhha0Dox>!;#NQn_p|mk`n1b_PHT9M`z(Pc@?<=+M$ub zGuu^OS(e*w0&>Btk}m60ejC(++DchK!dBopijPbEsb52LgFtX&8 z%~eAsNwLERB8M}ZSKdV6^#oksX5qKG#Y!!AYD2JMz^tiNvGHKTx+4Uz5Wr}cQatKw 
z`e3$w3=L!KTN#YY`*6tOrgfiW$AxYY+$a~+M2v=ASjy#{mPd@37VHT7au61@IN4gI zS7ouO%Pwr5fU&YzKiK~8EwsePrrz3B0d5=9ok{N*I=9-=5wNi4(Wjl_osu>T)xD|; z-H5LvlMmMVX?kj0%W=-g)#bYxfhkybtF?|rj<{|Nq46=g{$-vG?Ni8dD=AJC^=m2X zKhL)kc)sKrWLopbi?O_i>4RLaEs9d*59ifLsv;2{EHtDet>#KmCd!Frz#qdvUA zb9%|rchev@z)hsgJ+1se83CHR{*RvyI0+p|@ujv=7C40guKX{)=eRkRg9fY0wRXW@ zN6UL&l)YeQM(e*l4@Pb}fbXSz$}6x4x`~mGSZFb?98zgPRWF!eC8$}lpkT@PRV-Gb zmap7k<+$pWpcj=gSU*5w5Wm-AA*bMhQTX*HO=Tx?q@SNd+Mbc29dI7}+S_DmxXX(u z``qi`f>yql@|vH1@U-WXOV#7+5my@$WZXIRKao0_q?jrmer%_2)x*A-+eB? zs&&_(RBV7N+Qxbu)?dTPTZ9GrO8$>?T&@J~k+QkyOR1zt4w?32Da^Gf*yvIeY>hi0lPzWc)Zxy`N?`Tgwa)CIsvu&=$WNw@qZP%{7Q zQu=qhQY!^eec8o9hG+R=>XMU3`{8%gDGqT|#d_tIP^TR8!aepB4~^c(tHeJPlv?hR7#v(WlnZ@BuD0=K zIG>0}N%8WBAM-5LZfA5SS);)&T1G5E!81f=U<50}L0gSo?BQum6REOo*GoDq>@wCz z`IU7gwSs=Ow0FHZ<>mjpST_ITLz$M^GJQG`(4CoHLYVYVTm09*iRs|c%5U?jDIQh2 zd}eoKqfc?5o_Y>fJH>O~q+9&SvUB%E?tYn0~oirtt z0}G=#MYy3KbBlXdT(uQ{y@i$nhGnmq;?l5G|McGBa;1Vkdbv#2R=N${ZcFRNYp+>J zR9O%3OUF}9yz1PME*zMiQ~tZNqizICym~=&E1_Tf&o0Wi^WlXeIucsWxO{k2ZS2$-`2pl-nsw!G*&?omDMt{7JV%f08~ytkZmzQ>%n6{yOblL zbTv#qO%5)#;g%}>X`8=LIKgISlXgx>(HRk;vV^5M7P{GX?MC2tE&a|F{B6$lH2u*^ zJs7FQ=id%?ou0_mL@X3F7L`T`GL>CbDZriZ6jEYud(63JDAmU4qb~`a7~FF0zm(5b zM}!XIMCY5K7W%le*=ysR_wRDt6#hT<-hwx-Zdnsf9LJ71W@ct)c49kbW@cvQn3>&X zh}~vprkI(TnVH$sdC$3bzR`Dbf52#@t*ut8du#1lwF;lA>VWCI3#PWsSDBx>ug~nP z$AN?wu}>6G`_!(U!*}jTqn%ChbMtp*1+MTz*(HMW*1?*TttIc5#b{}MeKMl*(-xpg zkjeknF8}9rTXw3CtCq|k0|ggz#nocqOU}o^#jLC&>nY=Ry*VJq1xs}(sJIJY%F|(N zgI#){*OPW13oP)J*p8srRIgGpY)>3TkI&Zv136n{)wM%C&XRo_e5OO^$632qQLOZ4 zlRA8ZS#Wdnq^lX#ODkp+d!bl=lDu0N|37WbLiZZlyeoG(+CNR+hs&Dz1V zyU8>KhQCQzI_HgvN3@>kd|(}@=y>mRW;)8d`3qE4-g*k6#45u@2B@|1hlL$ah3tNH9PLBzrDiD z>EmL-aco!2)b?$P;=9#NnB9uE0*bdz&1R4GUlv4#Tyk_M&C(3-sjcEUTcierjPl2O zp{H%tjSgIMp&h6W55<^rZzi+2$Ie zVEi3rWf#5?zF2*YOm2Y+bAY+rtW;=y8oP=3ZrK)Y9kPqgMgOV$bSILGJ> zVUvQKOM7d9*k&TH(l4(`mr-$h(-ti8bIGHvR5EwEg+N53x!sd&o}pr_3Fa#tap)~d zc;DS5F_T$hXgI=l)eR%(mwS!G)n8wNEl0`3DN7S*h~$TekIq407i=gER>2z%FDz5afS 
z`=!!W+K2~Y8Q$Nwm&2FkTa+QDYnvcGFeSx~)qT*I6k3|Tm6(r!7$zm9xoKx3<=cQp z4RxG~*Yb~^IETg#Y+f4#L3tvBJpO|Z^W8&k3 zS5_=9%TqUg$nO6r2C=x&0s5YxWJ;xO7)lXD-zwW+&oL|RtMR}!#_MCMomV@op^*uh zFMWkK=6z80(D+TQS zN7nfFmz}I+F}WTA7HZVT#@hF~A5rV)uRd1N{0E8oZyD+X!h zh!*}gA^XqN@)9ZCBTB31vF-G!BUEa>(safjf5J3>yQb4R6BL%O-xy3L5crktC3Zy-%tMQxPdW4NGyoCmN0v35z6TQW1w4ZO0yj}CQ%q(` z+^k3W#k=ym0P9ATSe(y)f6hL%}{@H=-!HlNwlNNoh{Gb08~Rfz}w6vG>a=j{FS)6n&1a`dYTL672i)qK^^pG6IBDt z79;Y}Zc>99crLcyuX_u-lwD_h?mNt(HIOHB>e6y@qh7w?z8z5hRcevMK!YhR<@;@+ zjCC)&p-9x@)y_&My`7q6>C&H`pzDqgOL@F=M& zqlZiWcBV-f;KAkt^U4~ov#M-I-5WLO8rjx@O|W-sK&dwdZMX`oY-%QDgf& zRp<|%{tOpXCg&>spLq-Pj>rVIS`=13t81k!Rp8JCd@?uYO)WT~iuGUXT)YLnSp=ip0znz?zv%1Ss7|L}HdsB!^Z~pf;@ug& z1NeD!+7(Tlid;<0KtxCn?#sEn;eq57f49fS0i`oFJNU4Q`EVPA-{kB}#$#xttDc6U zHGW!e=6HAauNWt%zk4N&+c5`Hco9BdJIopa14zAmvT{4Xv*I-yqM>;tP)o*6eERoO@Z)?6p z8ph)~aF{$?2Z!oP{b_3)?qzIoo(Kc^4u*!)urahu-sBSd<-5|;YHyo!HPEaPI5SMO zWa<$joJ5-M-rDd}%9dT($YoG3jZyoUJ^RXWWVaNseB3pw#<l+;xKk|g3?0T*Q9Kp%Jtkt$d9C`iK6!@VV z0p3@fgJp2H9l`fP%`!)tb_YfWA8OB{LL>dE<@%{+>BeL))wj_m8X+? 
zn{%>q+?DuR?5s+=?*F=c-hKUk5Kusw-r=S*i35>y|4WS$|CN4wBL8+Fk4k3Na`m~A z3gLh;v(xj|o#W)ZZ#oU8id@@0bl(oYCox+eHe<>6Wb?W(JOO`=b1 zX;>kx7LVPzX~i{vB{B(~CjbLx+K%^A#xg*z(JkuaJK@<+y9YwYjtdrO_F_J>Tds2l z+TW##A9?DTR5}R9%MY7usOQsNPilq%*E{~0gQ7-~|D_{+OC&p&-C#@5tp#QEse?^+ z1YY1oD(tnAd7)Mgz`>|DpMv80VgM%P#o-Nc42PB&vH$2Of{!1E#Gy7Mt27Ynb`y$K z`Y>j6qedGW2yVL9<^%VBeB)Ulht^o`ZbAzus{V;jf1*VVt)YZzk#FC%3$}7rNHjl< z0+)7wuB|Hve&=God#>4`O}|FyHfHS?m$$MWe4a(y9|cgpEts)b|ArP=_Gwp1r!!2g zvneU__wQ!gj0*7Ce$aG}s_7z1O`U(CG?G9ljpX2P4*3F$DBk}@Xvncg?_;Vqo>p(} zCD6{Ycs<5K@H}5SEVO%N^(`F6C3ESXa7~0%@8b#X#HLXOX9{SVi=38Qr&g<{7sPvk272dT1nihq zqv6tG_tFz8U-_yXIrmKYe_lf002|uiY}%>MLV8H$uMW3%+1=*7lIR0b>Wy*fz*oo) zg*ta;BT3^s;e!Y_!)A zYmAhb`ZmiAO{eHV^ybTQx-j;|4tu-buGnA>vMUBu+|%>8?z+kyut}5*%mHmv;`t3N zO(jYpH2zmQrOR+~POyz(&jseUpq3TAFSQc|Bc_-!$M!ZPHCWsJ zM*=*TrMjUy=8YQeLT*nHd#OyP?Iq2Mh#g`-iYbp_}s~HL_J51Vd3RQ`v$WSXEz=PBsQ#4Pfw*rT;;*O5V($-{E z!Y2=$v8^9j8Up*NPa~Yv3xDX zy~VFhL8ys2ODw#wuqH~Sc&AHq2TKWT!d^>Z`+g&mm)aYm^G%hkMTOQ{`29p`bOvj~ zuwII3@eSHuoP{VVaUs>nTC~Ac*{W>F!?c~v!EC@>4nxA@^B(s30+YemTRcgu%=XB<@VEpZ#no!c)ObZI40nP$p1`p(#=cX+ooaa{foK z#c9vB8nJ==4;@%!~WJPvV@{Esh5GgE=$ z7lo##-IwHSK!REgP2*T(Y^6>lDYL3{w@c8pV7LI7@>#U-P1&_79c|5twZS@yTAri@ zmR2S!s0d9o?wg$`GCQMcr&lCHD|f7n{1%IUr5!^A=By~>&RcxKu7P*B-mcR875QLy z?-;93sLM5H%>1+ENh=D48P@@SPFtU(fbvK6d8DYsV1qqV#{8~VJ)G5cXlx_3dN6P>l9_6K1ora0hpL z&E{E+>t+vBddzsr-CID4Cf*c4=CP))a)w85OyPMiL1_u1(v5yYQ32&wNl)xX@Ot=M z(hcd0045K=E6!)TwX!w*@Ni$#yPS0^H1^MENs!sk$3q?YJ>cT}Z*s%p+>|<=2S=in zFUg~2)%yfGp$jWcVw^A#;~kFTEq9sYNBk-+Wmyj%7h&?K!48-w8lP+5vOcAb>!?V< z;A!A0Cmbz;eSQc}mEYB`r&&;puHyfs;ENTl(YlXDH>oJmQk-@o0b-Ev0_-@}D&K!9JwkKbz z#iFbfZq?WNkN1lRCLO!nOGml6zv4TZ$f!*6m;gO8AXi+Y8`LZCy{Vr7l<%D(i_zsV zv6i%PVL4s*tMkxF-gEz{N#k^M+My3=AT-~{_OV{~woTm(DAudKbNpZ^PHokubt*es zdB5gm;yHsRoQGLe{D=+3mWk1GZ=q-*@$Uraz4NPji%T?6mOFT1xh+4+WyiYps!fYF zHOF!K=yPK0&(9@F&sYfcDAxTjr($E9@Xp)maR!5b(soc`d6)brinT2{J8ymH%2bH@ z{;#LzFDVl%2+@z2$&dZ^B~jNa5KqrH(4174r~N0HYGiw1J^=%RTQLSa^;mYlF#eM6X4>CN%ieM<(n=P!YOw#yG@zsrfH6(9lQT_4?gxJ#c 
z1GlHy!$Tg48vNtN5$uA`T&^@ZFlAmpPRvh66w7P*9xiquI-LUPe8t^f%Z-ALGdcXr z>)BilDcyoCDVhx2jsm-@B!BLfd*gi4!u5Tsb-LMHh}xY;>dytR<^k%_*IlJPZvO;Q z_;MPqyP4J(m8-w(9)Vvz`}m$0W9co19A$z}dZ_!9&-AVYj9hh@re<{efbt%&Vw06zYebY|c1D}fJEx?GzdmxlnE@p=6gyDamWwGH- z69ox)IWUJaZvlsbowqY;sInW6F;+@!TVVLm>-2*aL3l$Tx`!+T#&9S*T)gNR?$xJxRU`&k{hhL>LxvB*7c<+ zNw%a|C^vf<;iUs=x8yiWm<&2Byvp;@9%Z!Zpo6T)uIaO|?-R&}Y|>NakTlCXtpFwE zDuke*_xt84{(v0WWQ$%(WXKis;ogq;S$d6k?;w$2Wm_M1WdO|_>6J&(kn1z3d7(o- z)4*cI`3>M>m9*S644#mRLEhAxZsp+s-bw=q&VPp|Y?F?E)u633JD+E}uw%0~H(N@T z<11y6)R$18gCR03Hj5(K!98c9n=hTY%%IJiXTC^%VT-d{-cX>WP+DHCYI4@7$+2W?n#p?(6J`~gf@{fM6tZAR!G;LSb?5o}p6HWin7*KPmWP=+)!CYR0 zLH4*1kiYK>g*c!U6g>NgcS-uLzT&3AKgL?r@h(2-6bbdZnfZPwMq2$TtDSrd8kaP2 zojc@gta2(B^)zGk0AD&6OBqSRwhJ2aoWY%Vm%QNDgvD20h2^QWHB8wLr0a2}e6X<} zpfshxylb=E(YVE@VgT&uFiAANO^%D3s(xKBJchXd-Se6P5kT3Ce+Gx$gP0}L_2A$) z?@b{ZsNI@M27JzHg;p~XhkW*WbW{FjY%?>ntHEwMCyx1A7&qVJy|xXL{>i&95qCiD zm{>p{$h*ru6(QQ@v3aif5ub7N$&R^x)6$ZaVa1&zPG_ke_)+*GrMo3I7I}|Ia&J#} zdUNgvZjSGaZNy&xSEv;V@aoHh?CB*#t}z{?{m>^yy+@I1iNyeMXhTvTQ&== zxp%J?GTLL2jv}p+hk+=7@{L6+vx(Vdxx{9(m3O1xwV6>fcy2hvK44g31q4Z)z9+b> zpX{d_DTz49SZ1+q70nLCNPXU(D>!$oLz)L%Xq|Cu<@M0AbItG!==U zOp8%to9O~ua5eiv{KeCwHz~Z?bE-+NuKM?)YL;(GUJ4nf^;`IR5#aUkIjpX^h`c%Q z4vS_?g|-%Q!i5lP`O++SD(JxJS(e?q)cS%5ZNB&N&a!6wr;3ds7}IsWTfG(wwsJp3 z8$F(o$eK{2^O?WK;d|+VWcV}dV~JgW5Hi^pv&^9B=Lv>uwA*uWnfhvN&4dV`#V(tCX2ZK8Yk`YTNDZY<21F}ou1 zYG0DJ3Q)AWy8ji#j+7XfbX#YD`lJOdDr8Be)EbSDmuF8hU1CE~i;&glnd|20?<)sA2P% zq5Yk`WhNQP`B!Zz4gi2kK|Pzx(nRhN=2@g%voe~DI8Ci~r66r`u*^#B(zmn7?oAg5 zq`a9u?Ml?T^U@zj&62k)=Acqz`T@p{<$JS8ArGkmwagFkDB_+9Y*Eo0d^+VoQt2`8 zdOJOiZP*$Nn0xw>T?$^P5v`_a5VXv<64ge^*=Tc&*LZ8qh4V>D^iHp>@oB{)huL@fP!P`E*{?9rHVY&>!j>1aj;>+Z21>jh2@PfUO&I zoT^^pxCZQTv6SG8;sh5^b5Gh-(8LVokI7CZ-g)&5T}4zAkHdwfSQ03%CQL+~IP)g( z1jCKJ>WemegeOvWDxZxwe3X81UbrX<5PL`IS63=EMHM3yBGM1dnUi_6g1@(Gj+i!3 zT$p%mSJ) zO_bit*(UUtvR&QQVW6U67E|TZV~0k0rSFP-a|mWMtjfhd8hb<08uYRQDkx^c%Hr2%2#6n%! 
zoVU}@A&L}RCy5z%_NV0XvXLXlrHIKqema}lqs=~gkD;?Yh3f+|#iT~=$UeN=f^qr% z_V}QYND=KldzWBRzuTFdT47HGo01%1%iW0SIhSsA?{6wm3j*?h5rxeDBFh(gBsb%um~-A`wfSmhKNuFa%~*!Cq=8f-2T`FzFd#0v z{Gz%;!a*JJ6a)cmZ|589w{kRP=0zh$wnP8Q3=fmUYpbr3KVVqvl)ySQZOqc(*rSPH zqUWvg+=5|{Az&!cCSvE}LYq)8&4^W9&%Q(_;YB{cm`eYRgaPi0>QfQMtM60~hFXvQ z2ov68UvUTKdryL1&x{RNb=;z58*kOsjOlJD+Tek?Sx7T-1O^T%|3|dUFW8uunC*OR zSq<@wWnPVxI@)k4vr30-L~GDSt?ROSa%<`+$H=j~6;)Ht0-jom;j^lFp2d-Sbf+q6 zj}Dbocqag;P4GGNRcSHADD0Q|Cx<%xf!dj+5%UEK_A+Iv&;++prK4_|@XMwh?6^g_ z`wnozN<_@>&iD+-K&m*#r8k$XRK^ z6O}I?3(Kmhk_>2SX*29gE67>dSuN`%Ya1Np6&O4}eXVSj`AXPWT}(C8pE6ff+(l9> zTf}9S;RaJJ-mBa70O9bt<%4@IuGiDsM}16eYBp6eH>u5g#~F9(R27DuqJ{zKygJ6Y`X>T{1$HuJ z4prsJAGlj;;Oy$ktjVv{v&B;MI%oZCOG`AFAnPY0SRf)D1rV4M-}l5iD>T8dn3b!d2APuWp-F%tPdpvEh>TQ zDh{TQ_;c50&HBV}&a1FYkQs&Lnc=wjFu6l2G+wp6>V_0KRaUI56ved0c-96`P^#Rf zAA@1TT3ae@T1dk_WFf6VmPB)kZv-}C{u7lre)^gLygk zBj#`SS<>xsq*2Z0A~x$ftH6zXpkZ=B<)!joR<_~?v(qTX|5s?QLAxpue|&wCLEEc| zGIgmA$z2KZ(n*p`qlzXSO$zwA9dYw%%hTkF3j;| zlGh@k_7B2J-&y?Bir3eQ6TVj_lZ4WuQ$@njq};#<;zwp$0}Q`PTAiMmCVwru&tvKo z*`i!Mg$idhTzOjo!(GlOV6%o^x5K`z)>r{2%SW69!bSkh`eeC=e5U`jKbC8^m(`3z zd~#wcxL*R=a?x;%zV8SiEZfGQLA+P)`n8%-f5){&<~})ItrdQ2iqyWb<5@n}21fg4 z3!OjgDGK(6Jp1h8A$mO|7Y5?#a>_0bu3V~-`Z6nll=qJYlNZz-tA=FBT7XO&`|J#Z zuzz~aSMnmwZMS~)=&P|PF>n6bd%b>dddRR4wH>A^zo!mYvan(G&gV?E`a%5oa-muG zZS!jOd^h?eurVSG&#B)|`#eAAzKc*4^(d1fbou_VMW?3$;GXJ4p>lF2%+j`wjmjcKIEs@mSxgsqKh#bpXYPvOi%Xl2hgK_O|E^D!8 zw`S0~Ct7~#TebsAM`FXXq(6vv*$w8}reoQv!XRNV`@*bxVcftQ*xG*ee|kmVl7L3-gTa^KU)?k*$f21>p~irWEB&%;Fpp;Wn z^ig~Jq0&Ad(r2~2F9A+rFX}4H!bi+qdy<*{?e${ppa?dj!NJ>={k3Rm1I8!$eA@bud&d#vP;!+Y`_gXo%O3gPu2Brj%S-A&;YVezANtVaE3>Zgb(9?CO3PccMY-zrV>s-u)x>zP#-7FP zH{Q*$ZaA5724%E<k+Cs;E<5ryOhVxHGhq3K3%=1K5-WCEQc(&>P}IU8 z5Wof~cIZ}ImuRJ(XIj3>IMhf1^+@&^SwWca(wwBXI?k$QHaryxX@Ls)J>E;xE2R!) 
z8R@|HioaP%aHEe}zpm2W=;EW)_dCgzZfareNL}Z#k;X;3Mct*iKvDAFrXc*ho3f*Z z>AF*R&Qz7Lnvk#&135v>!}z3ZJ+21Ju5dhsWjEu94Y|1tUljYBGN?=b;D>t-zFd(H zeX{Kh1xmG;nfnJRsf!}#rxqBPujB1j2yzxF#Jyz+C-d8CDjK2R(7s8~bf7*&Wk z8IZ1E-%}QSXFn~Uh7jFMi>k(Ad0T}aX=rCP(EU1QL9_D2wOso|qrpNS7m0__L$56x zz&a1Tqj5}?5zd8cTVDsMs$FVgzPgz#4Sz0;_%` zcxlY(W(|0uvhF^tnDB>gai&y3xdnn^&qCmutkK(-xP#bynn(5JLpLEGWvZ1h{Dcd> z{@_^fhF*U;UbS<1lV-awLY5I>GyaC`sHWn-J!8giAp|y`_!C%)wf?*wO;}13sq=Jj zB}GN9C{Ih>Yol@@Jx?7ej)H!slUkB4BYkl+TpIeUf$#Wu9ai0LMFjHd@|;i)jW?n` z6FSTaP#P_qhP?wbv@0=jBb6Rb`gp^p2vm7Z+&=Lkv$fsn1et%jI%E|Vt0zQGL%7e@ zyF;%s>c}sG%cUPu*+3}0rn zqm_CceQ?HuMHopSF`iHhTByNBRnqoMKFAKHXx+8pZ^!m%wcOAbksO{Po2?FKtdWVIxjz zjwTtVq)X;yi#rJf@bxdqbW`76y4aQJt89=NEMdp`I4?pzR|bD;K}zbn1EbTqdHT43 z`RL$m@TwA}f6yeVzdri4R1ucUUWMb6L{4|_*?WVEETywv8K?u`cjet*Zs1_U=m+ye~uI-C;h_*axSs&f{HR!L)xUDXp{Y$ z7L8@%9)2XfedDz^CKHcW{lIp8XwF+=xZiaoELie+cMHQm#r5F4S=OuSYfBfHAVDzM z$-0^M_iPRANH33$0DuG1tXD)Y6W1SnL-|6H-1fJ@N^yy;20GttD;A(c($Bc@P2$Az zocG>u$sX(Frmff{3FcAkU)b2Nb&d>$&zqq_@>m zru0A8eyi`zi%NLHo$0`(I1$a_b)2}tk;)bA_&Q)Lv13et`ef{prGAB-pjf)#7<+s} zYUB07z`7(PCWhg3xMmc^WeCZ+=3M{MPJB;z&&=FEmCL#CGGq5Z^)$J9%|@4|HUdJv zzJTo*2jTx;|=-hh0e&r zLUbEfou^%My{9K#tG@{?(!Tz1I2>EaVJ~2(my0-byP#`*3Blv!`H2dIvNcFF^NS(w z6ms1!ZZ(LP$m_p^?Tayf1dji{-(QEa9UdBss9=zy@B7#t{Mt=?xgL1aL=i)pk63mJ z+X6v3|D@A-7SfxeN$+By;Mye++D8Bd{zVm%`0%<4SPysVB$4w*c3i)u^WJZAS$MQW z5K#yjG^(1(8Ipv3OoVVeeUB=^A)~?P)#C**n^QY_lR3Xa(TL|v zq=rZ;|KU%FfRZ>yt(I$}g6kx<`OKBM{krf4&Lv$*v0BSF9)@hG35JMgqm{T#D^EroPQez$K zczsiFUC60)Qkwa-<$ztu;e%IQGs0-dH5ELrtFV?jAWL{SB)u6zxyuo_JXjnH3Kesd zq}d%Ys&Tr(MJY-bYNDzgVlgD25XsZlkXep%7>!#rBc?3=w3M6MBg?LgWMSjQzJ>{t z)zqXJ{s7|`XvHDlOd(JsoQ(yPgA@Jnl99;^Dcn2!PkKiJlhIvnkbilEKaq`sK+%uAPZQ@6i#aMq^%W2Nozw*g&r6yefL;Rb?id zkbET}h=w$i|}r3Ht9${TJzqq@Nca&22{p9zsl_P=vv-R9Zj2*pMCLMEWglhhTq&;jV(dyW*tq#jY8RHmU@>J=OC$Wd!Y-mQb z+KIxO`25w$Eg6r#sfPQ!T_yZsti_;aZW_sgJcWYJ4P`vzI=kSMrFgiktprj7_j$q$ 
za8SqN3bKxD>%O4S=@c2gjx{=?!5A|>l)THP*N3=CNWnz$*L^ApH##xF3d{}fn!_@Ae5<%&25fnAM|D161#Mfyz9NDV3ksW_wB9P^{;>9G)&r@+NT&RX~lNMagi`S{tR-!iG1|TPQKh(yo)W@gp_&OToBa-5;FiLI+HF|~2tfk@8{2sR4$XJV8M0|{ z!(somfHn+ zX#;t?&JxOG*yc|X4e3XJaby~R=V)STE>ef;%`!uSzZ~jfb6}X=8V+)|1)jd)7I=Ge zJ!z4Ly)AvS)~>o*)&|9I(cq{(sY#SQ@9oNi#I`C(+rq5EiSNO1?O#*--R8as`s=Ck zM$geaEnCp7IM*KfaQO&iltJM!%TNw(&Uc;=A1lmSvW-8*-K+$ zu?Jme@5R&s#F!dvo{^gKVBVUT@^m{CdTARYma|3~kR8n!He{w4>)f`TWWk|Y;-vkd zvjIgQ=TH%>C|$PcsX9*GL3a)pCIQAe-OcODD6fd1hDufhij3Yv-i#Z1sL0);@miSY z`Ghgu{SqaLuk90z)@ER|airEmb%|@Sve(oxpyMRd6gO3R6K=U`ogm6n4tFOp`>D!l z(h)>^)h{7+r%~ofaXC1}pU-^+{^uE5o zbhPu{2?y8|31iwkzjd}ra4?U*t#VA_%4}jr0Y>h> zdeg7XoJ7?)t;@FtFZVK3?CQrVq+q#Z3G4WokIvEr@jl$~zpapM>)|Z7p=m^ae>P70 zvEA%=%Mn$=CHWl$oFR%Cf&y4jQm4#jwFo3}_OlnU)k9TIYNvz99;GbW@(dizR9sq; zOS_tXF@2iz0-^R`=GGhVH15B%V=$IzE5E~)W+uxGCy+D5LK~yesg&XwDIa*Al*;Z` ziJyd>jr$G7`w+hgkjqm|uN{Enl#g3{IjVn0Zmy#~8vG84Y^=SgdT$cuRpEhOe_j-h zQhn$>>jQ#}cCZ$2I-M^@@ycb;;(1?`|KNi7L;BdCl6^clZwi3W#AYv#uej?i7ni+^ z?TCJWQ7s72EKSapXXj1b9riKbcl(O+@o31()uUf*noSc8K8^To{+F*jx%)e2;%C{z zP_>MSt#0C=1`6UJppH-*(Qt?Yy2>WG-Hl|!Xip@*|7sbByD7VJZdqqdS+i2#X}Ag{ zszp~4b+q8|;L9t=o6Ccx}xdN8c z76gyzI+3M~Q{M0b_FdiZ>Mj{-%Pw*dRBW@D;S=cCfAIxrs9bgBFhwS5f|iT&PUE@F z<-Ob`MvX;Deb*K{*M8_+D{gbUVg}G=+Nxc)7x;}}Rak>!nMJjcqE6yE_#*R^&(MkU z4I7n~3|+(@eMd+i420jV)e9QG2Y7f7|2BoFepD3>J$#Obgo?-#aQ8*7(=(DFNPB%a zOF6J5ob`vQ3GsvED{JJHJ*I1^(0Eay^O{&mSv#ZXevN`k9b7aTwg55iHbG3}Gdloc zS}n3Fom3UgrHc*yo)1Udgm$(HwPcUHl4O*oFUg{I42D~U_h)y(Qg$)#>za0}tts~0 zNtiTzN;zWt8+y^g2=GHWmGCi5SQ;%%!?vPHEdnlDUVnVrRh+E5>m|KTYWPbl%?XDi z*s4ETU}*>XIA6WSwU^uS2n{vspJ1dWF_>`liEy5>GID)6<#!QvDCd*4aPq8`@Kx^m z&>Xf;Zh`QYt4^}+Ul4*kpVseQl(HsJ3NrmQVv_2iWbn8mzR85;up7{6J||#Wl#MLJ zFDD(f*WcUN5tJDsi%(U9rdXi1JDzy5$es1el}GT7U>keZz@>>R6HxmA*wn1oRL}%c z3Xu8ad+oYV^&mi$f&OO;<(1`&G3b-qu#5o8i09_&{fRlM!$AB zp1_l-tjxs>Wby+>urz2V)nz2_SX?!ez}?lcB9DrxfYsGgg{8%8N2}zKQNFRq)wJ>Q zhp~J>VA-hY@c|QrXB8{P(J|A~j<=|2v|w?Srz`1q-Q3=!UN^5t+-3vgncnzx!0P6{!g3n+@2(9i68R&1z#+Z(F(nIFH4Cn-$t5J~cwp;f zfo9$A5bik%UvKY 
zsZkp$!j31YFlDY|GPv(Z z0}>Ep764U*D+zwQDt{XGNJvu$PXZ<<8WB`|K|uJV<4wlud=}Pr1p7{KWD=r|dax%52gv8L+j5Lu;nkH(VT% zPw}A%czoN2fyDVbGuL1CwanAcwrc^aH5;r*Kf-nWUU&J zsBDA><>ai=Z*BsRdp%t;%Gp8Hlyd5F7LEg1o}ylAxzQo)k0=AKod$}4rF(MLQMRjT zfiyWSWB$S2xawsv=p{BIq_!Dnd5Zy{bQb_ClTj@>H^=~1RU2e*0XkLt9iWytwI4-{ z_2roujg)CwtK}69{kOaoB#I>y%$( z8S%+CcaV_?WU_#NVVN&{4WXP#B)@HNZysU_-7aNm?#qe4xy2;6px#!mPZe@EbE}(N zF*3l)uXQ1U4$h8>7YQ8J7DJMC2X~w=_EXQofl)T-zMcloCsX ziX%!H;yk_+qUW22@b8aGBVoQq}iV%79YTiUxE&NMRyejnKTrzI5$109&R zYug@LkJ`Y9ff60;G+qwM^*1$kOtHftP@g6aR@cD9?uX&@#TrJNv-AM7 zt^Gdkf1N_meIDPpDtdpb&MxOFGNpnt4csXGXACL^%-GoQH*4#=Fx#5nsQL*1Fv#H{z zMEqZzo9ql8WYI)sakDT?oGa~$mD zR`VYFwawreJx%1Sfjq{8D5dVZd8G5caHt)|8~kiw4MRKNSo z$da3j+l*wIWlvi43j}7(%nZk9woIg`%S!QIblBIEb~cVX^w(q#w0O8cxCgfM?|j@@ z$&B{nT-?-HYEX9;>60|v-O@mJm{iLXf&=wwbucJ_RBb=a6( zT#K5UQ+})kwz$~d2>x8hS#bU4pC*?av^~_|E!leJ?#=Vx^e6cE>vqv8^gdEMp%D9~yrxN`Sq%;!{EbgSHaUxYH{lP0 zmccZh>28Wao9Bszt?X;y`O1rcwggE>SC{9G*dr--W>3+buJoTeV_I4%KDPzbI};1z zh5s*xb|FVdrFlHPbIpphixeRcrrXIycd$!nWOT4AEDo3BMaV)3)3))}HoyV|qxEex zfj<`!kxmY-)w70Y!?m70+B~=E&0bBbtxlp*!`Mg!)PuDkWC(O9NzT}B{F_j9Bp_K< zmpfVw+Uz)7kr@sCJQ~00a_jYhHvQ8P>jaWVymtxas%icG1_gU&}TYh*i zXPBIAL`Y~8lLFlfJp~l{|IPXWAp)1=4PDt291jRmcI3&UrY2Fmz#iuf8GO~kbPqdA z7rc7B2yvD@DHXNP=H?c1T5|b4fL|u~iJnKcIN{6MJZ;WUcN&fb@FtW)L*jK@X(?y? 
z4?J|^vP!?)eHMg39el(aQye z;LT$T!9ax~@o7-ihh!-*;0$?v#*nx(^eGW9o%75`*M|x8XNHdfO3Mc}h8l;y_q~BieYN6B^CqPn_Iy}JI)=S3oO@T*yHtLKD4AW5Et18#u7W%o5jAD%4M{ikz(OXVA z_4|h9fx88ahJ7FJ^Y&F(7qD5ksK)x~y)e2uRoA08m6@ff=jHZ#WktclO(?~aQs!nl z!#Jh`cec3~fql|^2Rn*HU!S*4RK{Y%=YNGn6cUHb|2ml6&2}n(gcEdO{DI2Hs(C7& zP8LtzAm!A5o=EwmP^#B?J_Yfe&zmp*pmeUJHa|i^&;|r+3lVEQ5iZGKD!OSPh?e7Ac zCU(YWs`c0^${M`yFO_>pwsm%k-{(?}cbt%N^2CbMJvD)wxnwJSUKZI_~WGVwTK|go`UvxpP)p!!ty1BD2QQQQB zHk(A!DUNY$-rg$F6N~PeD!KPAWc472lfjmMeat7{0$5ZoJWj2BEkUwX4=(&aNw)S0 zl2OVLftUGQO3~XPBFPnQW+#lq{zsRL9yXYX@*+RhzC|n8Mcj8Ilda`OWqacpCQVDr z=-Wx$#jhJbTJPD<@<<+SI2NMg1GU@)k};Q_#V}Mg`x;p?TqdX(6?e6(?oih+MtcPd z-cIPPVcfbYoL+lzk*YUH_PRfK1;}fuKt#3UU6-R)9dXL9L%po6( z>{98O@JIIgKYT?q3sIjwQE0OL@a%z+%UmGgsbG5>s(7mUuZGQnI!^-!&0J4+fv6R8 zl6Yx;lK5E_bxV^<-+al1P5(PLW~mwj)@Jlqu>MDb>99_;CxfVeH{?E4ixsNNpK}|g z9-(?^^peb1$`@?*^j`bgmR2PFg*_U8v3-E8fr#V>T(2(Gss5na?v^Mnt)T0M@qiG!u?h4E zu@9c@TQN0_iPra7vjj6notJrWVnN+tt2W-5fS)QB^c&e8QZnx0{2110%U}tLqoprD z(lR_ol<>UqcUvhfGYUd~tEYWV&)j!JIljhB+zb#r_bFA=iG$pyZhV_Wq#i9_@D zGq2%{Q+Ao1s=oPE#;L|%H4*RB%V~aL%l_F@Kfz}fTU@SvWjIgtstN?qr}-7v)wnoH z?uEW4FmS7}2)wQymU*U>p$oBsYM5#l=SVi(AGP2W~CB;Mjo z%^xLphJR(hn$0#J!837oolmTdYN)UO-n>+N;e&OpLY3VHGM~HX?Ol1P=iKRpx1IB~ zvJ%-|8S4Z7jgzD0X~{osAJ#%91pQpG20?Uz@0FMfSZxU&#f1etpD_*esID>U6z`EB zI!6=7!x#72I~LxmSI69qo~nLR5IMFF5^+|}`7oGh^O8(%Hj@%AKt^YR_$E|)5Xq|S zGjS`naoxY;yQh<9ZZ1c+@6$%(fc~xNBQ@v10$;%=zix#WQaHi0(&%y zWvgcP@u6_2Z;i&d;EQJvTZm&&C+{4}`LOgSsi>?`)tSg?RI`9Y+^I^s|LuJ8X)a{a1J9L`*jM}_ALKkdG8I)$2+M*JjYeEW@1xlQ*O zwp5v8YfK~eLP(H(j8}cm!}dC#IRM{7z2`9u4Jw#whbCg{%XQ@D&hqfBYDqyV&Q zdOCf07*c2P8&otAwP(fP?y2OP6uG${#Z#JNj_VxIbtyLuz}eN5Rnlfe zaia>5Wh4pM$UVQF7ge!>0?>ygio9&icTz^A_nJp-w@yE_?1$~=1En|TlAP>647+Q- zs&)ep_X4ilU8$owV|M;AFux|Mp<8JN+YQ1^iK7hUidu$~hvC{IQ|_9@yd3-ZvzP|H z$KO+kk8GvtKS_AI^^mq(jzw({rk@ui`MaR4@9GllVNhi>a~wsZ^pyqRH`?TQJQF0 z+|1S-?6wJ+$tP$%uatRJhO`(5d?oB4AN~*bjL_w@C3db8_aAG4DE-{*L7ZRf<0_U! 
z>QwAwPD$YsE=7||@iM3%E(0cywx5;@wr60brwa-fz?!Y&Z0=mYI2=}k?9moHX)Ft) z^6yqlbrr~iqNd$B7-cJ+@U& zC%F{{L~mHHP>`eD=MGr+H20yb7F#);O?L z%w*~x2M{wZOM`Wo0nrQdxlq3 z+^oUGz%?bTi#%kC8{V@p1XKb5mR?WI`mBSCk+Mw;WZX#GhX|3#FcD{!n_$+U#Hvng26ICzZt6iJAT|Bn3R@Wi&W?a;h z@Q3m`EVPVriPZcIwu&%ae>akW^(Ds-$*mq-warsZ3M@}muTHfBZ6N2FDE||r6JS8g z1o$$|uK0W5v8jesX6D|@(Gg*D%a6&;H<)WmR_R{y_9Swxu7dUSH1D%dX5UniOCUhC zkdZY_YxZSsG~P_j&vf!Cn0a@!r)69s?MZc55a3Em9@gf|bBEdJ!FBL*=rtzx zwg)%m6ci|uET?yeMj}G-K`F!@b7E!*fL^XUd=aMBSFT?`U`7oSnaTt)ho9K!Um|=u Mns-4}>UJ^z2ga@rr2qf` literal 0 HcmV?d00001 diff --git a/docs/img/ml-Pipelines.pptx b/docs/img/ml-Pipelines.pptx new file mode 100644 index 0000000000000000000000000000000000000000..1f773376abc7a8015572f3a796ec028c8be5ba2d GIT binary patch literal 56777 zcmeFZV{~TQwl4g}w(V4$R8*WvzYhX={Ib-FEl=yU&?F zCdX`V8-0%c^r!2Ul>h-n1%Lw}0RR9IAa>9kcpnG==zs$NPymp?8Uof<4u)0^I*P6~ zhW1)?E|wOAIiSE4SpeYA@&B{^HD;+815{SvmdM?b%|*&2Jp$3+0|L5MrNAi(Lq-I z<)RtAu#XgB<1YHle*HlE-xm!$KoqKFF+FsqHprj?Yd4!)=@Y|}tCWWX34O0U5-uX# z>ljv`;??LhB~PPVqusvbmDJB3N%Mg)r4NJ3$I&oSk<`I+zM-2W=Va$yui_ELhQFi9 zLJWUXsLu~15<-{vqATZFQ@sk#$d01~ZS3B%j)~D*Ro_)=xU^J2pt#LHXF!o(=yDMX z=m_}Y%vlK=WErPq*AZ?iFasxo;wSe-nwjYFl?e37-0kG&on@&t%7SZE{ADGIpXD|? za9$sZi!Il?5Y6-~RTu=A^Ka$J=8MPJ=ExkA`AmIF^xsE4b~de0bA3_h`(cyWl54ld zY8Rgehf`Zgo#pc#^o+Fye8%*<=pzk;DD@%JU~P}-qt30Yrmqt? 
z=bUc3rq00dWUWnm>r~TFIZd16O~BVXvd~BPmfE--f8 z{i*#hpYWh#XJ}zhPxr_Ee_-LiF&6w~>SeKA(mf2YK^LN}0=q9MR)hhQ(^`nrR*=6S z7(!RTmr2Dr>)N>Vf`5tUrhpDCUS(cPT-+J)+-}F|*Q`^jqLV?|E#sW1gH_(mY~u^+ zJAk`ih<5oRTa=uX-oe}rx>7i@lEx@$%1?d`hMor;0P+>vAbwdyJs@d_c;!@R7E?nXtpDUdQ+fbL zr~up*|M1?9aD!QSCU>F57dP{Y#&lvAHrbUFRxi@zDN|5k|H_+R7mGOGVmIjpckW3<^JS7$rxq1LZu7Btd}wQmsB;+ztx@}nk02ypQ8Hq`IPYUXk+6*Z*O60 zU}#VO4@LU(;_Dwq_D_`xi0U%yVL%kT2z>QlbSLEqmJc)3;U|n%u;tc`nQsW8g(^or z?x>7p1$9fq+aHsjd84a!k<-U$4N_Mo98A?sCtz898M%T1HNHhZ*c9s#ht+a_H1RU& z*W~AH8qAU9G*7aN+z_u$HsMPkilrY%oc3!PWG)HW_a!M-Q1UFPGvsag5mq`EUC{tO zRawpaF#+GR37hWkO1FEH3f)&*>(Il(Y2T%fO=%u&=YCLLWavtuxKgyXdXMmT;PLNG zpXon!`YaEuhWwvj-UAE(AOJuD{qgnx@!4(c4DCNTmY##Dwbg&k{u57uK55qH3;(^h z)_5r^e+INPDS(Wl>sFbAK01)K8REr75DXfi^LWqd*lM!=p)0`_4 znf*6Ov|c5Qi~d5zO2gZq45LW$Kpu_qMYUsfx5U)F( z8Iuhoka7hn`OEm%6+SYCS@3v`B{K~uyL{T;NK1tjK40XR_Zlv`_P=CwG0G%s?W3TV zA@r}kHa4zX8G=u+zm)J!B~!B{FhUqpSOCS>H23DF1VqMP-5@r8fPTgD|9u0cSEdN+ z1SWW1TzxX6>x*#AwkqmaETzy9;A5?viqasAZ9dZ`6VeuN0VGHr!S{RiF;(5p^jOML z>-rfd>MN(4Az~`BvIB+I`^u~Amq_1UZocEuc4&v572n%l^6>$@+&8h-A8vP}`Pmg9OA6C&~q;xfF9QS0vuXl?;sam-8~qM08QDI54W zL-1KrYbAcc3n=5fzbutXExR%^6I8H_`=OPfmZ&Hmpn_roE(kU=b9^v;AE~Y5`MCEx_#(((rFmNO>aq_TDmW=DjFy|nm({mJO(Ly(9Cs~-qbaL^MRl`9)S4obD5lRq?+0lB9B9 zuxTmNCApvG?wYDN6YJKev(wZluOr~9V2<-EgE&5RJ#+dd83Tdklr#hR#=Dpttx;FL z3u~CsJMvtd8j-07Bd`*-G>Qb&Nk2(xK9m* z{X!39NRbP#7y?BYBlF}TVQXS&7(_fgApdIiO)dhSm1vZx*vR#Ex9jaAqu_HYg$<=;9}1}KLv-n<@0zMBWL>FU&dtjEI?Oh@)sDlCz~RIV&z% zOM88s{Z6M+RwW#piyuAxHGej1tJt0cCnu7VkK6u>pH;A@v@DH3R%3FRVZeE9(NuKo zti07DvXx?lz0jDr!<1qn9a#)(O1DXUuKc*Zeqw4~yo5I*X8r3CDCY>hnr}x*TWtI` zicUw|nV~z~*a5e3dF9Qb9?P{sX za9tfeAB2Jh4!Py6IHLno9(PwUO;5*XB(>>YKvlY`sBOobZimu~E%roKE_^U2e*0Aq zXt~M^E+P5|t0#_>C^=qhU0)|)at9}Vy0;U6nd;s~NV|D$H|yzY9GWG8YMbRSxbPN~ zD?a0)MFb)+Y2*Z&WJFXGpFD{FC>n5O8||wx8WoJQJ#jL6F^w0j5}oPZr_{#MBU9Dd z6hxJ)Y1U>iEr;??_vzR}=7Q*ZDCol3F^U>SmRzieh&e+RJmLU1^{=Hffl3BmIdn89 z;y$bb4B3nFTetWnN9dDbGC_jNz*rqbp~H2*&vef|0+5XiyY2{AXEg^0P^-FjYf`Oa 
zJx&}s=2Dv|&SlqFUV#S()fegRq!)N>XPiSy~dYVLzEqBtRvLY8i_VUvsD zCNhi@;uy2;#mOW#I9dW!XXSA`R7<8{grOjt)8};7R2wI%)*=FxY`%Khk-ZnRzDtr> zwX;&{w##a#p@`9CRDu6mop}{CI{T_SyRlarYDpl^YX`CMMi}&ZKAsm$2>*50rqxr` z8zBT_0WKCak~4(WhfR8|Y^D1>kKJbx7nUm?p78yN@ZBH4vN<<*jR9iYO3*tM&Wl@M z8qIgdLU^yg0K%J^>kP0>$`kVuLe|9>u}w$;BdT?;u`B9*k$B8TPNn*2{`J^aWR`z| zduReyhgV6}fteNuH11eGJzpHZJs+k6YCzGcm*A&g*3Y+JP%-sQ-i#;0*Vviuo{L_3 z?4fcKQ;I18>$)yj{z5)Qi?#F{SKM0ioZMbIk-ZeI>1D28K>~63G32qY0S@sah}gX- zr?UyGBE%xJ?tUj%hi?%7hOUIbMZU*%PCP=-Vgy4z0;TtFmGMJbvl>9_Jl%zq5QBngv%;9H2}9TWuhAvnk1L)M=R;#5P!euWj?Ya{s$0hhZTArca?)s)y-zfs(o+>$er z$PL-|0i zT%6X8;c{zI zAf$F4EwgpSP4n03<6zNJFl$2LxpmniRIU+jRHmnn;!4ni>*7{V>SKhvKv|v-H+P~m zkDx2NbVeGDdoGsnE1$x1AW(LcD!8c@weW-P45J6IZC{nm-(UF z0gF497OxEj)|U6;!rMV+0Gvhc+D^8#+)Qa>PIECHf4lTWk<50$5qE3=;snAscWgOv zeP+5A2@1GZpCjOg89n4FRf$WYh`wq{-RBOG{)}uBGcI^8rf}uk*^I*cxJ9IBGH3Dy zMeBIqB)P^J>{=e-ZCbo12HV;?hy1|2 z#)}l?L}s$xg957SC1!f)J7T=pOpNX(sqoO1brhxg-)isjW4DbIm#yRbmue}ARN`aC z6gjKv;&Nt|%$*1J8TIMJ(}s0X%x4F{8F7)}*Rc~@G*t#`b-(gc#cE-_iX>s-il=Lq zai3Sh+NYm%`zl(?N?2-s9VJq)V3+KbSXk~EYAff+@aidLoLmo2v}iBv+`pHlcqVLf zZ}Cj>*Y&vuLiwoW8#-wLUDqg^oY1D+`{UB>-B*c9vYF-e0f$(?T-iCrgxw-{pcaD~ zYiD@8PGUh*bqz@6y37W7cDB;mX>~zQl(+{|paAaQ+^ENvV!CJ3xaOA^Jej3)czXt* zC+4}B4Tw0mtLkBr5>tG+_O+F#=ksyO$|imW;JtIi=&PB$d&WVZS4aino)&o7&+v;L zh`V*~A|Rw-5J`lUtJxYgnV^|)jMZ?_yMz^=dX#asNnWI(_t0szD&gk`oF;GmDjT1B z*Jhp}zm*~+G- zWRZ-oKk|X>Gy?G33-mhH58E1;_|m;{oq#}bCRjZ1GZAN_Al@8~t22P;%*IjMD@991 zwPoQ3NGrnS`}8Ik!@cE}mRxZq(1khoGQ`CR{&8^V1+_ypezSnl`^V;6qxfhF57Y`m z*&#q#N|pYm&+()cDW};!#7~vuEGm$713?W}5IUw9!v*OGv&q+Clf@+`$U$5blUm$c zw;|t3oJFOA&^_T6&aX-6g`3t1%i@utL_NM{OrV>Ls~?l^qh8)+h6SSJ#jf}qLk!`k zYKw@7TYNTm`YovKE*KK#FJdnI$Fcc>1ea==57Bq{|0Lb4@HK)<;Sa+Wk>om5^fys=ZDorY1id-$VeBv zdFAb=?}+t$EpYI+b%$EK0#RVtTg3wu60=UWXp@+1hzZ!jaQ1O|L#LB1GWm8oKGE~=u_kBqP^SSY|X8(m3Ol;nF$tuV` zm&G}g%^iv64j;u++?(@GvbzR(f8Tqo8JQm1hg4s-lA*2Cal`n5fPtWy)lm0c-FNP) z0eefNkcL5G)xGu6lo|ia+|QvmC{~8ADO0_I1@^O&43pkLA1Nvz{BaMbm) 
zPi3SX8iX%Q7Bz-=VmWTDBDzL0yWg;G{28W)ka|gYGwCwN0Pqn4Jm6uQ ze1i9Wya>egT-H!N+>}#8|9jGqXzF0m;=IHOk9VDIt9g{^t0gqp9)Yg*b9b7F9Vd>4F-2XsMZ%q#B8e;Z2O3&>3v{;$%2RZP1JCcMDkSj^v z$t~HBzwHU^lZ|uCN?c#4N)t#ttL+t4oHb{aD$bpdD_aOlDsqJ6wu8MkDUX32v$5}i zjyvShqxOf5ltXl;pWn){%~u62n|K#-GR*`f@~CqUOCZ;6!tZWB{eJ&Juf6be0vJ-yU@jKfLLOfs&VquhgkoMX{bf7FCTS000{Zy z!^=fOPx3|uXyjDsI$P4*ZLUtA&KOxLu;}ABQ}1YMvA&s+A+sm z9Q@@Hj|Qu0yz73(j+VXLXgwd?xFXl;TtAu()riA&t6?iex3)@#)nJt`8rJ#MBKHig zbHzwk)jSHYqj-;O`!SJo8!lA3TxGjV>`ho#ZrGC~>vc$ygeNSb+be#^ev8v#V(tl^ z5`Pe4l4ovDzu6nAHThYO)rf+U>iy+iq|jv4ikd_jVdvh|x=!Q@%>ie}u#tsPS}o>7AzGM~`G|Qx)gC&_&2ixOS@Mj)t?FooOHZ?h z1rrWm!=_&5Uhy@bMMWz=ZC++N7qvG1r)fk2a&in{=p)H$rK!7<8f1>O29&5;kCeRP z)lp_9EAj$?w$2{X5#Wo8QS=)7hWl_D6>VyB&7%E8_h)+cv2aA)8L1$+BL?`_MVD7*e*RyYFm_x!@Cc&e)RYt z4zqbU;OtR6b8nqh16n|NzGW>adt;cae5H!I0eAiE?2eoD^w}5hx0hv4_63+foIo)I z4nB_>&N~Y*!yr1wjP+=cp23R6BFA8m=R~alOf)=xoV@fGba$OPg?(mMvs z%ejVPJOpL7N1To+WSh%A5tVXsmadBIkwxSj#-$t;Gg0OF`ytvnJl{d13hA*~jw>}L zQZ5=Z2}GjFVi)a>6wKBX&xlz0?t~a4|6YpRi6JL8=^jJ?fv5}f@d5=6mng>0-ivJb znVs8T!5o`=b)puJwHPZfEq(m#RfN8d(vm%Z2ptO3NSI8r(T&~bpuV4gRFa&_HVhAC z3v5-OGV<+7(pJz{ML^+6)y6)S-7aMSQE-5RQkfnVzVpr&GS5@elcR6jM9>+CmUGM+ z&cs7y01k0{$QF&hg6gu2<*fvh$CUI1#XXgh%Oam}dps8>x4YpY3G}c!USToB0K0=+ z^NBzD=Ha^woTVAWMox-9E@!s(A+b8-0}U>xcDYL}ZB8L81J7&LrG2(U=$AD!W~;ZB zlV;}Di4JRg{+3}Q95dvW^*~|0E^OAX@NpClU)EY~8{K=FyV?*AKp-*^Jk%bf0kQF3 z<_~nHjO9BJzaJtatn4ndV92~gLj+xy`4>7z9}d>NC!We)#oImI1w}B|qO16k3|`l7 zIs+CXaRLbSHmmVu%V!gJ>gn*_vDxIBZU=}=xoKy=8(?%jxA4vR4w|~`7kVl;N6~vg z-gd&x?Pv2w@uyD{;DfqxW~yf7GlWZl4AiUCsITNCpmei=9WSNLZ}XDCqSDTeAgt6m ztq?AkpF?{9hS)nro{aEN^9m35@8E2xz9kC6b{Ye?W#AqUpEJQ9i%11e9^X3)&GS=z zWF~|^GDFYeh33$VDf+`p(>@?DMHLbc@TN2BKnrY4GEeq6N2*2Be*uFmajSi|#b-bi zcMEc>rq`fvKmc$4@_o`nWP$;Hhsy(dGX$Od8!3bU5QI2)hY-FdD#(2(a-;XWJ)}0T zewHj39L+@Ht6S>#9h#(O{8RaPN-jEtjBNOK0J_HTCAOjH^v z#(5Hv=pLWK@{bkw!%Xd*Zt%e8?;lOy*jYU!>QBO25#_}E6idEycZ%#OG`cH0pAmvl zkr4??HJw)S(sjv&QbpU!!lI~CLr}6pX{teeVLps0ZV;Iw?3Xi&7;Vx(Ys(QExG}iH 
zH1tMu(;P#pY^Y#RYbn_rR4I=`FP6!SNrI@5@UWEvg!y(;a(qTO@D65ZW9w+Q-FAJM z4z?Qj4^nz;H2Bs74wv&UH`|(h>dh%MhINocW{Zm|Vl}TeQqP`F*3%Z%i<;sXf-eHE zK<@!}L^N74KQw6GsH}6-E!<5KkeZvef=C=uWy9mF<<#u^khVU!xrAblu(-I`>(`EA zUL-Ja+mfEwr)Ehi+?~XA(qa-nk`8~g>?HYj@Y#-*J565@Y_%I zYoxofi{d7@Pv;ktW!A25nmU|MpJCR24Z_Y%#a^|45|BUAvowDV!bdrpe=a0DRc{xV6XzdLTZqExRLq;tZV0#!P+V1%^bTg z4!LEMWwr*GZyof+qjJlMtD=FBEw_;kTn2l+x5_#(e4n3WA9=e=2t9CHttZ-km9oz$ z3ra0Ww4F^$6lI@Sp^`X-<34X*sb0=>+~+==Qd(JneF?kw4o~97+v$+q_z-dQ2XUsi zA_abWK9gO6VpWpcFm_(rhAW+!+B(D>>#@1j%jZo0RSsMsJZvWr0Khce9~EJLNmEPd z**h58+5aPF{f~=(P%m{YtDQk4FNA20&)@8fd6a=Bp}%EXhTTkOnO_H~8%*d+B2G3M zF~89FZ4$7G)*y9V$sHAIC}|dxYhaGi`(A|(4jXz(hu(7XQqo*f_OP*9Znu*V&Q2Yk zZQ5~?3Qix(I2Lx2{j@VT8YY>WP$sITjqYKxezW|#e>&mqrUQ4(`4%}B3j^YSdglW- zmLETtwNM&Irc?c_rFI3pkDjmk6{>N&IT89EZKBgN24A4)={75nffDtsX~5GPZ%z?RyP5EoBT~>sXma3}6R8@fnIm z=+kg$6aMfiOCWvmtNERU2y)in8X;&Eke_mc2I_Nv4G3QJ*$^X<^B*!|R`co*U{&*4 zBw91q256Y;gtf?XxV7RHE{!XZ+goYw(rk?|%J@*z(ic1P)NKLK+xAYt?PD{VK8k9QI^);Nw+!U|Rmn{zBL`z=G) z=Mq3QB_3-bMCBTz&CuoLlBJd!VllG_8m%C>Ec2x?AH}X|gd76MjV^VPx4`M_AbJ@5 zp0I1MHDbZCdiVy92Yz*7f-0{sE(8?cc(<`vborerG;p3odrqSlI2OrrPUJ>B3{z(F znoMSxn8nU(Bg<2{B2W{*)kUWIN|()ONx#sEO*ry+T6BEr*%A&t8 z1K;;4EbYnJY1rL58|P9@fc2s11hr0nz-$O*bPOsCr+C?(QKT)~Zlp;57)TM)w&+9o z(jYv=975t^jw#g9oh>dV04*7}6gE+FQop%TIXC4wxMj@2RcP#e~O&8tFzhERYI^P}1bcs&bG08b=|2k#iO;><;gyncl#XYoV8T{zC7jeCQQ z?u&m=T;k@%>SLYu7y&~Z|GIrLR6c3Zx{x&Sdt8pw=F9oQ(31$lN(sD+=d4AK(*|Bd zL9UINC+67meBDLUJX=a6QP>KKJ8P|o^k#!zi|{^LC;ZRf!Gb6BnICV7pF8^i6RCVL zg(d-UXd!%MoTRi0Ay#H@tf-yNWWgWi7Vb^KXa#g-!7?XT5H<) zh}rJ2xC@#$fc&G}P)li$47KQXTkMSc&){ z&S_#Azrq|Ih{tUClN4j4df0~?4EGcn8{S45VeA&XVt)ZXl5dXgVw0-PmpXxT5(@~* zbLwYjE4HcOsuAcgGYi@v!XC&{FAickm=u!LD8d3L4dc=Qg!&BmzjM66w6V@ z$HS`u`#=?o{4}?;vieYOh=&T=W#ORMEX`{wWIrrU^hT(fL>=rTH2ika`fpD?-k{v_ z<29W980wsPFW@j1jk%bkz{!f5Q!QusDf6{>RV+>39q*i}6CvqH^G>@!jm?OQ2_#8b zq)$hh8q1;IBoCuG8Lz@ZZ?ccvq<`X!K!;50Qk7zvh=(LL@FdCKY2xcLP-nk(j_M?v zk}@Vx{}^3}BSR4eCJ?w8Iq)u?f>)&Sc7ZAi^OQxK$1j3DDV0Uq!J}D1#U<+WB`Oc> 
z3H6}pAUr+}JD=;hnh>#Jo04!HN7~|rbpY6XvthQ}g~yi`{53VSd9}!uh7oQc(hI^j z>EeRN=@5P&aJ9HafC{wQ+mkrpuFV%3bm$ae^_AO(~Ekk6a8L1yYTOFpU z8#$S8@PEe?{@)qX_y0)7^jA61{*^KPADQD<84C-`)}2R_~Ge_9X!+r|D%J@{W7?LVu+ zd*{~7dI&MWH*dPm`Py&#k{XT>Sh%qKyCa32fsi|1s|5X*t5@Ih7({jLQGqicu{}Jm z@ei+V=bU#@A}s5-J7ZBHOXNqC=Wy5Q>18+w=b^}i3eoH^r@D(>JlNXxzNBp!q&ijg z7E+94+gh>7nvH!EOEUF%R;x^2DA&N?9O1sBB_nWiW(jrr&&h>gekf0yX@(d_Rc zc=P^UQPR+BtflR)c%H~qbfz9!sXwc7@FTEz%2qV?s5xC4Q{oi?@Ya(+Yii%loK|c_ zeajkzC&74LK`}d-1GYvz!e65_I<8E##9m;57i+0M%u1w&uaFRIu#!|`DY|cc?}Bkc zN4hc)eCo_1UrZudxhR8KnfQ&%(M2SO9?d?=175<#qos(_jC@ zo;sB^tIs@AJ3*B*VV$|qM-6E&odq>Ec%GPSOPetnJGyu%u273pFtBI&ne8(vN1kAz zE$66O4bD>R_>;@{L%O}FR{IhQs%7Y**?c7JMRz@-sf6;}+1sL92hC|Oe|oQFAf@@x z;&R-$8`INMp+tsbU30&CLkuLtGF{IPb6TzT3hbnkA^)Jd;$8U;q|(vzFae8A$88u)gE8?%i!ed5WWt66)k5UdUkMUDT= zIg(n=r!-XR_=tU%BAOo@m{i2qq@B}M`fZLG>q1(=a+?Pcb-)X|nM>-O;-~-aSCw|0 zh+FqQ)p$YeT@g1Vf6(yTNzS+ytkV0iqo_+5zmgm*4bakEch%*gz*U8uJHWgRIAs^u zPfFzoVfB|cVC?c3dvONl?X2{)QBy51{#YXUIpns!bW&hb=qy^GYvkS-SsV~j(^3wa z%N7!YKB{qD!;qEUQU%%3_u9xV1w18|H95^#mRz)e?NZomq@TJUV)8<(F*%dyvb6>H z<0`B_g&9;G-ZGI{o!Xe?EveyyUrv3fsQPro)9nbNh*20U?0>jnhYwvz8t4GIn zCFK}M9AaIn9~*blFVwRUa2;ywS+qkpAlZ33U2fTLoy>_jBo$UJ>t~x*<4Rl?)iM*& zpOuMsD?R~h5EW3HA5wB@+{P=$DSbTIY94#B3UtG=a5ZH_R-;*Xn|(Z!T*k_s=T~q| zC~VNhGStsBWiHQ#+i5@$1ezW}_gW`DfQ6aO{g*-6h@HSCug1EY!7s;VANyrmi9$WvnI zZX0fif}71_YS8gE25PXWfwhg2twlJ5Ox$0o&8THRs9oi1`i~}7;f*sxH_PS+tS0cr z&oy9E(+Tpd5Zz*o4g4tXOukO>Lm!uwML{dJ_Psj6)Iu0v{sP?bEJU$+VDN@g&p@F3 zYIzz@di`5qq3Su88V3OY#$o;qVEL2mmnr{O`L6FIH+8dWgCUWVp2n2EFEf-`;{}aS zpCF~2#B{})knj8a`N6G(lW3@ET(i&2qfkRpau1VnP*0ObedYIMx#A>~0&NE*w-8pr z__C%=Z<}xD3vA;gyN4!L#Gg=;i#)RaKeBD1m(^IT(yU?duBM zEyomF65_FgjUEauRL;`t*1xrIgsPfOx}nk^^4uET*8?#SFyg}r3{CrltPoS0mp&td_B#ndG-` zJs%$2&=HZT!n$7Q1}7iu)fTK1_;e)E>lx{5Q#8nz-P{y@XIzwKq*hR>tM9!m@~)zy z;K5m?P6)@yVf}{t-@{T_fhkaNri0eLz7c7KTs-E4#S-rnQuv zpzuW_10H87Qz1-8`_OK=&n7cV82bT%?RjC55P5l zJ_M-pWudjKUrDUx&)PilbSe3|?ul}_zk2LoR0s!t|HgxR{H@d#>De_2_Eji_J}W5KP!V;C6CVygOgVWFN7vb`-0SW7 zfo=G*WsLa~H! 
zhjv^|Pw;KacmqLs4)}Ma`yZ4v?+oJeKSi?z^XWs1{H@60)URTOl^Hk^g!On<&KWeF z*zgKS6u>(%RGW21&=W=_O-)efbaw8YAle{|8H5Dh=+ER~?ClTU(nCYn#)A!E2I<_O_f5xEM$)6b?2` zS~LT~fQ)7<5vm@md`1U-tKKqjB<3V4_6HW!-iKSrF%cNIV|k^Gd}~R`j)W9*t*vT> zS>Vqba^*`M2)ujet|fsH3rq!OT8|E_|1#}X`+Ao+!kSR_yWhQC36cRDYbQ{Na;`s; z+?wMgAPW}Px36hr@utEkxqf5YpR3=9^@R9C4nEzyNX5A^INHu`z7J}3|e#a09` z=?}sccfJr-%jONC7SoG>tCpj4(44@2tXEp&M~0;21@O_$sGIILTfy$D3e>x6(E;4dt8Tdml?}ExanHemFwELaFRk%gm{$qsJfYTzKmW%59Y3 z3+5T#(M4O)h0+}|^c_dS4C%E&*3`31mCUvNS|3xFUza5|XQT6-%(YJ)w-GMQ(QQZb zmW#MZf)F8b1_1B~S($oki+R->)>+l&zZm?ouOO~x8TFVyd{9O5vY^}g(B_#*l755#R4o-_r(lN$n2F9R_Tqsm7rpyLlp%)p-?u zj%NDGQ2_Mu+;h$agB(||{q~rG+|l8FU1Byeo3`c@;l$dr<^FUCzyq>Q@&bGH6zaP= z^sU&To@8oAD`97UysW>h;|zfpx0_#wA6S>JCZ51vK?JU0FX{T24(IQ(eFsg6QZw}( zJU03kygV3$o)~wV3p%tg{N}tVR6Cp6;uJgX$r0Yl3E)D|{<#F_H@oF|Y&2yo`Uo+8;l7qf+93gC+W1YBP=>($7NuVuVNb<=$MbHy6 zx_1(kc?t}jYcZXjHpiRIQQa#E5^bST`|YZp=-H>z^KFj%4W^DcVDSD%+05v!{oj@T zPuF7fvw>%a715J^(wlJBSV-k0h!jRACx@36YJLepmy{hn0a+>Rewh}1al?$qxWmAh zV#=w8(hOC23nMirsE5hkXb?-QePuSSEcujbN>gn;&>U$VHK#1)fcj+Tht(JKBUDTA zVWFOiGTM_e=LMb*hb1FCdg~+QGE^Wusl+L2^J2x+@6swXYEmsQQ#Ii0v;k$sej-n& z@v&auYb?$l3lV84$`LGOi34RA;X+DP$rQ>g=8BbqjM8YE7YRx$Se9pBm(_>ZWphAT*CGaax1Ksu!Y&skbeMeRTKr>!HckC$l2cTa1`c^r$9bV@UKe5?Gefn#XG zRAQZ&_u!VDH_sY!e^DavTB}jq90}~dq4Sw;YN#2 zC@aHc1fjZz#bX}o6eN-Ja|;E2B?9(QTT8+ z2BNM8QhyRjd5m#E;>s(ymNFHZi4S^l<-KAp`VIQifP% z(ExTMsVCrbGS8O>3?}7@Wp}nDa}j;d9GD`{vFm-j zw#pK8S)Cu%ug05XSECYI?(7nt95UUr(uH1TQE_VV;O)0$;FU0h`M+%6cR{VsxO&-oa%7g}R17d*KCp4`{!t{Pp&ApqVaT zrbg4|Nn)--tg;5zO1#|mFtoY^1`L-2KW1fl3!zIk93m?G9j$GfvmE`eTv#KG@5k7j z&ysP#?@5!T2!q;+6A#uGqx`fcofg?yea;tE-|p`!b=p1OUO=l$%#CL%_%B>m_&}zQ z>Fmg;2iu28BVJ7ztG{R`t!&XmoY~?>oO+-JY&pdKEbJP?0Ia*|DT@}Y9BvF}rw0!a zs9!cyiJ!`L<_8F6F0R3XByUF12K$$eGClL6Bl4oi_j46A(K9B{5QiUb5 z2$pzZ>fR9)muhr*2eeHU_ZSZlnGsVvugBL7ftWeVAO&cwfGlz50w?>)Mayi_)0HC@qy5Lty6h z`Z(ze=vrVE*8tWs*oZ`}27Q#84lTOtaG_E&l}oWYSu9L}PlwvRWNZ2}=j7~P-)aLexlct#F5IPPmS_xYy@rupdKS}?A(s>0^^qhVg|SRI 
zlFmhD_U=$>j;^<%Oj|boFnPr5j^H*KO-DXR2kLE6mw_B!I;6v5#jbn?byVc{GS*cg)|Dh)X(8$)-JaN+m+ zqFs@%Uq& z9xi(}EU8wJAFMzno@$Uq!FxzZVrV_nGW$TU+e7Ps3f;Fk!x6PlVFjWEASx$fg99+x zB5?w>43vCWCXoA{Abr(03Q7}sh-#?BQ| zW!kIAxc!8V=fEFUO zk?8bDYkgNYdLp#)o~mtvDaoiZX94;lF z3?;mMz0oiNTknkSImvvY-ZJH!?pZqjV#Yz~%)I(N`>mOGK)UTAlUw|CHUiI_F@nn+ z9jqEnrgafpXhsh=J0VE!cIk9^61JeEb+h)>i|px6Jh`{#NB5RngK2Sx6f(?bH{A}cEo=2~x zHDO}df4Hh`WU6IaD{$0ANB)lit6CRHUbp&U_~ zA|b0I=~o29bc#S8OBy6%t}~C4SOs>KGI!NU3lD9ctp^Zwh9i7|u|i5Nwn~-FOXp0;W0EKMZWBRw5{h$*(m2jNMVNJ0^Cw1wE9RH(Cmd?$trDZQ`(k%nE%i~wn zFP)(38ETtVzn)=IZTJ(A4bSxNf2WL9IP2Cf^}*NufCs?M2m4i~&gUHb;Tx>T%loy~ zE779%%u6JAA{80w7BY6Q(6^{#>J1x_wI^psI+2z1)YG#NfYbXZNF``Ss1kdq|F)!og z)@$*Ha)LRLvK69;9lwIxznV(a)F6DZVWhBR4j4%2P7FQ8B5pC^|AuO8&~E}S{#co- zkqsvgad&l~VmA}|J{rNU+ICDjRLMepYbQ(Oh}?8O7`zU(U; zOP=LeyJb+gFuUZy$Cb{?HYApPsPz0I=T2eM0^}p&4Z*tDiw)3q=cqa-p9cVNI0xKh z92fwd_YZn-fT}I-G9Z9c6@odyC*uB(8&}*GFtVD?M|rrpwNOsP(N)r^Dn^N6gz1{w zVovo~b)I?ft0c&7;*81%A+ccD5Fra{P63ZAYC{N#6)4+~N0h1d-#JvDvc?Ae*J2X- zq>pSqo9?t=yp$K1zu07^jBE~`=$BTQejJtZD`CG?xf-9AnCl_7VX*#2T8GiN)?S>C zm~_|U$kIDW?Pl$YJ8U& z=M44ai11sC9PqM0$Q8pB@m#*f_YIN0oZW19v4PF(!gxmN4@$+v~O$#NCwA&tgYmH}L82E4nBglEi&R$3B}p27Hp z?|dpZ$5!Eqm$WXvdVr91v9Sz|Crq<>zLWc)o*^5CYWkpJbD7mF$q)U}F8D8H%ql&L}9V9;|ys!#|n2pj~6DhfN2_ zqZgCB^YJWeejkL1D(;PN)C9NMsZAb zuQiWu{1^Nm&3Yk0k31BBMVyK(~g;!H7(EnI24tYW3X+ zIH%?QL*wgQcz5R`exJ zB6W9X%}^_D3d=g}iNH6}?Jg5efuJdo0rV5kx{Ohs6*jT;>C*}Qtl=x>Es^&4q-^1% zpY$fN=~DVK zj73bsikPZCQA=9N z`MIa;gjg*S_!l!M&D79wXYSXTa^!e-u^)_hcP+dPz^8Rt=(#TxTkEu_SBxxlX9^W4 zn-8ZRx^oDH?aWH9FP@Fv?|JC(;>W0b;Zp2&oehG{;!v)gBageq=0{7bPNlo|ZQ!>8xSkYq{=oe;!=@n;W z(6>Z5m5OCA@^?vYF0^tJ6^(|zV~?u2_I^(0wGG9?u1x->Ha1GyinW0Fwvg8VA$nnU zuh)Eo-E2FJo9b&@a-Y0WSvIB zs;;4paEes*N5SP`|68xDn_HDQbz^b;9jv;Z?sj1>WhwZ1%@7WEUl?8s2>lj1p0yc5 zJ=0=U1HRe%F)}NG@9wiOHgxT3Bh-r?p^rU@S@1JYB&zn7p$Pk^z-rEm(h>8fKc4Yf z*s+*=xf8N>Pm=QuOHnU6GWhI{WDDltzNPo0-uGQPHKGYb62C0FlY(ce9`+~_!ylBa zCM*EF54)0DOC*>5VKjq#kj_t4At_Bddm=9Gy~Ll}4I;A=X`0)u9~(_Vc4t*$Ql_ge 
zJly0-BU`%pDX42L2*>@8LqofmK|3QjG+g`Nuxfv=^R4~csy%lAlw943RiLy%ojbg@ zcS$AbFHzRq7g}g+GkXB9Imq7rM># z40M2->+U2q*%6<%3Wn$s!@lY{^=(N%*m&l8Kg)StM(p4r6zT|nZB{0gmxU2_j&ZrE zFj5Je?KoOYQ`9LE*P&`jSYN-XNuM)$;Q4frhQlhu4n{?JyXV5lT1b`)@u)2lk}{Ae zy?32S+w+#llsWXoU)0 zW%p!O?&&C_3%A$l)%pQVsh(iu>NfSpPaEzPW^T`SpP|cj8z%URcW%`%lVfx39U}<z# zQb;7W*pu{+wg;~{Zf2QFbYAlz9E(w-3YSJJuaJMaOhHcV`n=|j$Dm`AS~jE)9?vO; zawLCt9Ixb(Ic+E2%GS>Ysh7UM?kW=&X)w$~>5|znP}E=qXWRZ`-{(td6HI_cZ|%lL?5 zs@Rez+qf@3n~bY59R7(&$5=yv#`2b=xvoyMd})zk1aFO$82xkR)A(YT<3ZU_#Xvd_I3PT6djkY;{|XN>hOJ z(ktnxh(=!GDQSKbMfet|{;7QPwktu`4W0%9!Qiae_=yY0q>s8qc!Of!gD^?h&D%;h z1TS%4(V}GK0zwJ#AD;6J`|56ID_IU)qA^R0>qkC_Jr(uku_3g*fDw0Xb-8}qAbt1p zgu2H|f7h3Gu;~k_av6^a>q@qmPM8-jKY1HDGr?K%2d?7T9XW*=mF0oUWrn45j~4tA}rSE_HH|l|{`G!UNEu&(o-v7k3E~%2XKUQ^c>; zeJt|%L*~mP!yx{A{}(xr?Mj50Y7w`+xbJ4hn=0SV<&Qu0&y2r{ueyGBwD3~LA2wgg z+8@0kTNc1QytAsO>B5%b>ca8rEmew-UEH8xPN=P9gD8}&dwb8N;8E3;(RtDE$eocT ztxdD9712?1FFfgk_5<~SpGS+Al$kQj%~7|*=D5GG{(N<2dpVdPtH&f0LGCtu+MDyM z@ZG@il0MWShcirvRA83=C8lLLaJ`a+TK zGV9=HLj7u%>rOwu*_wQnUNe3+TtQ>kvM=CH(Y#GN8+0rG-j_#Wff+xqmy6{*qG9oC z9>!`|EY69{0Flo zKNPq+asTd$%9^!O>uKDcW)nhhGk`zOf#L+{WA3$D^m9~sUQjSU>&ER|RHo6S_NXCA z({G$Z5;a-z1D+cA%hu|Pi&{uQUx>geI( z32Vpwsu&S|`QsqVW6pqQRd6TAl8y3m^<2}og9@dAO+#W9dLKwtUc|B^G;i!xbgEgCsEZIgy{`Y2o4C*?#w|`Y9Sz3$GdD{g?k8Q9V{#dB zg5RAbF_7$zB=)e3T7Q34@17=w8EcaPKo~mQe}PJq@2h@JN3Ne1Ux(${aE<2S+v%A% z(}Wu4!c`U&2sgZo?Y1+yWN-s@E7XmjzJ8;#z5A+^8BtyRMW4^=5q6=8Y&jTB0|8^0a1N&o6X8A(iv>R0?Qx=W%6ACah4~ zb^9!7+Gs{&*71b7pjTcYHS>L>%JYxxN*gUX@jBFA{J!18Qe@G0aAeOiRcZfFDF3ly zTZP*h2A5{vlY~3kKPHLj8)*^w^v}Hm>1Pqj+7RAe zS{>=fkaP?E)XXIwPnB!$#i!U3)3de+k+&&YMntVt>=f8`YXW131{YU;UT5`%QgZpF ze?PJhcp~NNO3}nvF@MI|-_WhIMeLJ!k^)vqB%mp1zH*J^w znyH0AdT*{BvkzYalodTYzp1OWCrtxyJ2V=W#X%Ew=Th&m%i{2Jf=7pXwz5D& zuTU)s&>BQhL-2^|cfh!i2cbfiY6AlSLe6|S$FS)|b!PQXzv}lKPnFTdX-1F3)YWh<@w%@$zin3Bs9a~^&vW%O+Aj4IX-@vnG+>{8?lE9t+*Km2{hV2f)h z!MnbYYwBH=L|>sYQy^-wCWSPx`76SUhSZ9@lzYlzltr;v^ss>F^O{oSkXX@LG|#d- 
zmfEd#eJ7;avp`}+#mf8FRdeUK-K{YF6Iq+ppVk{`cn$cY4>Oa|?2_P4;%BdiXP>&< zCR(bwUH!1Tg}xl6cByZ)nP){VmHEm$j5Ag0N7ZVI%K1QQ361Z>8{RB&U394){tAhl z{+yvt*r*EL?Z%bzP=qgji$SuK791Mt8#@qRa!%9M-Os*doFmRF`A~GIZ`ps5h`?bW zWaS&JOHndghV+pa9POwPR%5yzDev{mUsECf(?c?t9SLMedf)v?Ve7lAV+M)evIchF ziuZ=i5x-=6bX@7G8!+a7_Z5}J;@(e2HpgwF%&26?%z zdsEUUYj5R;x_3elDs$W&@-{co6k)!7RUL%C#60)y9$*H}G@tP0PkV@q)0>!$swEpY z3Ji;6RlQ;LXk$?nxnM8<@X8Q%_?W30EzP#7`~8<5(9w^Zq2|RgVGC@msCz^ zrVWU*)mAmg!(;C(9e$lW{AVY~E0kw34cKA3fXnm$r{!F~KisJ`_VmiTNqc5f1v)|G zk&|*fi*z~q###(Dy3`Gi2h`0Uw!Lwm^aO>1eo!-W*8+J;Tvspo)!FD-u-Y8 zutIAe7gNht@a$WHJ$nvwZS7X5leNuP6Bl-a%NAGq1Ph;$ijGeHIteo5(l#h8C0rCH z^wVc6efIiZ;!vwt5|!5<%C?+_SQDvn9^!gC&+H4~QJ<+A!8crsRMl1ULrHD@ud1sl z^Be6JA5?c-y}(fRJmPiLYtAN2o5R`#?K`EVcyY~33eUSP$1mr`K4egmY+vX6gGI!E zol*9|ofZxE%u}rgpCRK*8oj14q2~PoFQMcP5Bn3=$#2jDuj8kY9LjTJeY2|kjQb+v zA1VY%D3Z@U5S3H!Ne~sY?mxX3Fo=3@;UR8=5tDPS4_-tN==ZJ_i&ayPYbz`R0;&`o z+L08Uwq^Z&KHLNk?~yg-ze1U?XA*qLN}zW2ZRGggieAjX4sKRTFv%gp;+)y^)|`IVW7(VolH}?CBbf!y+WA&AdaB-_G-NiJ zv)=U*RI|w;-P~F8o7po4tO>i5&_Tl+pC6xfIveqG%}r{$#T{)&+laF<8k(Oysh?^( z9OD|vq_-_^{qVn0*IE+0nEhgW?tXOD(+(9cVsn|sbeM9}_b1#$Niuhtar2iLKQ8fz z6U{^&8_r8ylVmb(znGX$}14wh}Ti`QFe3QpYW8PsD|<5grhGLcHDAbPUZh`-uRv-y;}ur zO&#DCUjI{T`n{t@t%#KO=4CSTal-$qq_A$awpq!0d14ZsT`7)<`Q2%@3M&1GnOAEctZtWXJ!u* z=X*;qRg2_3r#y)Krn<3ieZ9R`6@B%zbu9YTl9uc%`+Ncx7x}~Ij$vUjJo)1VDrK_i zq$1`vovYe20usTbXBYH)&q|n$lQysCx5rI>r(G&_vyzG~r1);mD-Kk|tn~42EJB_Q zcubDShTev4%ti#j4#2H?0v(<$-7*TJJSF+|F<$z`=)^8tVD!y-g{Ut+1jlTx9WOrj zSVz9($dFD{`FSbCDeF}(OkJJFVw$AD*0b`KE^z}T&sV7i`{&oKw1#W6Yta<~&%_}4 zy~FweHkXxK8F6{kTNWawb7jif-S`dF|%I;CnQb`qh9CKFEJ^wFsEd<*yO zj68cVeHKnzzIG)yI>P#{UC-T&1nB#<1gKup_|;3XNCln!&X?m%H)R7=M~)wsZVn6v z*LxZ>lKRr}(Oi0b!DH7pEdAhyP)vhG@aLz!mphuSReO$PQ<*f}T6q?c&3W@I&e35` zy2&9>ZT#1Z*CBn{eq`1UI&N;WRQbL^zAd35{ShL?&4PR1dGYMZT@u_CG3Mt;!L;FP zxkIJBT`nzZAB#I?-41`I741jhv4KEchkaR*gwI@&n?h=t(vf_1>{6TAU3K3=@l&AD{p7YUF(xMWrK1Zt;ajZA- zucbAy8ZM>Wug!MPphLU=ynWt(uH^ZzkInwQIQc(5Hv2Q^_dCgdA3^f}^N;Q*z&`2x 
zx(s|BvkdXp1%UH^a04K~!P{Qn&)dgeBETik)6UD*)kECP$I%%-hhGI~wbV7$0RjR7 z;12i);8CRQnyRWc`UZOHns?Q}2mm0hwDs}vxWEGdo?Zcd1{$|`%*-u#Naw+m(b&Ml zk=OxBTYG;W6@C4?=h>g*o|yWY7XU6x{Q`z71O@njFe&&p2VnFA83bX;bJ+P$`1Uz$_a|)j8^_2%6~sB` zmD=9L)&YcJAS`P4AK}jb0sqMZhyl{kaP)HYbG7F=*9E9qCsz;0-w}V_{)g!Q!2G`g zJv{-4(;@V8v(?fE zVRjIX_wzP5&kN+4u*A_r<2)~rKH;EGfd0RDAl&rw&^^x!l$VIw(f_xMAWg!}02d>$ zo}Q;Ak_+%N`d3;a3ny1~&41y;d^~>FMUXe5GQU8BbKXGuM4xQ^)HFaCq(k)G(aZQ; zZy-#}>R@~OT({yN3;|3CYyn5W8?XcK_J9{~2Jip|fCg|2eEI->V2l&s3V47aj$o`K z7~)Dm1-OG@|BR>q8|Tk7e`7fRPQ7`a8qDRNasJF<>t8tDL@Y#-MAG2D6470v8$=33 z*8v_PDA7$K2+?gY^g5B!UmWQF2cJBEGq446`X?P?+!vw)Tk*Np(+U4v6{T}{F4;*+$ z0l?o$`r7Z`1Q&0CebX`?{{!r=NYeq}>z`~|BM2!b8>dH4kYh;s#q{)KbiSD#+e{`J_`F}*)Hs=zt4!1cY=1_jogLSkYfP!R;-U_3xXM@)ZNN`-{so-OGW zU&d>XpA?hv-fHY+G8ji+mA3PXB&T3zVP#|I;};MV5|)vblUKN|sCrvXUE_|XmZ6cc zi78mT_709t&MvNQ{sDmxgMvdsqoQMC6zKN`2{3u?aSBojm@oX%=aG$hetoLzmCu4I+yd`*FVYy zDuLhv5fLE~>A7447lO_Orz0Z1EJZ@Ea*x#3m*L8_$7GDRo)kCslJiO%pqcFa#wnPu z${_hL=c4_V?0-+N$p2fC{UzAnaxDNC3BgK9M@R=i0US;ccz)p#1!w~NfB!)f$$!=f zgH~XF4B}X)aje*hF~l+lCICl+9!lbEowiRxaaEPW%=I2Y_*&4PW?|Qncz|-mAGx2R zIQtkTRb^F?;?*hg!o^yy5hD?QYKq)C=(?}c3T$}cfz@(6K+HET2D5)2?2n?_c%=IT zCBa~c9qZ?!!$`z5pJ^bvDKJHmr<7~2VNAB-F;#9j$=ObB6b*}xeH}JJKII5>MS7EB>w9sc zdgSg<>O(xhbpP}6raty$G+itLajU7jTSudBg352=yBT2#5YBg#7(8Z7pEMFT31{k5 zMVqcxBt`H9Yp9QdNm=gO1WWWSGfZ}BgoN26SFlZLTkBLsN@im|2;bEty+=Mud=Vb~B(K-XF>j%kWtaOdWEZm$ih1wL&YX{L9H8O->mB~stv`kz zZjB0Jxpl>DKCF?cy=ft%N{TIvEtbL z4j!Og9OOP@Ie^#V0gbU^>1BR6>c=}gaC|2YOpQytn~ewPV0w7qb}=5H*g4w31EsZR zg*_OmPFp-M4%avyJAi>LLrm==~BQ_){U)~Sk0i(OEEgDfJ(E`OAu#EPJx(aPrK!5aM?cp#~z zp3K@hm;(ik^-$J9j?}XTH!Tkhba-{~KdoQhoMjJA8o~I-DCouce&)#ckw5N@L)y?P z(1@&Az7j;`d)ee}%oB&K|N2R}5dx7`9B$gpCMft++|MbIXLqf3B1<#FxZh!}wnO1V zYph}+-&h3K2kDEBq9atfXS%_srE>LIrv0lzCJ3aIuNjOVN|jI%+iyBx-1pJ!ZUh%= zBK?4!{Y4$cj|Hw1nPR(*r@B~w`Tm-fD5>BdeKZ(|xq{hL5+G8mJn9Okz8+=?9 z(SuD`=RRX4J>{`!t2n#FvfGFU;&m{pAok0&!?evkJfNO2uzzX;l;{lXF4*cMOPoWGZZ7&VY zVvIFL)yJ*@Z49`k& zGMg4p$KpCeP~H?)?%gaiFg|UzB-gn+aJm3HBT}f7$E(e3* 
zjnT@frx%@gsf6c3+g*g)y*-fe669MpYkHdtbyD>fj+|1xc+lL*;al-%iqhDT%LC>rN!=!5g#{^AdO>|da; z=2ttC;P;u|HtMOCm+a+>Cd_pf3Pv%aCd*c$;CAtn3u4kJ-P2K2DZ3(Tjc&3-8?CdV-pWNi95ZE z2PQahY24gkomWl417F4jCnFT#y&Nq)tV7649B+$gT5hh;%7QJ(yME!61e+YdjI9AJ z5xQU+ggh5{)6`WoyGqUbz#5o3dO9ULA>f$=>~dhWwOaB0ok{ms8WmDt2Zcba9P>8O zoXU^m0UkV%P;{&cA75L;1E+UarS;QLZYw&+b{e9Skj{fTES4C@<LM=h2mKkuDkg{bLROOZr<_(VYL0cJ6X; z9fUZT5 z{UNhsP!*3cOnhZQTSY!I;{VxE`6vI-WO8Oj^yoF%LaMab2ej-&07gOdA#*neot8m` zNq#YZYSGbMjA=Z~^F;QP86%%0d%rHMKR4}Bv(Zf8A}XcQJ!k=@^FpWAO^;oJGdJsn z$G}mk=)D2XwpU|UeQzi8TCdPxuph=E+ub>G?1b$wElv5?`cc*MGXuuuEb3;7ABEJ7 zYfhl<7Mf~WW#xQ~1=$G)kKg%4uwrWZJ2QCR>JK+rax*&_jQDj5r}sJLSX$1=6cyt7L)6#1t$PV>Gf0S~kc_13I#o@%3=2jGvb zNilkTcp!>-p{+mfnYU~Yxiz+9eL^C&PzigvYGnXjWE0tkunJ2{S)o)mM%CT=w$V>n zHc|}l<3&9~6%9$vyIv*OISGVOPM+GX711o=hOLEjw-@`D6_fO*JH;v@#w_ZxrcXD}UiWPxJ~m0us61{)rj_Xp)e8}z-g z`Im@+wuOq=2+H7>lV+)i-DO#f_B@Op-4ib=BVr}77TXhE=0>N3;eyh?C#$u3o}5Zmb}6X~pJB(`-`UsNM0H);J7`p*#x3Fn86unM%%Wg^E9Z9x+}- zAzKh+IbHwFTEt4}&UiqQ$daMSR6SqpnC@hqxVP7O>;k!i%~C{4yN9mQz;3ogukFs7 z)URwRY<8C8oT!*^PO>%h+J{c5HBM@qN4>}@^}t56&k^!;s^n39pQ3kAzQ$GhrR399 zWIa#UEdD4*v?oqGvj@1?e-}Qio=T5VG!k^42N#c4f7DTDtTKq+Hmc$;B~v%dxTllZ z<6iX0h65tzwsNu49ZiaQx{vVYJe8k}kg|0BZhfkQtwQz?Y&-dw(g%lAeGo_ z@cC>UrG{-K*iYD?EB2z$+IOd6K^3)k+P6Z21D4%V^eY?%4HC}2ywlUNvB$USonXzHriU>KwTZ&9W8M35 z#h>`rxCIrm=urGHaD`A|od+c6T(T^@6Y6E&D0y>ei`C-z*`2!Nlh!5<&J3mqztAiw z9m(b4DD+N*;k~9%2MNkkJH4pyrw(UL%4{zX7<(P`X`x~gViro#Npr%4o$Oci`2>z0 zj98TxAqj6B=MEPzjY{j!@+?`ga)WwC9(fAysJ%X$9JIUvlb}B17PNDjMt}xGJWF)H zqAXI_*!W^lVubJ-+PL6)xvVzZ;{n^ocJr1I8}fwWgt86wmg^&5gWY_pQr5_C-OGp1 zRR6J_WRP}4G>)g-MBdbLYP&C>T4b5&vVzt`a{4Ja!Hx6*2gn-G7-89E0jIjGShW+q zw8dplIFh^1h6WFKFS|t$qsvz!AtI-07{{+iRTXYUB(n7s*j#u*1XU|m=q1)JxM&a} znm52=EjC>`)#6QwweN$_PG@LhtI8vo!|$CjT4DVMx#^U_xf8JiRu5e8BoD12zpC4b^}JHPjsn=U+#O+vu_FRvcR7oU$b3t0WITD31Sn zWH5k~eD=~bu^*Ya#@!hZN&QWY6Fz+Bzpr`RW$7+)Li0-pPb%Q z)fxMeTaQBMRFCdZMJYMaf7xGow7GH>A}ncY};zsea*vQSfF#s*o)TK3H5Hy zmX2)mj?{G#9eK3<4s{rgycZ7;+;oSDROz6PZ 
zxY$#$2GMRao3Joj@F!HcWiz)ng(I;lzZ$vsDnQ{rot48hkvH@Pdz#AQ`r!#hY=>iy zxXDsBl6stJVpnLrl{M;McP8p#8pXjXI6gOC1JLqLIez&yeXu8>HNIE&Y9e5f>e_^-s`2#KmdQPyS8dtGv$FR`!z`xC zVwb^98P06(Pe~l6=^0Kp%-{VsqsBD;cAhAU&wPJ;ks#~ zjjLY>(~YW>kE?{kjh;+A5`?6cy{}o^`RK96ct1Bs<1H=)-xD0M^yPfRRZgDN0 zHX*%!aS?E?`_dr)m8pl=R73k(KEa(wSB29$<+T`BWAOZc6LZG+DX#@}a%7-GCiyFMo z*s^y)7fq}L5*%-(1O8u3ckJG4O^UWbcvEh1kzs?7+!upsdLy{o(q~oBX;A@}BoJI| z-Gi`*Lb^l0vauz7tG7cWnpnBhxb52^5(7~yUE%J!4i`Lki(nm>;J<14Q# zWB$TV>T)*B*X@%>Us;Bm8OE?04?3|P&m~*)`;_Vy2x_&s4$D;aw#Wlk;b*oz_)R|06 z#|ee?XG^Q{j?a3cX~zO&!LL!-fFriHG*Vf04Z=9=+rF|VYHj=nhRZ$6F(-LwPL=Ac z*pwh( z=s`&59X!zAb{!9_zZ$~>2EnShgj&#@O@)QyZbFeO(1w*WcAH&1=-`We(+5ATE5-4f zZV`fh@Vx%CQ*HkuoIA~-Ks0nS_dOiF5(m0B2yx)i;#6QYE60HZvF@D$cwl$|`hUu< z;eX9-FM>N_2ppa2``}Dw!8^!`-5s?3Tb%5KU00kY#yhtAQt*8(3|9tMxvN}J<;$ma z_g$+hUE}l{+S?nt-8tgfzYU_olw#8tTgsN^P?1`Wn0;d5X=8}+pSVf$+y zvk5<7PNpe^%*|2?rpa%o2S%upj$8Q)lyxuJ2~8RKGH%VjC^ezW-RboiqIPW0v6p1y zGL9aYG&I7frewJ3wz4fC^$t!k=`InIwjsV`xh$G?66@;Q+qDY* z%5O;X9bd;9<`@{G*m zpme!W%iIHGPj6OFdRIIX$Bx>M$;TapwF_DV&g zq^qWTRLt4-$S-GAbuiUpR|eN>TY4DERr$zHE0lT$qU|WRC=Q&CnYYZslq$AyS0)0f zpt@7Vv#V%awqIZvN4T{@@f58tL@}G%x0N|x5(}rjWWmVGZsg`Ugfwa3 z)8MMBoiArqm=>l_KhQB`m~$cn-iYedKOl!3KGe14q7af7o~dRQp86H$6Vdc&C>v;jDe+@)b*p*^nS$r2*#2*rbjkYU+2O!k&!;QCT$CC47IC0JuJWwn>UUBv$;)<2c+;$t3 z5kmUeE!TmItz|M$2oKoz#7^359W7JX;i%dgJ2M8vd|gSS3!a@yU!>RIiu`b2>%M_z z=?P65bhEP;j=B_s2R7cq>mfw&@z{@WjL$kAI3q8*j$MjihOuJgidJcuBfx20Q%{Vt z?dtv8?Tu*9xZOPT@<~7BLeuO))$Z!kW*X|?R6L-Mwq!qw3kmde^AZavn~_P%%c;Xn zI*Ru8=plEdJ#z|xRM4!@$9Gw(3#2zh9m;~Z=$Zm-N| zvt+jxS!0E)wkUu*%VnS&mnF{F{XXAC2d|J|rhEg|$c+&aZ2qC`iBgrXtA<8qKgN9sg~tXa`NN-tg_kTX zrG#lr9#(vw=&iB|4l+$us>^1WTVk3EpFd;3WwwALDZJU$u!ikcppP=N4-0ePy*gD9 zTOU7$X%?K-Y||~bM(9*o zg>IqqZ6DxuQ`!>K0` z5OVZU2C{&fG7rjXy_BM)@hWk6S$k(Eh4`h!{>GiolarI!u%yNJ&K(8It_5i`j@)WJ ztbWS%67Ro*wz#xgFy|}MxVK1qL+;G!s$HL{H7)uYlaP!8i^+Q%H71@P>om2>7Blmp zd?@hUYXhh6fU@eKvT7mHmu-b%gDo~B*SI*UE*8=0WS>x#Sa(sJIqTBBKBSOVMN=Qr 
zWl+XvF4&-0OR7Rw=pi?Hr3h5?8O!;5l+~I6`04X{4ICe0aOGJ3R|SsgEBEPlNYGgT zGVPcHbc^+x)#LOvY4#3JI5lOYEF(Te0OMu<|mW*RTaMXALuiMOoKo4H!Z zJ}Gp*V=JGH7JYT+$z|6|Mgw}K*{M=7nv>|+1gWM9-9T$@k>a>f%w&vU$5TfQiD3-u zhljlLlgZUaMv{k|m)UeR)x}=4kHzF^nJ-hPXJ&?A=nL7?{T?Ph^|&oG5wqCjpZmtu zdXL*W_9CIs#acp<3uF(}Rfv34I3pE!b5D(5l6xc;$w^DBPIbuWWww_i_W8 zOW0qSSD;(-)lgp%Bg_i(^hFIEw2vv_4s}Z}60>yRG95E*6)tkonpMkN3*jxbETQ@G ziZM$|yV{<%R394oriXi-eRmzM%Yv_DrAxDHcaZ4G8a*a!V~jz*%Vc>}-^`#%W>>zj zQD%WKLyCerO<`>@n}W)487zNazVyl0=~u!B?x@RJFWA#dW0UzT$}O$=Q<>APCjyYH z)lW->GcsqzWQEjp-6XTur9t&S2q5&k%T2sjHW^gVMt^EGd3oxhIr3wEA!Xr`qhl{E z9&n>;-}~YFgZm|n@SgUQNz^Z$I^l^`mi~CbwaV z88~w%)Q~JSr4l-6cAu)#H0YnKVjZJNDmRERvy=S39#B>{CQ_t^_;pCl(`Pzx4}p># zqe`xOT&G7{TxTI!Zn}WnzGV;t6SD~1nku%IGv*cf4j#Dv>&MdXSqIHD^Tr}~VrrSyI(fAk!!FpOoJ9`nzKXk66skU1*|SuHk*6@Do| zLs1>@G*DM6b89SX%W@m;oQ4&$c??lJH9+_Dp40=|-OL#8jz_un-P-ZjGOGt z#VE@4X>lS5*L2-wcO5zRVCUv>*{dv*^N66MJI?Mo4IkP$7@~$7f3+yiEf_Uz2Z)&T zIfuwe^A4Gm+OVjSRIhFs6^f@`k*TqB>G(XCXxaRAl;yewjRz?k5))ELDpJ77XYnk0 z$DlOv{SO$we`KjeEPJ=u#K2UvPfO)+x#$gVhZ(L4nKjq%>fde3pQ4=?VgQY+jkk)| z(-oeSCGt!tmOOKQ$`dL1;pR0RnI{N(=YaHgPo6tF5?#;&zH_J9u)&m0Ah0W#wm~Sf z*>-4l+$l}Pg3Y#EsKTq00I&rbH;mWCXXtk{y=rSt{9({mbg#2;RV=mO($Z=w^7;Lc z@Z>mwsv@VkyVY4Dl#y9NGkybW)rw{oj1Ab(x$0>Vqk9=zGA>hJJG2bHCYO`7Df}{D zcGtT)B4u817b3vId95sqgOu2m!MZ&3pkbJ@-^Z6AyfX2qgN-NG0L{r{Ew&x=&?C z@AaJuxi+X8)L~^Iug|8X9#NoyqKR%!TA~+Y=>NG)JH7i&mI`Xtq}UY`n^*Z+vvh|& z?8>%9s9JTe$B)l*TS30t%s*!)%!E~#28KRhxM_@#yXw^WjYf?Djaa@^#8jky4F%l1 z-_}`!NlRA5ur#6KK9#XixgK47vZ>g%aQ*@Ez?>vZ??!QA6{oDFm?g8=>Q0~M$NY_5 zK|5PhDii&3gI2C`Q8jsrr-J&KMY2`xejnlbvmXo5`o!8NvCWNLZ`7!dTC?RT>I!N< z^?i@=@Q)RVO7#g|Z$%gwV;BYBH`kbyYqoS43#*IOELjNzMwETDY{+i|%V)=79@BHm zfanXQ0nL1SiTVBh4yOWdnuLgJFmbfT&{$&k+@%!fwUBis^*VeNn)))$}Mu?4_USBWqzfn@ zy+|h@Oe@AU zx~FW$CAIIBTo?);c& z%!RqHx-~eC{i=`QfIS8*bLD0U!MfSGp#WZJW}B2#ey|I$1*cL#@_l|0D$IuC!q9ha z(DkSULAY!E^>3o_fnUwfQAY2wpMd%SmMl!j3e*1@bsfoA-KV7s+Gr*RjL2>LS(q($ z!V|5mfoAVuCGPNM;~HzchSVF)Y^ys0q2s5pc8mHOtZ0Q!GwIkxMa-!zWRY-KA{)

Kr86+j3E^FP_e>1*#WvVHm zHaFK{UoY25Fe5w9!PoPnyOH_o*S&ZRM?V`+^4bZ#;T}TFJDTYHe4>~xn-9Ggt&&g} zN%Hm+5m_;B7R&Fh0nQTvTe@OLWCiq=21#2pJ2jB2QZLIa8mSj!pp<*9%S}VkNOygt zK&eJjFWZPZS1$qhSgLwWu`5^Vaf(|S58a}qs^ojYJivuQxkpzO+&)h%qmSNF2iU0f z$h?|zgC3oY+)L%!F%b?6V8QY%@_%A*(&Z}-ei#ok zJOUMu0}DxI{9-WhJ>bbX0<9|~vM-0cu1%Pk*5ZEP6~CtB>bQI@*V)YO{a1ajgId{j z{vAW}rkaDV;D#0~2X!_uSiqt+`g-=w{MaS#yZ3H6d(4x*r;;b(?YhSKnE&3>C+y6I zs{5MfqZ64!rZxCn#~2l>ho8c0cEVpPHcg1yFc+2DzjJ=JGfAdJ-BBPq1C1<<)7+3q zu(_hT=X@o)c(*!!GA|$tKa#@UhgmWTXk}Dw1e&LFsxcA0 zYDS0cF2vHCm1AvCvYL)fJmxi#?Kd&S`xkS41p7luuCq2(BgErhb}{w4q{xNiy#W_R z3Vfl8{R{13F&JaPPTcZsY>;@G9j)XNY+jSp=0Xm$%S7Q37-nSI8({`$P8& z52KEg$4=+NOviWOG}x%U7;M#Hvu)&ZE1Wz)7&>oKH_<4Io;Is7)ibA^#&)9w!$Rd~ zSDM9Z(4KFJH#$M^v@jg4;3RzEd1j3QCoLyxIZDpP@-%qL8?=4K|2j_5UzqxuGGn6X zjQM_DyHOtCtA8RyRyg`?lU2>Yq*D}-sDWgdQfbRax9ClOV2?`3&fT(*115{2Qbof% zPpWdo!I5O6-?+9RA0>R?40T>dps_-YI^c*cE`1WBO%&o+}`6N0t1XDS($Q}&4dY}zG*we!hN66@- z{N{t*0jn)wb_q=%*l6R4GUaLA*AK-Dt)?V}?J_rAn38T`UX^+(zF$1v?ELfub_}zQ zCjri^hy*d_kG=?AMd`PfP4O?5K66=YA#pTgLVJgT7fRV##8Ug1Q9>avS&fks9exQI z;otx^sJm)4)?$&_l(RdyW2!6ywnaK`FK)((-U~*x&_Pm&L&aPv*If3zf#^ANVjKxp z5veuB-aW^LEx-)4XHqr_zJTdrpCHXCRgtE|-E0yg9;uERejVddWPKMVStQxZA_;3dJ|@v+)kNo#H0pCOpr#?ukBjdAM5( z+T{d0y3P0b+qc-hMQtS?#;T#qS`)&yKB^IvaxB;$&H%e! 
zJ0gI@)J4fq9(dx;qw9ij8mMHX;)LfG3Dnj+SW2Lo6+K>3K$f~esSvL+{BgMY-9|B6 z+t9*gAk-lfeQV$$ybjSmKP9{ugvsv|6kcR+pAQCxU?@(*1~b*Y7mY1*#3UYp7AH1H ztXz;A%x~b-0VjdAjL=h=^Vv{-WL?-I+78hOO#z4C&ST`p8u_yA=g2Uj`3qmM*;W&Q zN1%XK_y;p)fami4b?Cw)(8TK_(5k>8AyyR=X+F7#C!t37>;;Cko1>rgp|9de+7||* z??Ax^hF{pRYD-v~gDa3`F(9&Hx**rF>;({&*&Hs~vSB?W7Bi#PBD8xKoK8l$IG~fA>a!=^oo&6F^8Yzta*TEAjsD?ns@j(%7a3Pe`Vqr^H9VKUb&@*JulN%dlz zRmcxD^YkmePAkOzMetTVu*EEKcgtBWpq~@YLIJEe!{BzK(AO3VT0w5m!e!KEm?P%E z*o6wB&yhL?c0mmk#4w z9JL4oCh!y(mlzq!g{^Qil1g+~ln0#DbTyMoP=*~+w8q&1CktSKS)jj$i*};*V`SX( z#KVfA5~!rmyoX{GT3Qn&Vhp>|6#5DyA8R5?ez7$bhl={d$;=c^!29 z2*jC53IF^SdkrvCY?`nZ(j!oV9s8cH8xOw3jrhY|Y{V`x1xBG;4@}m~1e{SnE0{8I zU~*|MBY@U*8-;`vIn_5+pkp7tga&O6Pe!WOqCQc`saROCBtIGb46m05%TnTKmfP^e ziC>km8|dJ!jFXsvs63qF7)~8IPqLOG9JT2oNRRAC- zFp9~vMm${W8*%#JYuKG9~9*^?a$0t zUND5Y5Ne^lmKRIoi`MG>zas9-2)g88%aF@qIL|_;5wkFz`MU4ZIVykARkwAi+~%ay zr|P2M3oinwlNRn^2hc^x`4|~9A(-(?NQ!cpn2!u)*80>ykcY*d zqxbMEnXO=ofb)<5`dSRdau`gp0J?ZRRVnj&C@*)S{F!c~@b=ol755puJ~+*jHNR%*lx?3DPHb&tJOB z>DhQ`IBoUrqcYO!d?`;(Ud=nO4~=Z}>|q6ux(0J2+V3vyJ$1aUN#&a6pp%d>lCKS% zojCx%&J~a-xkV?nRst&(F8D&)$Xsm}&*!QIuLj7iPuGjEv-Ev}4Xz(iR*%-5X`tFle!P1H67 zCcLxT_e&FqmOk!C54d@Il26b}P^rsos|r*~`xCM8_S+*@EbNR)V%m|AOAHP^rFWk$ zlTqBb@-$oNQI!eZ6%YH?eodMJwX0t9L!!E(! 
zvg%HBk$$XkbX!}EA#bNy3dFMNyqk1Fi`*zRQSytx5{!Ph3}A< ztiE2AdMe~|sb)I7Y)z`BEBULqzPFb?6=d63RV;mMEGK&WE49)$6c!=gWl^}0Z%i51 zrvGuji(aUwaweZLTz5^WL@Ycv0eRWAjLFl2v^>T}va9&~9d5}2{dX4L1j=t;%a zQ7?B6STrpVy;s$~u&St~tKKepW?A8$#W#|++OM0&eaSst{2MnHzOsyb@5Ya?Jsz_` zQ83NQR8kmBJqJ>ce^@-FVNqmX-nH|3 z273l!l6kg88YO{ zQl`)La}2{JqN&$h%*Y%Kg-hecMIC#-3Euy_k<;7&+5A{nUGwmheF;)`TjX0JShR#i z$ttQWx`fm%ThsH?^5>Ur+E01!AX=58xv#w!g(%T1^7VD`y_+9xwZRJ1K7D!baPTtUY!1+MAQABQ9?U;#6yr zKxcB|O`*j1y(sFeE1+IL*h|^q*t0P_t@-{}@F92RWqd0XT;Bcpo^S9055B*7As8Lp zI-lVPtQ+)kIrOo>-Z#4&<&CeM1lD!HVudRVuu78>Pk_4(Risd19L)f*6uNdGjk|_b zbU%>C4;WEykf5zllwD*lWH8n83DG8S$2__e#+MJ}=?dU)!Dd=5m@2oK@rHG4A@hbf z5e9>Fi|r5Ek+RAiDKoyEJrFn=v7Q;rt^&&h)D2M^Rx1l8F_wcx9LQpK-oWW`tVmX8 zb848_Fw6?=r-wEVh3UM&$)kDN?;w>hThWb9=)%3}cJ^uTIT_B6IF$(;8B#cOzi$H( z1*1T+pTib*XVVXOLHvwN~Eu5*ww0E}B`X%V5F?A?OP7*9ZH~60iA#V15S- zN1(U)n87`8E&Yah@H!&K?2-;5`y}Nn&X-~b-mxv)*U(?_y@+7=eyjD?w)eM8;kK5| z$?n4h7GP)=nOxta4Y=ShfBTfRuh5e-RemVm>W0(18|t@4M4c22)m7+qn}$V=OU%Bi z00A69WY0c}_VmF`>MLsEl!C?=^+#<(sMOz0$y0C?KL36b_WSD|h*+K&(+ZUEMyzAG_vW*OrO}bjT(B9;m5b> z#Py_P)0y+}$i`5O~NgYEFx^wH>ALe5i%njca6`wrZC{qrT zkrT>t3}$Ux@s&?7fz(=_Jrf!zGr!0a+GxB8egvk4u`jSfvHIO+0$7*DObV!Q$9ln< zFF(}1vwka7OhdRm1wHOzP%4(rwX)wimhoIn_9DryG=a-r(dnG}NUnR711oqUc&jQ- z+Rhtkej;Fm{mhpw3IqGOYlFCJWzfDYJSVnPL%EBc&f7i%JZ|0vgtVqxYA7RH+9=OK z5$5Q+z|BUtQmEf;Y}CR)7;sHs)@vQPnT>KeXB-$1f)lL9M%kec+MeJ{MypwMP##md z-gv3_wivGFbLAMJ0)hI(0>GU_JUg?}%sT2Rd>U$o+GrF*np&9*iOy3wnz0XiY#4Xc zE}A!vJ`0py`yNeA@Az|I1dxP@UWTR2qjY(9$-4)r(!(XG(yOghfdH&0X> z&kBun_;PO)mzzvE>~nX{_-3Sfy2xCeo-Xf$=~V5utSm@h7KGrO&s5vAiAu+-Qk#rz4O(oOm4w&&8VOt?U<1HEo4? 
zr1P6#=E_QO9OJkEVHo%GfWASaSxB8D(maBRG8AViHqBqc-qy^CZmP*|ZvfLk=D08} z;ep^=D}UW=#TG5reV)KDcjKinUpwZ**lBps;K<$i5c5-bMyR!sP(t?Hs+So#wjH&_ zj?-||Vl4}XsV{<0&A@J=y;|%R!fU{!GpT;!b@&fsIO&pkh#X7iEzpiS`(*oGeHAti zIBnjNlbK$BVm~p}fVFG$W|PInC4Grtx#cbnn`1bz8P!cR%bI1a4g(z3fHPZ z=iq74p35D=OfXd}1i3}oIFByddLy+(7%-4o{V4+A>mxR9jQ0j>DK=xG9f;(;J@i@Z zP+4_YM#skqy$)!alDz7{T`&a}q24mgjo}FixU`lM8n}uqDk_lU%a5S$7zDOTX6(is zl#MUHvqFvv9o7n;ir<5eaADohlwFyGICHeJ%Il|q+c&?=Sn;7MXQam^Pov7?(?p=Rq@5XfbSQ2# zvy&Gf*Ty=f92)D(B=)`swPaF$*f|27nrDK4Q#VuQ8EU}?$pFF6ZE}P_M9g9y4uVSq zLIQ%Xy5Mys;2Zm@Jer5Gm0lkKmje#+ZmKhh=YTrBSY0@qO8H`}RyO>Q5IPHp07h~I z>T3-U#cR?7i`-{`k-9_!-qc4Q-;dEIJN5KMrD{UD#2M~7WE9pIaym_vSGci zxpH@JDY=KV+a(thk&b^XF{enh6gijA=f`fRpO|CowrEt5AZ8ru8Vl)OznuGQkwKtYObmb+t4X-dmdWW zZ6=NhZ7IOEbZ5w7?#`AprKintV(Ox6`rJGA+g8V=0^*(g3f1khSL>bQU2H6zzHPC) zm0F-EX>Cn(k}a;_Dp1J4H%pU3ca-wjd62j;uAcH|!e`K%ca&<3c997mud$gYElJWx zc38S4*jt=8XN~N%azKfQ`A6pqWXCV?F7S?)m`~P^Iif14bbx%3(84hhgBj-wckV=$ zMRkssgcrR#trNW_37ii+wK%=s1an*caew;6_iNNAB2Z3LU}~sK2OD$B+GZC!pvYrch+zwm0J#QBU*~K{(V1_})N_c#(yj z|A74(rrbFNA7m2Uvl*3^S7Y^pbTF8NbBQovr>5eHxP$M-GPBIUT$sT**<@-y;$r+v{FAMnIn^4Q7%@+A?fnE+=V;dj=&`u7#sf}a@b>*=N1Nx5X9-{ zu6nsTnq3`r#Z{k>M{(C(s_>1#h@vd^aahyMe@c zuq%g&_={MAZXhT(vEV=pIId}AV;u!FLRcuUv;Ka`M|x3^%{+*W7;BZgqBjK&2jZ9! zs)I>NnC=iUt;oT3?|ScuM&*LF0KPFNu|s`*4C{EsV#$4i4YmI((i{BWl7`{`(fR+? zR2fH;x77&%@r! 
zqX_kHjwcmw=itd2pG}%>bu4!AAr%H$jJ7^+HTi6PmHT8>W+tu4AjNezuZlXsKI>fC^oaag12UwH0M?Y)p3nO>^h(1Q8ikBPpVx^C z#3mjmo+U`0N^1xPQAjF#2J};&9fH5>OdChMZ zH?2RA`2zS!lYx0{qr>8r5t?iPE}qwdBwVEmNt2N!;xlW4lRt>yN}3oqdmxw38d+?NAR=5k{W1VWS&1Gna;fF z_f!%o*T6iJ%h{a+30je`LPjikwx#1XG(McA9&vbHCcPn*wK_f5+q3QYA z=hcXReV^jZqeCZOV#J8-$pn!Gy{3&}mj(}X=GO4e+BcNnPBp7S{YJ3+IsU0 zOoeg1fP}LyXY!G{!J`*Kmb)7f)japyC zmzcau2y$_`VrU9nnE{d`ra&V5@8|ZUSh~v-zgM5QS8h%1C2@N1=9_J%w4>coS9ELi z%@l9u>aK!pN^;7*|EDG zM@FO*cOC5^R#9zaETE{|chAbxM@;^Fe@g4w*hci#cO3TGTf`#Ftxb1cKI)sg>NrHR zI=V;!*Be99S!uRP+N6;bc$G&xk@pqJ=n9NoH9SbtXO@~2>N=ry)vz}*!Bgc4yqENJ z4YSb|mblw@gH0KSW+uBEskbXGyr5TBwtv%Q$*zEXsI#GUlY7P_IcS{HwJ&c{yliGJ z#XDtfDY13Zt+$e4c~;wegof4*nV0!gJUKtoXCZaMYjNhA=@2daPO0t)7XB&H?92k4 z1^4i|)5t{qdpZ2MjILRhXH0pcARlPUH&f?MOrF%QK{LG#Yp~sE;RxG@8@ua;@i|J8 z7y3)x2x@!q!t(lCwrcKpe>>iyWp%{ecMlXVW?%aBzBfgdBka+xC#A`Vtfah1PW|dd zALh%iM!qe}Ez^oJtjem@%%y4S38mMb!f(bz3EH}@jhfw9c4>t}EMM1a>4W)QI-83tj12Vq##05~3DQh+b8WPsNHYVzwPDSZ=zFTQLZHy~;R4bpDXC7_R%M zH{jJEnM3B5f{P-WwQW0$qeytQ^b}(bbIzeypg)4O`d!XlLx-Jogl&mIu`o2-sNXO0 zzSHCdj2zG8(#7}b{RN#-T+((QkW;aJxl9-Q>Tg=u8>A`Ry4Oozwx-M{2L(Nxd$@ii zQk7X?eM&3uOr@+vrQ@4yReZc^!@^YYh#f1twPdWkei_@r^wl)e1#%d%-;F z5*iZ>E0akpOPS(^gCVJKrl5&h^0O?Pq_$7mefc1{r%N%$>>D7L)Z$$udi_=!^&@f{ zGKQ`EUcYU_r&s)L!EPY&m+_j>k86nnutPiT8hc;QCDMx?V%uhbu{C zJ7G~3G%N{%x=R8}4-^Ep@|NwWGfT+*n+Q*UdY-*sx}ucHO|s5C7Ac(06E)QqVx;&e z>sI9RW{`>gyUQBNswL40hcN6wqp?7Ao?kxLv9f}IM7QAdcB5!Uojo3GFL#z8FCDK4 zA$6dy^MHHd& z((VH|XVB;E!XYJu4@34(1X_ua;bz$?kEdzQuECvE~h9w*p__6%}K;x6XLm z){@NaXJmPzdd2QO4;f8hBh95uJ}c{W;)L(D2H~hzeq_CEH?bRSbu_?ma2J0pcc;d&iPBmp#M`jL6-QlO*XG|qCZj7sIfpkC3Kn7 zF_q3P@Pp(Dvi!W7OX}x$03rQdgeGaqG@5`2BY;$V{}Q1qkYVuOBCLMg=KzTC(N;)v z@Q~>fYiY0hmc-ItS%V5^HV94|YZ8uA#%=4U%{R5VsVfM-{!-g>rE0#{*ZrK8t7X>% zZZdf~2Etojh071j1M4PuUY&gCsk`T)8%Mj4ZdlI8&ORO+Y*@q8jl z$$a-m_r320O@$2K+Mj+N{k*@-7U8QffPctvC)_QFaES^)N0?C%Y#C(NXQM&-)a_jn z9c-^^=biW2>gKOGKUQOpGo6X@+@WEi5PqYadU0Ajia&Y5p3I6Wth|4U=^{mhb~H(8 
z+ZSpmMZ`|VbZiRVa8TCKZc&?MWkJKl3NR%nG7bz9G-EVH>1L}zdhy4GTZUN9&ZlS?=8>( za2SIq2EM5UeyIOz{&-ke-nY^5@VRf}{(T0K_o|)=y38PtY>M!< z$NLK^+~J~WTW=!XThR)0_OBrErJ}fr+nZmz(bR*z1DBO9BD%mEb>i zZhHJ0aKjh`Y6G$wo&}vg@$W3qKnCXTJ>z&mc!Kx&k3^3^`~UyGs&zFGBbU!Mabnnu z(-sEnH%>jeG{HYQR!FSIDb|n{BMT8=k*?J6?kTgQa@lV!mFIXelKL#EV!1DRa@Wf0 zEgNjGU#M~w8h5UIn9s>O>T_u$tN&Y-@U}}9X(5VUfsajhCdv$uretysrwMgFl)qhL zn7)>KeS9$da#wk$$`Xw8LP~(L>0Jnkgl2%t8zrgKTXQYhwyn4z{G97SXT)57rog@8+!DiyH?C7GTawklQ>7DuUJW^J-u>6 zgY+w?+0|aew_ou($Gy9UnRWg{Fe~Njz99A58b1z~?QdhoO6DAuH=q?eFG>h9n-t!^ zvo_AAB@F0Xp|;F4z1jU}i133NKhYjvYG5$z^{V8{nJVA6ogB zxnt$>z~=u=o}bDhI<8f#or9rs>mr_#;%nltpiXM7`sn9-CY2qGF%l=|#VyGQU1#xr zS+frGvd~9gv6yD!PTb(=5L+~@T4FdCU!?q%&XAeJ70;${mpj(132EbMHzH#14K>y` zUrO$OcBTA^TCm&6`yW+@Kix{QUry^6sl883Q4`^aX zfj2)E1&ZF4zsbX8N1zRVR!`s>niYB~rz)9?@CeG9+=cgfuA|hi^eTJrYC0-*GHn;N zAZH-LFkP!LrAptv%!Bp+h8vDyB4ez#3@i+)njz6wsL>WoPM{iA?%BMZu3S1DZ<4Mt9ix)xT+)4CL$??v&3$E_I9-SWNX)eX^8uP8T0IpxB*V z@s&I(H06!8@x3)fmzfuzHGKcPs4GhphiD3d#!2c~AMNvpgPlNS&;YC6Z{dR@BRb4G zrcUXlSS3n`kY{+0xDLm75P?%LqsFU?&zsxN6f@=Z@5j{dtBCt^oTG9#8L@Su5Pp|j z&rhD2lv&`2&k=7Qr@-iTd~L5lL-&1b*AS0tJd^GznY!hb`jWpD&Ru6)F)i0uM7qTI z3aQ3F8GGZ2u~nzpfj{Jyb`oECX`J-Bw{RC#sq%4j#HL&!^0vw14X3aMCV@ykTdCSN z2}M?+$?|tN0`X+@#O9!-TbJc?(Y%T;-<}S+$=T}K>;4|0!Wg7D&a0vVU2pJtY{yFO zb2MJPwdxdzT`kWsb(m6F^a~s1(1$8^$nm-3HP|L4vX+Cqu4GT#eZu_ueav&shXaAP zVrgLN21>U`tOze5obI0Q?{-X=x7w%{ZSK2KA!CM!6Q5xKiWQ7ayck%W>8!Q;cFpgT zKDhaWS_w|}W}HApGGFKAu9+NllLs4iZ#fF4y9A3s&FqNE^kKhyGR3k=KkrSC}L|>)V>V zl&My^5$n@At&SYwMyX^kmhAf5>D+0A6N=EL4tEW#J*z#ny&jscz4p-=^Tu?|gVPT0 zHx4AWEC$45N6AhPbZmWz7t@+?Z&~LUiZyHZ@#gZOb=j3H^3k={?EJ=+ZRvY%S@&|` z1^C5b7nV02pS-hAQm399o8V_SLrA62*w3O?axat<#_5pzk?)oL)l`4N_@D$t$8)h> zWv;g(o7_B0sl&FW7QDH;@JcqjwwOT6QC!5hw(tT zba!DRf0I%Tn$!U5MddVZTBwX_?`^K$euZJqp$!Uu#M zH2Q;dZdukkEUaFBvm=Y~&SjGBpBPlWnQGtH7m%mYb}M>YGSvH}LPKD(D{A;k{{DPr zfPqoty(RvJ{;JL`AmR0YEp$(u6a+l0|MIuSy>&eK39x7_6n`ysf2#2B?~D~(0R7?Y zVXo`rYU6JD-NM~g{~aLvbI?T!000G;M6%ynvjYpvzbdQzsHAedp_K_|bt}+70%*+f 
zTSHU8|NC8gx!;EX)is2Fsm}7l{rkT}GXWgSe@46dy+40M6Z%FN>KpfNb)#D0?XeaehB_J#PPa5Kl_3MY~a7C_{SwV99l0 z6b=KWKhFPz1>O+?iT3}EtYP5}SXcMI2I^lA|4TxJfIskWxPK({M=|L?68b9(1b-t` z9N60a_sAlb|8b!G3Jp|D`Q-%$3h4az&_Bwx{_ZeS$;&SyKZUy%Pbiht3wzl`NS z@|j%gZXzR~LP`OZbK#ef0Rjz*|APFl0sTV@{PiI6#|d(&eN&?!SiANE3h{S5K0w3# zO>V#9{TRT6{r@#O9b$KpT81-Q{+G4{ZUsrmgx0l0^L0Yc;bGC6HxfiaDVLW zuM+=#(vqtD0qz&M{K~^W3g%bn-$hOhR6+V%$NvT`_K)g7)S|BKGr$9*zwv-s?GMPm zi0F?NWd53p2n>LOP#tHflGcBQ`%#5jxxcLip{h<#_Ysp8cQA!#n7UJsy8es) z-#yjG0e<)Y{%j5`;Q@f--r(b{fA@g>+M4bCFRlOO4?EucxRc^1Kz;AOI{)9hEN-h4 To%-JL67a7RFhQGx-=F>ud3GEi literal 0 HcmV?d00001 diff --git a/docs/ml-guide.md b/docs/ml-guide.md new file mode 100644 index 0000000000000..012fbd91e698b --- /dev/null +++ b/docs/ml-guide.md @@ -0,0 +1,702 @@ +--- +layout: global +title: Spark ML Programming Guide +--- + +Spark ML is Spark's new machine learning package. It is currently an alpha component but is potentially a successor to [MLlib](mllib-guide.html). The `spark.ml` package aims to replace the old APIs with a cleaner, more uniform set of APIs which will help users create full machine learning pipelines. + +MLlib vs. Spark ML: + +* Users can use algorithms from either of the two packages, but APIs may differ. Currently, `spark.ml` offers a subset of the algorithms from `spark.mllib`. Since Spark ML is an alpha component, its API may change in future releases. +* Developers should contribute new algorithms to `spark.mllib` and can optionally contribute to `spark.ml`. See below for more details. +* Spark ML only has Scala and Java APIs, whereas MLlib also has a Python API. + +**Table of Contents** + +* This will become a table of contents (this text will be scraped). +{:toc} + +# Main Concepts + +Spark ML standardizes APIs for machine learning algorithms to make it easier to combine multiple algorithms into a single pipeline, or workflow. This section covers the key concepts introduced by the Spark ML API. 
+ +* **[ML Dataset](ml-guide.html#ml-dataset)**: Spark ML uses the [`SchemaRDD`](api/scala/index.html#org.apache.spark.sql.SchemaRDD) from Spark SQL as a dataset which can hold a variety of data types. +E.g., a dataset could have different columns storing text, feature vectors, true labels, and predictions. + +* **[`Transformer`](ml-guide.html#transformers)**: A `Transformer` is an algorithm which can transform one `SchemaRDD` into another `SchemaRDD`. +E.g., an ML model is a `Transformer` which transforms an RDD with features into an RDD with predictions. + +* **[`Estimator`](ml-guide.html#estimators)**: An `Estimator` is an algorithm which can be fit on a `SchemaRDD` to produce a `Transformer`. +E.g., a learning algorithm is an `Estimator` which trains on a dataset and produces a model. + +* **[`Pipeline`](ml-guide.html#pipeline)**: A `Pipeline` chains multiple `Transformer`s and `Estimator`s together to specify an ML workflow. + +* **[`Param`](ml-guide.html#param)**: All `Transformer`s and `Estimator`s now share a common API for specifying parameters. + +## ML Dataset + +Machine learning can be applied to a wide variety of data types, such as vectors, text, images, and structured data. +Spark ML adopts the [`SchemaRDD`](api/scala/index.html#org.apache.spark.sql.SchemaRDD) from Spark SQL in order to support a variety of data types under a unified Dataset concept. + +`SchemaRDD` supports many basic and structured types; see the [Spark SQL datatype reference](sql-programming-guide.html#spark-sql-datatype-reference) for a list of supported types. +In addition to the types listed in the Spark SQL guide, `SchemaRDD` can use ML [`Vector`](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) types. + +A `SchemaRDD` can be created either implicitly or explicitly from a regular `RDD`. See the code examples below and the [Spark SQL programming guide](sql-programming-guide.html) for examples. + +Columns in a `SchemaRDD` are named. 
The code examples below use names such as "text," "features," and "label." + +## ML Algorithms + +### Transformers + +A [`Transformer`](api/scala/index.html#org.apache.spark.ml.Transformer) is an abstraction which includes feature transformers and learned models. Technically, a `Transformer` implements a method `transform()` which converts one `SchemaRDD` into another, generally by appending one or more columns. +For example: + +* A feature transformer might take a dataset, read a column (e.g., text), convert it into a new column (e.g., feature vectors), append the new column to the dataset, and output the updated dataset. +* A learning model might take a dataset, read the column containing feature vectors, predict the label for each feature vector, append the labels as a new column, and output the updated dataset. + +### Estimators + +An [`Estimator`](api/scala/index.html#org.apache.spark.ml.Estimator) abstracts the concept of a learning algorithm or any algorithm which fits or trains on data. Technically, an `Estimator` implements a method `fit()` which accepts a `SchemaRDD` and produces a `Transformer`. +For example, a learning algorithm such as `LogisticRegression` is an `Estimator`, and calling `fit()` trains a `LogisticRegressionModel`, which is a `Transformer`. + +### Properties of ML Algorithms + +`Transformer`s and `Estimator`s are both stateless. In the future, stateful algorithms may be supported via alternative concepts. + +Each instance of a `Transformer` or `Estimator` has a unique ID, which is useful in specifying parameters (discussed below). + +## Pipeline + +In machine learning, it is common to run a sequence of algorithms to process and learn from data. +E.g., a simple text document processing workflow might include several stages: + +* Split each document's text into words. +* Convert each document's words into a numerical feature vector. +* Learn a prediction model using the feature vectors and labels. 
+ +Spark ML represents such a workflow as a [`Pipeline`](api/scala/index.html#org.apache.spark.ml.Pipeline), +which consists of a sequence of [`PipelineStage`s](api/scala/index.html#org.apache.spark.ml.PipelineStage) (`Transformer`s and `Estimator`s) to be run in a specific order. We will use this simple workflow as a running example in this section. + +### How It Works + +A `Pipeline` is specified as a sequence of stages, and each stage is either a `Transformer` or an `Estimator`. +These stages are run in order, and the input dataset is modified as it passes through each stage. +For `Transformer` stages, the `transform()` method is called on the dataset. +For `Estimator` stages, the `fit()` method is called to produce a `Transformer` (which becomes part of the `PipelineModel`, or fitted `Pipeline`), and that `Transformer`'s `transform()` method is called on the dataset. + +We illustrate this for the simple text document workflow. The figure below is for the *training time* usage of a `Pipeline`. + +

+ Spark ML Pipeline Example +

+ +Above, the top row represents a `Pipeline` with three stages. +The first two (`Tokenizer` and `HashingTF`) are `Transformer`s (blue), and the third (`LogisticRegression`) is an `Estimator` (red). +The bottom row represents data flowing through the pipeline, where cylinders indicate `SchemaRDD`s. +The `Pipeline.fit()` method is called on the original dataset which has raw text documents and labels. +The `Tokenizer.transform()` method splits the raw text documents into words, adding a new column with words into the dataset. +The `HashingTF.transform()` method converts the words column into feature vectors, adding a new column with those vectors to the dataset. +Now, since `LogisticRegression` is an `Estimator`, the `Pipeline` first calls `LogisticRegression.fit()` to produce a `LogisticRegressionModel`. +If the `Pipeline` had more stages, it would call the `LogisticRegressionModel`'s `transform()` method on the dataset before passing the dataset to the next stage. + +A `Pipeline` is an `Estimator`. +Thus, after a `Pipeline`'s `fit()` method runs, it produces a `PipelineModel` which is a `Transformer`. This `PipelineModel` is used at *test time*; the figure below illustrates this usage. + +

+ Spark ML PipelineModel Example +

+ +In the figure above, the `PipelineModel` has the same number of stages as the original `Pipeline`, but all `Estimator`s in the original `Pipeline` have become `Transformer`s. +When the `PipelineModel`'s `transform()` method is called on a test dataset, the data are passed through the `Pipeline` in order. +Each stage's `transform()` method updates the dataset and passes it to the next stage. + +`Pipeline`s and `PipelineModel`s help to ensure that training and test data go through identical feature processing steps. + +### Details + +*DAG `Pipeline`s*: A `Pipeline`'s stages are specified as an ordered array. The examples given here are all for linear `Pipeline`s, i.e., `Pipeline`s in which each stage uses data produced by the previous stage. It is possible to create non-linear `Pipeline`s as long as the data flow graph forms a Directed Acyclic Graph (DAG). This graph is currently specified implicitly based on the input and output column names of each stage (generally specified as parameters). If the `Pipeline` forms a DAG, then the stages must be specified in topological order. + +*Runtime checking*: Since `Pipeline`s can operate on datasets with varied types, they cannot use compile-time type checking. `Pipeline`s and `PipelineModel`s instead do runtime checking before actually running the `Pipeline`. This type checking is done using the dataset *schema*, a description of the data types of columns in the `SchemaRDD`. + +## Parameters + +Spark ML `Estimator`s and `Transformer`s use a uniform API for specifying parameters. + +A [`Param`](api/scala/index.html#org.apache.spark.ml.param.Param) is a named parameter with self-contained documentation. +A [`ParamMap`](api/scala/index.html#org.apache.spark.ml.param.ParamMap) is a set of (parameter, value) pairs. + +There are two main ways to pass parameters to an algorithm: + +1. Set parameters for an instance. 
E.g., if `lr` is an instance of `LogisticRegression`, one could call `lr.setMaxIter(10)` to make `lr.fit()` use at most 10 iterations. This API resembles the API used in MLlib. +2. Pass a `ParamMap` to `fit()` or `transform()`. Any parameters in the `ParamMap` will override parameters previously specified via setter methods. + +Parameters belong to specific instances of `Estimator`s and `Transformer`s. +For example, if we have two `LogisticRegression` instances `lr1` and `lr2`, then we can build a `ParamMap` with both `maxIter` parameters specified: `ParamMap(lr1.maxIter -> 10, lr2.maxIter -> 20)`. +This is useful if there are two algorithms with the `maxIter` parameter in a `Pipeline`. + +# Code Examples + +This section gives code examples illustrating the functionality discussed above. +There is not yet documentation for specific algorithms in Spark ML. For more info, please refer to the [API Documentation](api/scala/index.html). Spark ML algorithms are currently wrappers for MLlib algorithms, and the [MLlib programming guide](mllib-guide.html) has details on specific algorithms. + +## Example: Estimator, Transformer, and Param + +This example covers the concepts of `Estimator`, `Transformer`, and `Param`. + +
+ +
+{% highlight scala %} +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.ml.classification.LogisticRegression +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.sql.{Row, SQLContext} + +val conf = new SparkConf().setAppName("SimpleParamsExample") +val sc = new SparkContext(conf) +val sqlContext = new SQLContext(sc) +import sqlContext._ + +// Prepare training data. +// We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of case classes +// into SchemaRDDs, where it uses the case class metadata to infer the schema. +val training = sparkContext.parallelize(Seq( + LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)), + LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)), + LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)), + LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5)))) + +// Create a LogisticRegression instance. This instance is an Estimator. +val lr = new LogisticRegression() +// Print out the parameters, documentation, and any default values. +println("LogisticRegression parameters:\n" + lr.explainParams() + "\n") + +// We may set parameters using setter methods. +lr.setMaxIter(10) + .setRegParam(0.01) + +// Learn a LogisticRegression model. This uses the parameters stored in lr. +val model1 = lr.fit(training) +// Since model1 is a Model (i.e., a Transformer produced by an Estimator), +// we can view the parameters it used during fit(). +// This prints the parameter (name: value) pairs, where names are unique IDs for this +// LogisticRegression instance. +println("Model 1 was fit using parameters: " + model1.fittingParamMap) + +// We may alternatively specify parameters using a ParamMap, +// which supports several methods for specifying parameters. +val paramMap = ParamMap(lr.maxIter -> 20) +paramMap.put(lr.maxIter, 30) // Specify 1 Param. This overwrites the original maxIter. 
+paramMap.put(lr.regParam -> 0.1, lr.threshold -> 0.5) // Specify multiple Params. + +// One can also combine ParamMaps. +val paramMap2 = ParamMap(lr.scoreCol -> "probability") // Changes output column name. +val paramMapCombined = paramMap ++ paramMap2 + +// Now learn a new model using the paramMapCombined parameters. +// paramMapCombined overrides all parameters set earlier via lr.set* methods. +val model2 = lr.fit(training, paramMapCombined) +println("Model 2 was fit using parameters: " + model2.fittingParamMap) + +// Prepare test documents. +val test = sparkContext.parallelize(Seq( + LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)), + LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)), + LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5)))) + +// Make predictions on test documents using the Transformer.transform() method. +// LogisticRegression.transform will only use the 'features' column. +// Note that model2.transform() outputs a 'probability' column instead of the usual 'score' +// column since we renamed the lr.scoreCol parameter previously. +model2.transform(test) + .select('features, 'label, 'probability, 'prediction) + .collect() + .foreach { case Row(features: Vector, label: Double, prob: Double, prediction: Double) => + println("(" + features + ", " + label + ") -> prob=" + prob + ", prediction=" + prediction) + } +{% endhighlight %} +
+ +
+{% highlight java %} +import java.util.List; +import com.google.common.collect.Lists; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.classification.LogisticRegressionModel; +import org.apache.spark.ml.param.ParamMap; +import org.apache.spark.ml.classification.LogisticRegression; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.sql.api.java.JavaSQLContext; +import org.apache.spark.sql.api.java.JavaSchemaRDD; +import org.apache.spark.sql.api.java.Row; + +SparkConf conf = new SparkConf().setAppName("JavaSimpleParamsExample"); +JavaSparkContext jsc = new JavaSparkContext(conf); +JavaSQLContext jsql = new JavaSQLContext(jsc); + +// Prepare training data. +// We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans +// into SchemaRDDs, where it uses the bean metadata to infer the schema. +List localTraining = Lists.newArrayList( + new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)), + new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)), + new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)), + new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))); +JavaSchemaRDD training = jsql.applySchema(jsc.parallelize(localTraining), LabeledPoint.class); + +// Create a LogisticRegression instance. This instance is an Estimator. +LogisticRegression lr = new LogisticRegression(); +// Print out the parameters, documentation, and any default values. +System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n"); + +// We may set parameters using setter methods. +lr.setMaxIter(10) + .setRegParam(0.01); + +// Learn a LogisticRegression model. This uses the parameters stored in lr. +LogisticRegressionModel model1 = lr.fit(training); +// Since model1 is a Model (i.e., a Transformer produced by an Estimator), +// we can view the parameters it used during fit(). 
+// This prints the parameter (name: value) pairs, where names are unique IDs for this +// LogisticRegression instance. +System.out.println("Model 1 was fit using parameters: " + model1.fittingParamMap()); + +// We may alternatively specify parameters using a ParamMap. +ParamMap paramMap = new ParamMap(); +paramMap.put(lr.maxIter(), 20); // Specify 1 Param. +paramMap.put(lr.maxIter(), 30); // This overwrites the original maxIter. +paramMap.put(lr.regParam(), 0.1); + +// One can also combine ParamMaps. +ParamMap paramMap2 = new ParamMap(); +paramMap2.put(lr.scoreCol(), "probability"); // Changes output column name. +ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2); + +// Now learn a new model using the paramMapCombined parameters. +// paramMapCombined overrides all parameters set earlier via lr.set* methods. +LogisticRegressionModel model2 = lr.fit(training, paramMapCombined); +System.out.println("Model 2 was fit using parameters: " + model2.fittingParamMap()); + +// Prepare test documents. +List localTest = Lists.newArrayList( + new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)), + new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)), + new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))); +JavaSchemaRDD test = jsql.applySchema(jsc.parallelize(localTest), LabeledPoint.class); + +// Make predictions on test documents using the Transformer.transform() method. +// LogisticRegression.transform will only use the 'features' column. +// Note that model2.transform() outputs a 'probability' column instead of the usual 'score' +// column since we renamed the lr.scoreCol parameter previously. +model2.transform(test).registerAsTable("results"); +JavaSchemaRDD results = + jsql.sql("SELECT features, label, probability, prediction FROM results"); +for (Row r: results.collect()) { + System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2) + + ", prediction=" + r.get(3)); +} +{% endhighlight %} +
+ +
+ +## Example: Pipeline + +This example follows the simple text document `Pipeline` illustrated in the figures above. + +
+ +
+{% highlight scala %} +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.ml.Pipeline +import org.apache.spark.ml.classification.LogisticRegression +import org.apache.spark.ml.feature.{HashingTF, Tokenizer} +import org.apache.spark.sql.{Row, SQLContext} + +// Labeled and unlabeled instance types. +// Spark SQL can infer schema from case classes. +case class LabeledDocument(id: Long, text: String, label: Double) +case class Document(id: Long, text: String) + +// Set up contexts. Import implicit conversions to SchemaRDD from sqlContext. +val conf = new SparkConf().setAppName("SimpleTextClassificationPipeline") +val sc = new SparkContext(conf) +val sqlContext = new SQLContext(sc) +import sqlContext._ + +// Prepare training documents, which are labeled. +val training = sparkContext.parallelize(Seq( + LabeledDocument(0L, "a b c d e spark", 1.0), + LabeledDocument(1L, "b d", 0.0), + LabeledDocument(2L, "spark f g h", 1.0), + LabeledDocument(3L, "hadoop mapreduce", 0.0))) + +// Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr. +val tokenizer = new Tokenizer() + .setInputCol("text") + .setOutputCol("words") +val hashingTF = new HashingTF() + .setNumFeatures(1000) + .setInputCol(tokenizer.getOutputCol) + .setOutputCol("features") +val lr = new LogisticRegression() + .setMaxIter(10) + .setRegParam(0.01) +val pipeline = new Pipeline() + .setStages(Array(tokenizer, hashingTF, lr)) + +// Fit the pipeline to training documents. +val model = pipeline.fit(training) + +// Prepare test documents, which are unlabeled. +val test = sparkContext.parallelize(Seq( + Document(4L, "spark i j k"), + Document(5L, "l m n"), + Document(6L, "mapreduce spark"), + Document(7L, "apache hadoop"))) + +// Make predictions on test documents. 
+model.transform(test) + .select('id, 'text, 'score, 'prediction) + .collect() + .foreach { case Row(id: Long, text: String, score: Double, prediction: Double) => + println("(" + id + ", " + text + ") --> score=" + score + ", prediction=" + prediction) + } +{% endhighlight %} +
+ +
+{% highlight java %} +import java.io.Serializable; +import java.util.List; +import com.google.common.collect.Lists; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.Pipeline; +import org.apache.spark.ml.PipelineModel; +import org.apache.spark.ml.PipelineStage; +import org.apache.spark.ml.classification.LogisticRegression; +import org.apache.spark.ml.feature.HashingTF; +import org.apache.spark.ml.feature.Tokenizer; +import org.apache.spark.sql.api.java.JavaSQLContext; +import org.apache.spark.sql.api.java.JavaSchemaRDD; +import org.apache.spark.sql.api.java.Row; +import org.apache.spark.SparkConf; + +// Labeled and unlabeled instance types. +// Spark SQL can infer schema from Java Beans. +public class Document implements Serializable { + private Long id; + private String text; + + public Document(Long id, String text) { + this.id = id; + this.text = text; + } + + public Long getId() { return this.id; } + public void setId(Long id) { this.id = id; } + + public String getText() { return this.text; } + public void setText(String text) { this.text = text; } +} + +public class LabeledDocument extends Document implements Serializable { + private Double label; + + public LabeledDocument(Long id, String text, Double label) { + super(id, text); + this.label = label; + } + + public Double getLabel() { return this.label; } + public void setLabel(Double label) { this.label = label; } +} + +// Set up contexts. +SparkConf conf = new SparkConf().setAppName("JavaSimpleTextClassificationPipeline"); +JavaSparkContext jsc = new JavaSparkContext(conf); +JavaSQLContext jsql = new JavaSQLContext(jsc); + +// Prepare training documents, which are labeled. 
+List localTraining = Lists.newArrayList( + new LabeledDocument(0L, "a b c d e spark", 1.0), + new LabeledDocument(1L, "b d", 0.0), + new LabeledDocument(2L, "spark f g h", 1.0), + new LabeledDocument(3L, "hadoop mapreduce", 0.0)); +JavaSchemaRDD training = + jsql.applySchema(jsc.parallelize(localTraining), LabeledDocument.class); + +// Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr. +Tokenizer tokenizer = new Tokenizer() + .setInputCol("text") + .setOutputCol("words"); +HashingTF hashingTF = new HashingTF() + .setNumFeatures(1000) + .setInputCol(tokenizer.getOutputCol()) + .setOutputCol("features"); +LogisticRegression lr = new LogisticRegression() + .setMaxIter(10) + .setRegParam(0.01); +Pipeline pipeline = new Pipeline() + .setStages(new PipelineStage[] {tokenizer, hashingTF, lr}); + +// Fit the pipeline to training documents. +PipelineModel model = pipeline.fit(training); + +// Prepare test documents, which are unlabeled. +List localTest = Lists.newArrayList( + new Document(4L, "spark i j k"), + new Document(5L, "l m n"), + new Document(6L, "mapreduce spark"), + new Document(7L, "apache hadoop")); +JavaSchemaRDD test = + jsql.applySchema(jsc.parallelize(localTest), Document.class); + +// Make predictions on test documents. +model.transform(test).registerAsTable("prediction"); +JavaSchemaRDD predictions = jsql.sql("SELECT id, text, score, prediction FROM prediction"); +for (Row r: predictions.collect()) { + System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> score=" + r.get(2) + + ", prediction=" + r.get(3)); +} +{% endhighlight %} +
+ +
+ +## Example: Model Selection via Cross-Validation + +An important task in ML is *model selection*, or using data to find the best model or parameters for a given task. This is also called *tuning*. +`Pipeline`s facilitate model selection by making it easy to tune an entire `Pipeline` at once, rather than tuning each element in the `Pipeline` separately. + +Currently, `spark.ml` supports model selection using the [`CrossValidator`](api/scala/index.html#org.apache.spark.ml.tuning.CrossValidator) class, which takes an `Estimator`, a set of `ParamMap`s, and an [`Evaluator`](api/scala/index.html#org.apache.spark.ml.Evaluator). +`CrossValidator` begins by splitting the dataset into a set of *folds* which are used as separate training and test datasets; e.g., with `$k=3$` folds, `CrossValidator` will generate 3 (training, test) dataset pairs, each of which uses 2/3 of the data for training and 1/3 for testing. +`CrossValidator` iterates through the set of `ParamMap`s. For each `ParamMap`, it trains the given `Estimator` and evaluates it using the given `Evaluator`. +The `ParamMap` which produces the best evaluation metric (averaged over the `$k$` folds) is selected as the best model. +`CrossValidator` finally fits the `Estimator` using the best `ParamMap` and the entire dataset. + +The following example demonstrates using `CrossValidator` to select from a grid of parameters. +To help construct the parameter grid, we use the [`ParamGridBuilder`](api/scala/index.html#org.apache.spark.ml.tuning.ParamGridBuilder) utility. + +Note that cross-validation over a grid of parameters is expensive. +E.g., in the example below, the parameter grid has 3 values for `hashingTF.numFeatures` and 2 values for `lr.regParam`, and `CrossValidator` uses 2 folds. This multiplies out to `$(3 \times 2) \times 2 = 12$` different models being trained. +In realistic settings, it can be common to try many more parameters and use more folds (`$k=3$` and `$k=10$` are common). 
+In other words, using `CrossValidator` can be very expensive. +However, it is also a well-established method for choosing parameters which is more statistically sound than heuristic hand-tuning. + +
+ +
+{% highlight scala %} +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.SparkContext._ +import org.apache.spark.ml.Pipeline +import org.apache.spark.ml.classification.LogisticRegression +import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator +import org.apache.spark.ml.feature.{HashingTF, Tokenizer} +import org.apache.spark.ml.tuning.{ParamGridBuilder, CrossValidator} +import org.apache.spark.sql.{Row, SQLContext} + +val conf = new SparkConf().setAppName("CrossValidatorExample") +val sc = new SparkContext(conf) +val sqlContext = new SQLContext(sc) +import sqlContext._ + +// Prepare training documents, which are labeled. +val training = sparkContext.parallelize(Seq( + LabeledDocument(0L, "a b c d e spark", 1.0), + LabeledDocument(1L, "b d", 0.0), + LabeledDocument(2L, "spark f g h", 1.0), + LabeledDocument(3L, "hadoop mapreduce", 0.0), + LabeledDocument(4L, "b spark who", 1.0), + LabeledDocument(5L, "g d a y", 0.0), + LabeledDocument(6L, "spark fly", 1.0), + LabeledDocument(7L, "was mapreduce", 0.0), + LabeledDocument(8L, "e spark program", 1.0), + LabeledDocument(9L, "a e c l", 0.0), + LabeledDocument(10L, "spark compile", 1.0), + LabeledDocument(11L, "hadoop software", 0.0))) + +// Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr. +val tokenizer = new Tokenizer() + .setInputCol("text") + .setOutputCol("words") +val hashingTF = new HashingTF() + .setInputCol(tokenizer.getOutputCol) + .setOutputCol("features") +val lr = new LogisticRegression() + .setMaxIter(10) +val pipeline = new Pipeline() + .setStages(Array(tokenizer, hashingTF, lr)) + +// We now treat the Pipeline as an Estimator, wrapping it in a CrossValidator instance. +// This will allow us to jointly choose parameters for all Pipeline stages. +// A CrossValidator requires an Estimator, a set of Estimator ParamMaps, and an Evaluator. 
+val crossval = new CrossValidator() + .setEstimator(pipeline) + .setEvaluator(new BinaryClassificationEvaluator) +// We use a ParamGridBuilder to construct a grid of parameters to search over. +// With 3 values for hashingTF.numFeatures and 2 values for lr.regParam, +// this grid will have 3 x 2 = 6 parameter settings for CrossValidator to choose from. +val paramGrid = new ParamGridBuilder() + .addGrid(hashingTF.numFeatures, Array(10, 100, 1000)) + .addGrid(lr.regParam, Array(0.1, 0.01)) + .build() +crossval.setEstimatorParamMaps(paramGrid) +crossval.setNumFolds(2) // Use 3+ in practice + +// Run cross-validation, and choose the best set of parameters. +val cvModel = crossval.fit(training) +// Get the best LogisticRegression model (with the best set of parameters from paramGrid). +val lrModel = cvModel.bestModel + +// Prepare test documents, which are unlabeled. +val test = sparkContext.parallelize(Seq( + Document(4L, "spark i j k"), + Document(5L, "l m n"), + Document(6L, "mapreduce spark"), + Document(7L, "apache hadoop"))) + +// Make predictions on test documents. cvModel uses the best model found (lrModel). +cvModel.transform(test) + .select('id, 'text, 'score, 'prediction) + .collect() + .foreach { case Row(id: Long, text: String, score: Double, prediction: Double) => + println("(" + id + ", " + text + ") --> score=" + score + ", prediction=" + prediction) +} +{% endhighlight %} +
+ +
+{% highlight java %} +import java.util.List; +import com.google.common.collect.Lists; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.Model; +import org.apache.spark.ml.Pipeline; +import org.apache.spark.ml.PipelineStage; +import org.apache.spark.ml.classification.LogisticRegression; +import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator; +import org.apache.spark.ml.feature.HashingTF; +import org.apache.spark.ml.feature.Tokenizer; +import org.apache.spark.ml.param.ParamMap; +import org.apache.spark.ml.tuning.CrossValidator; +import org.apache.spark.ml.tuning.CrossValidatorModel; +import org.apache.spark.ml.tuning.ParamGridBuilder; +import org.apache.spark.sql.api.java.JavaSQLContext; +import org.apache.spark.sql.api.java.JavaSchemaRDD; +import org.apache.spark.sql.api.java.Row; + +SparkConf conf = new SparkConf().setAppName("JavaCrossValidatorExample"); +JavaSparkContext jsc = new JavaSparkContext(conf); +JavaSQLContext jsql = new JavaSQLContext(jsc); + +// Prepare training documents, which are labeled. +List localTraining = Lists.newArrayList( + new LabeledDocument(0L, "a b c d e spark", 1.0), + new LabeledDocument(1L, "b d", 0.0), + new LabeledDocument(2L, "spark f g h", 1.0), + new LabeledDocument(3L, "hadoop mapreduce", 0.0), + new LabeledDocument(4L, "b spark who", 1.0), + new LabeledDocument(5L, "g d a y", 0.0), + new LabeledDocument(6L, "spark fly", 1.0), + new LabeledDocument(7L, "was mapreduce", 0.0), + new LabeledDocument(8L, "e spark program", 1.0), + new LabeledDocument(9L, "a e c l", 0.0), + new LabeledDocument(10L, "spark compile", 1.0), + new LabeledDocument(11L, "hadoop software", 0.0)); +JavaSchemaRDD training = + jsql.applySchema(jsc.parallelize(localTraining), LabeledDocument.class); + +// Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr. 
+Tokenizer tokenizer = new Tokenizer() + .setInputCol("text") + .setOutputCol("words"); +HashingTF hashingTF = new HashingTF() + .setNumFeatures(1000) + .setInputCol(tokenizer.getOutputCol()) + .setOutputCol("features"); +LogisticRegression lr = new LogisticRegression() + .setMaxIter(10) + .setRegParam(0.01); +Pipeline pipeline = new Pipeline() + .setStages(new PipelineStage[] {tokenizer, hashingTF, lr}); + +// We now treat the Pipeline as an Estimator, wrapping it in a CrossValidator instance. +// This will allow us to jointly choose parameters for all Pipeline stages. +// A CrossValidator requires an Estimator, a set of Estimator ParamMaps, and an Evaluator. +CrossValidator crossval = new CrossValidator() + .setEstimator(pipeline) + .setEvaluator(new BinaryClassificationEvaluator()); +// We use a ParamGridBuilder to construct a grid of parameters to search over. +// With 3 values for hashingTF.numFeatures and 2 values for lr.regParam, +// this grid will have 3 x 2 = 6 parameter settings for CrossValidator to choose from. +ParamMap[] paramGrid = new ParamGridBuilder() + .addGrid(hashingTF.numFeatures(), new int[]{10, 100, 1000}) + .addGrid(lr.regParam(), new double[]{0.1, 0.01}) + .build(); +crossval.setEstimatorParamMaps(paramGrid); +crossval.setNumFolds(2); // Use 3+ in practice + +// Run cross-validation, and choose the best set of parameters. +CrossValidatorModel cvModel = crossval.fit(training); +// Get the best LogisticRegression model (with the best set of parameters from paramGrid). +Model lrModel = cvModel.bestModel(); + +// Prepare test documents, which are unlabeled. +List localTest = Lists.newArrayList( + new Document(4L, "spark i j k"), + new Document(5L, "l m n"), + new Document(6L, "mapreduce spark"), + new Document(7L, "apache hadoop")); +JavaSchemaRDD test = jsql.applySchema(jsc.parallelize(localTest), Document.class); + +// Make predictions on test documents. cvModel uses the best model found (lrModel). 
+cvModel.transform(test).registerAsTable("prediction"); +JavaSchemaRDD predictions = jsql.sql("SELECT id, text, score, prediction FROM prediction"); +for (Row r: predictions.collect()) { + System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> score=" + r.get(2) + + ", prediction=" + r.get(3)); +} +{% endhighlight %} +
+ +
+ +# Dependencies + +Spark ML currently depends on MLlib and has the same dependencies. +Please see the [MLlib Dependencies guide](mllib-guide.html#dependencies) for more info. + +Spark ML also depends upon Spark SQL, but the relevant parts of Spark SQL do not bring additional dependencies. + +# Developers + +**Development plan** + +If all goes well, `spark.ml` will become the primary ML package at the time of the Spark 1.3 release. Initially, simple wrappers will be used to port algorithms to `spark.ml`, but eventually, code will be moved to `spark.ml` and `spark.mllib` will be deprecated. + +**Advice to developers** + +During the next development cycle, new algorithms should be contributed to `spark.mllib`, but we welcome patches sent to either package. If an algorithm is best expressed using the new API (e.g., feature transformers), we may ask for developers to use the new `spark.ml` API. +Wrappers for old and new algorithms can be contributed to `spark.ml`. + +Users will be able to use algorithms from either of the two packages. The main difficulty will be the differences in APIs between the two packages. + diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index dcb6819f46cba..efd7dda310712 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -1,6 +1,6 @@ --- layout: global -title: Machine Learning Library (MLlib) +title: Machine Learning Library (MLlib) Programming Guide --- MLlib is Spark's scalable machine learning library consisting of common learning algorithms and utilities, @@ -35,6 +35,17 @@ MLlib is under active development. The APIs marked `Experimental`/`DeveloperApi` may change in future releases, and the migration guide below will explain all changes between releases. +# spark.ml: The New ML Package + +Spark 1.2 includes a new machine learning package called `spark.ml`, currently an alpha component but potentially a successor to `spark.mllib`. 
The `spark.ml` package aims to replace the old APIs with a cleaner, more uniform set of APIs which will help users create full machine learning pipelines. + +See the **[spark.ml programming guide](ml-guide.html)** for more information on this package. + +Users can use algorithms from either of the two packages, but APIs may differ. Currently, `spark.ml` offers a subset of the algorithms from `spark.mllib`. + +Developers should contribute new algorithms to `spark.mllib` and can optionally contribute to `spark.ml`. +See the `spark.ml` programming guide linked above for more details. + # Dependencies MLlib uses the linear algebra package [Breeze](http://www.scalanlp.org/), diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java new file mode 100644 index 0000000000000..3b156fa0482fc --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml; + +import java.util.List; + +import com.google.common.collect.Lists; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.Model; +import org.apache.spark.ml.Pipeline; +import org.apache.spark.ml.PipelineStage; +import org.apache.spark.ml.classification.LogisticRegression; +import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator; +import org.apache.spark.ml.feature.HashingTF; +import org.apache.spark.ml.feature.Tokenizer; +import org.apache.spark.ml.param.ParamMap; +import org.apache.spark.ml.tuning.CrossValidator; +import org.apache.spark.ml.tuning.CrossValidatorModel; +import org.apache.spark.ml.tuning.ParamGridBuilder; +import org.apache.spark.sql.api.java.JavaSQLContext; +import org.apache.spark.sql.api.java.JavaSchemaRDD; +import org.apache.spark.sql.api.java.Row; + +/** + * A simple example demonstrating model selection using CrossValidator. + * This example also demonstrates how Pipelines are Estimators. + * + * This example uses the Java bean classes {@link org.apache.spark.examples.ml.LabeledDocument} and + * {@link org.apache.spark.examples.ml.Document} defined in the Scala example + * {@link org.apache.spark.examples.ml.SimpleTextClassificationPipeline}. + * + * Run with + * <pre>
+ * bin/run-example ml.JavaCrossValidatorExample
+ * </pre>
+ */ +public class JavaCrossValidatorExample { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaCrossValidatorExample"); + JavaSparkContext jsc = new JavaSparkContext(conf); + JavaSQLContext jsql = new JavaSQLContext(jsc); + + // Prepare training documents, which are labeled. + List localTraining = Lists.newArrayList( + new LabeledDocument(0L, "a b c d e spark", 1.0), + new LabeledDocument(1L, "b d", 0.0), + new LabeledDocument(2L, "spark f g h", 1.0), + new LabeledDocument(3L, "hadoop mapreduce", 0.0), + new LabeledDocument(4L, "b spark who", 1.0), + new LabeledDocument(5L, "g d a y", 0.0), + new LabeledDocument(6L, "spark fly", 1.0), + new LabeledDocument(7L, "was mapreduce", 0.0), + new LabeledDocument(8L, "e spark program", 1.0), + new LabeledDocument(9L, "a e c l", 0.0), + new LabeledDocument(10L, "spark compile", 1.0), + new LabeledDocument(11L, "hadoop software", 0.0)); + JavaSchemaRDD training = + jsql.applySchema(jsc.parallelize(localTraining), LabeledDocument.class); + + // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr. + Tokenizer tokenizer = new Tokenizer() + .setInputCol("text") + .setOutputCol("words"); + HashingTF hashingTF = new HashingTF() + .setNumFeatures(1000) + .setInputCol(tokenizer.getOutputCol()) + .setOutputCol("features"); + LogisticRegression lr = new LogisticRegression() + .setMaxIter(10) + .setRegParam(0.01); + Pipeline pipeline = new Pipeline() + .setStages(new PipelineStage[] {tokenizer, hashingTF, lr}); + + // We now treat the Pipeline as an Estimator, wrapping it in a CrossValidator instance. + // This will allow us to jointly choose parameters for all Pipeline stages. + // A CrossValidator requires an Estimator, a set of Estimator ParamMaps, and an Evaluator. 
+ CrossValidator crossval = new CrossValidator() + .setEstimator(pipeline) + .setEvaluator(new BinaryClassificationEvaluator()); + // We use a ParamGridBuilder to construct a grid of parameters to search over. + // With 3 values for hashingTF.numFeatures and 2 values for lr.regParam, + // this grid will have 3 x 2 = 6 parameter settings for CrossValidator to choose from. + ParamMap[] paramGrid = new ParamGridBuilder() + .addGrid(hashingTF.numFeatures(), new int[]{10, 100, 1000}) + .addGrid(lr.regParam(), new double[]{0.1, 0.01}) + .build(); + crossval.setEstimatorParamMaps(paramGrid); + crossval.setNumFolds(2); // Use 3+ in practice + + // Run cross-validation, and choose the best set of parameters. + CrossValidatorModel cvModel = crossval.fit(training); + + // Prepare test documents, which are unlabeled. + List localTest = Lists.newArrayList( + new Document(4L, "spark i j k"), + new Document(5L, "l m n"), + new Document(6L, "mapreduce spark"), + new Document(7L, "apache hadoop")); + JavaSchemaRDD test = jsql.applySchema(jsc.parallelize(localTest), Document.class); + + // Make predictions on test documents. cvModel uses the best model found (lrModel). + cvModel.transform(test).registerAsTable("prediction"); + JavaSchemaRDD predictions = jsql.sql("SELECT id, text, score, prediction FROM prediction"); + for (Row r: predictions.collect()) { + System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> score=" + r.get(2) + + ", prediction=" + r.get(3)); + } + } +} diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java new file mode 100644 index 0000000000000..cf58f4dfaa15b --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import java.util.List; + +import com.google.common.collect.Lists; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.classification.LogisticRegressionModel; +import org.apache.spark.ml.param.ParamMap; +import org.apache.spark.ml.classification.LogisticRegression; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.sql.api.java.JavaSQLContext; +import org.apache.spark.sql.api.java.JavaSchemaRDD; +import org.apache.spark.sql.api.java.Row; + +/** + * A simple example demonstrating ways to specify parameters for Estimators and Transformers. + * Run with + * {{{ + * bin/run-example ml.JavaSimpleParamsExample + * }}} + */ +public class JavaSimpleParamsExample { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaSimpleParamsExample"); + JavaSparkContext jsc = new JavaSparkContext(conf); + JavaSQLContext jsql = new JavaSQLContext(jsc); + + // Prepare training data. + // We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of Java Beans + // into SchemaRDDs, where it uses the bean metadata to infer the schema. 
+ List localTraining = Lists.newArrayList( + new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)), + new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)), + new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)), + new LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))); + JavaSchemaRDD training = jsql.applySchema(jsc.parallelize(localTraining), LabeledPoint.class); + + // Create a LogisticRegression instance. This instance is an Estimator. + LogisticRegression lr = new LogisticRegression(); + // Print out the parameters, documentation, and any default values. + System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n"); + + // We may set parameters using setter methods. + lr.setMaxIter(10) + .setRegParam(0.01); + + // Learn a LogisticRegression model. This uses the parameters stored in lr. + LogisticRegressionModel model1 = lr.fit(training); + // Since model1 is a Model (i.e., a Transformer produced by an Estimator), + // we can view the parameters it used during fit(). + // This prints the parameter (name: value) pairs, where names are unique IDs for this + // LogisticRegression instance. + System.out.println("Model 1 was fit using parameters: " + model1.fittingParamMap()); + + // We may alternatively specify parameters using a ParamMap. + ParamMap paramMap = new ParamMap(); + paramMap.put(lr.maxIter().w(20)); // Specify 1 Param. + paramMap.put(lr.maxIter(), 30); // This overwrites the original maxIter. + paramMap.put(lr.regParam().w(0.1), lr.threshold().w(0.55)); // Specify multiple Params. + + // One can also combine ParamMaps. + ParamMap paramMap2 = new ParamMap(); + paramMap2.put(lr.scoreCol().w("probability")); // Change output column name + ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2); + + // Now learn a new model using the paramMapCombined parameters. + // paramMapCombined overrides all parameters set earlier via lr.set* methods. 
+ LogisticRegressionModel model2 = lr.fit(training, paramMapCombined); + System.out.println("Model 2 was fit using parameters: " + model2.fittingParamMap()); + + // Prepare test documents. + List localTest = Lists.newArrayList( + new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)), + new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)), + new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))); + JavaSchemaRDD test = jsql.applySchema(jsc.parallelize(localTest), LabeledPoint.class); + + // Make predictions on test documents using the Transformer.transform() method. + // LogisticRegression.transform will only use the 'features' column. + // Note that model2.transform() outputs a 'probability' column instead of the usual 'score' + // column since we renamed the lr.scoreCol parameter previously. + model2.transform(test).registerAsTable("results"); + JavaSchemaRDD results = + jsql.sql("SELECT features, label, probability, prediction FROM results"); + for (Row r: results.collect()) { + System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2) + + ", prediction=" + r.get(3)); + } + } +} diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java index 22ba68d8c354c..54f18014e4b2f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java @@ -80,14 +80,14 @@ public static void main(String[] args) { new Document(5L, "l m n"), new Document(6L, "mapreduce spark"), new Document(7L, "apache hadoop")); - JavaSchemaRDD test = - jsql.applySchema(jsc.parallelize(localTest), Document.class); + JavaSchemaRDD test = jsql.applySchema(jsc.parallelize(localTest), Document.class); // Make predictions on test documents. 
model.transform(test).registerAsTable("prediction"); JavaSchemaRDD predictions = jsql.sql("SELECT id, text, score, prediction FROM prediction"); for (Row r: predictions.collect()) { - System.out.println(r); + System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> score=" + r.get(2) + + ", prediction=" + r.get(3)); } } } diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala new file mode 100644 index 0000000000000..ce6bc066bd70d --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.examples.ml + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.SparkContext._ +import org.apache.spark.ml.Pipeline +import org.apache.spark.ml.classification.LogisticRegression +import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator +import org.apache.spark.ml.feature.{HashingTF, Tokenizer} +import org.apache.spark.ml.tuning.{ParamGridBuilder, CrossValidator} +import org.apache.spark.sql.{Row, SQLContext} + +/** + * A simple example demonstrating model selection using CrossValidator. + * This example also demonstrates how Pipelines are Estimators. + * + * This example uses the [[LabeledDocument]] and [[Document]] case classes from + * [[SimpleTextClassificationPipeline]]. + * + * Run with + * {{{ + * bin/run-example ml.CrossValidatorExample + * }}} + */ +object CrossValidatorExample { + + def main(args: Array[String]) { + val conf = new SparkConf().setAppName("CrossValidatorExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + import sqlContext._ + + // Prepare training documents, which are labeled. + val training = sparkContext.parallelize(Seq( + LabeledDocument(0L, "a b c d e spark", 1.0), + LabeledDocument(1L, "b d", 0.0), + LabeledDocument(2L, "spark f g h", 1.0), + LabeledDocument(3L, "hadoop mapreduce", 0.0), + LabeledDocument(4L, "b spark who", 1.0), + LabeledDocument(5L, "g d a y", 0.0), + LabeledDocument(6L, "spark fly", 1.0), + LabeledDocument(7L, "was mapreduce", 0.0), + LabeledDocument(8L, "e spark program", 1.0), + LabeledDocument(9L, "a e c l", 0.0), + LabeledDocument(10L, "spark compile", 1.0), + LabeledDocument(11L, "hadoop software", 0.0))) + + // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr. 
+ val tokenizer = new Tokenizer() + .setInputCol("text") + .setOutputCol("words") + val hashingTF = new HashingTF() + .setInputCol(tokenizer.getOutputCol) + .setOutputCol("features") + val lr = new LogisticRegression() + .setMaxIter(10) + val pipeline = new Pipeline() + .setStages(Array(tokenizer, hashingTF, lr)) + + // We now treat the Pipeline as an Estimator, wrapping it in a CrossValidator instance. + // This will allow us to jointly choose parameters for all Pipeline stages. + // A CrossValidator requires an Estimator, a set of Estimator ParamMaps, and an Evaluator. + val crossval = new CrossValidator() + .setEstimator(pipeline) + .setEvaluator(new BinaryClassificationEvaluator) + // We use a ParamGridBuilder to construct a grid of parameters to search over. + // With 3 values for hashingTF.numFeatures and 2 values for lr.regParam, + // this grid will have 3 x 2 = 6 parameter settings for CrossValidator to choose from. + val paramGrid = new ParamGridBuilder() + .addGrid(hashingTF.numFeatures, Array(10, 100, 1000)) + .addGrid(lr.regParam, Array(0.1, 0.01)) + .build() + crossval.setEstimatorParamMaps(paramGrid) + crossval.setNumFolds(2) // Use 3+ in practice + + // Run cross-validation, and choose the best set of parameters. + val cvModel = crossval.fit(training) + + // Prepare test documents, which are unlabeled. + val test = sparkContext.parallelize(Seq( + Document(4L, "spark i j k"), + Document(5L, "l m n"), + Document(6L, "mapreduce spark"), + Document(7L, "apache hadoop"))) + + // Make predictions on test documents. cvModel uses the best model found (lrModel). 
+ cvModel.transform(test) + .select('id, 'text, 'score, 'prediction) + .collect() + .foreach { case Row(id: Long, text: String, score: Double, prediction: Double) => + println("(" + id + ", " + text + ") --> score=" + score + ", prediction=" + prediction) + } + } +} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala new file mode 100644 index 0000000000000..44d5b084c269a --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.SparkContext._ +import org.apache.spark.ml.classification.LogisticRegression +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.sql.{Row, SQLContext} + +/** + * A simple example demonstrating ways to specify parameters for Estimators and Transformers. 
+ * Run with + * {{{ + * bin/run-example ml.SimpleParamsExample + * }}} + */ +object SimpleParamsExample { + + def main(args: Array[String]) { + val conf = new SparkConf().setAppName("SimpleParamsExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + import sqlContext._ + + // Prepare training data. + // We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of Java Beans + // into SchemaRDDs, where it uses the bean metadata to infer the schema. + val training = sparkContext.parallelize(Seq( + LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)), + LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)), + LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)), + LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5)))) + + // Create a LogisticRegression instance. This instance is an Estimator. + val lr = new LogisticRegression() + // Print out the parameters, documentation, and any default values. + println("LogisticRegression parameters:\n" + lr.explainParams() + "\n") + + // We may set parameters using setter methods. + lr.setMaxIter(10) + .setRegParam(0.01) + + // Learn a LogisticRegression model. This uses the parameters stored in lr. + val model1 = lr.fit(training) + // Since model1 is a Model (i.e., a Transformer produced by an Estimator), + // we can view the parameters it used during fit(). + // This prints the parameter (name: value) pairs, where names are unique IDs for this + // LogisticRegression instance. + println("Model 1 was fit using parameters: " + model1.fittingParamMap) + + // We may alternatively specify parameters using a ParamMap, + // which supports several methods for specifying parameters. + val paramMap = ParamMap(lr.maxIter -> 20) + paramMap.put(lr.maxIter, 30) // Specify 1 Param. This overwrites the original maxIter. + paramMap.put(lr.regParam -> 0.1, lr.threshold -> 0.55) // Specify multiple Params. + + // One can also combine ParamMaps. 
+ val paramMap2 = ParamMap(lr.scoreCol -> "probability") // Change output column name + val paramMapCombined = paramMap ++ paramMap2 + + // Now learn a new model using the paramMapCombined parameters. + // paramMapCombined overrides all parameters set earlier via lr.set* methods. + val model2 = lr.fit(training, paramMapCombined) + println("Model 2 was fit using parameters: " + model2.fittingParamMap) + + // Prepare test documents. + val test = sparkContext.parallelize(Seq( + LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)), + LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)), + LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5)))) + + // Make predictions on test documents using the Transformer.transform() method. + // LogisticRegression.transform will only use the 'features' column. + // Note that model2.transform() outputs a 'probability' column instead of the usual 'score' + // column since we renamed the lr.scoreCol parameter previously. + model2.transform(test) + .select('features, 'label, 'probability, 'prediction) + .collect() + .foreach { case Row(features: Vector, label: Double, prob: Double, prediction: Double) => + println("(" + features + ", " + label + ") -> prob=" + prob + ", prediction=" + prediction) + } + } +} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala index ee7897d9062d9..92895a05e479a 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala @@ -20,10 +20,11 @@ package org.apache.spark.examples.ml import scala.beans.BeanInfo import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.SparkContext._ import org.apache.spark.ml.Pipeline import org.apache.spark.ml.classification.LogisticRegression import org.apache.spark.ml.feature.{HashingTF, 
Tokenizer} -import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.{Row, SQLContext} @BeanInfo case class LabeledDocument(id: Long, text: String, label: Double) @@ -81,6 +82,8 @@ object SimpleTextClassificationPipeline { model.transform(test) .select('id, 'text, 'score, 'prediction) .collect() - .foreach(println) + .foreach { case Row(id: Long, text: String, score: Double, prediction: Double) => + println("(" + id + ", " + text + ") --> score=" + score + ", prediction=" + prediction) + } } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index e545df1e37b9c..081a574beea5d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -162,11 +162,15 @@ class PipelineModel private[ml] ( } override def transform(dataset: SchemaRDD, paramMap: ParamMap): SchemaRDD = { - transformSchema(dataset.schema, paramMap, logging = true) - stages.foldLeft(dataset)((cur, transformer) => transformer.transform(cur, paramMap)) + // Precedence of ParamMaps: paramMap > this.paramMap > fittingParamMap + val map = (fittingParamMap ++ this.paramMap) ++ paramMap + transformSchema(dataset.schema, map, logging = true) + stages.foldLeft(dataset)((cur, transformer) => transformer.transform(cur, map)) } private[ml] override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - stages.foldLeft(schema)((cur, transformer) => transformer.transformSchema(cur, paramMap)) + // Precedence of ParamMaps: paramMap > this.paramMap > fittingParamMap + val map = (fittingParamMap ++ this.paramMap) ++ paramMap + stages.foldLeft(schema)((cur, transformer) => transformer.transformSchema(cur, map)) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala index 490e6609ad311..23fbd228d01cb 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala @@ -18,16 +18,14 @@ package org.apache.spark.ml import scala.annotation.varargs -import scala.reflect.runtime.universe.TypeTag import org.apache.spark.Logging import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.param._ import org.apache.spark.sql.SchemaRDD import org.apache.spark.sql.api.java.JavaSchemaRDD -import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.Star -import org.apache.spark.sql.catalyst.dsl._ +import org.apache.spark.sql.catalyst.expressions.ScalaUdf import org.apache.spark.sql.catalyst.types._ /** @@ -86,7 +84,7 @@ abstract class Transformer extends PipelineStage with Params { * Abstract class for transformers that take one input column, apply transformation, and output the * result as a new column. */ -private[ml] abstract class UnaryTransformer[IN, OUT: TypeTag, T <: UnaryTransformer[IN, OUT, T]] +private[ml] abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]] extends Transformer with HasInputCol with HasOutputCol with Logging { def setInputCol(value: String): T = set(inputCol, value).asInstanceOf[T] @@ -99,6 +97,11 @@ private[ml] abstract class UnaryTransformer[IN, OUT: TypeTag, T <: UnaryTransfor */ protected def createTransformFunc(paramMap: ParamMap): IN => OUT + /** + * Returns the data type of the output column. + */ + protected def outputDataType: DataType + /** * Validates the input type. Throw an exception if it is invalid. 
*/ @@ -111,9 +114,8 @@ private[ml] abstract class UnaryTransformer[IN, OUT: TypeTag, T <: UnaryTransfor if (schema.fieldNames.contains(map(outputCol))) { throw new IllegalArgumentException(s"Output column ${map(outputCol)} already exists.") } - val output = ScalaReflection.schemaFor[OUT] val outputFields = schema.fields :+ - StructField(map(outputCol), output.dataType, output.nullable) + StructField(map(outputCol), outputDataType, !outputDataType.isPrimitive) StructType(outputFields) } @@ -121,7 +123,7 @@ private[ml] abstract class UnaryTransformer[IN, OUT: TypeTag, T <: UnaryTransfor transformSchema(dataset.schema, paramMap, logging = true) import dataset.sqlContext._ val map = this.paramMap ++ paramMap - val udf = this.createTransformFunc(map) - dataset.select(Star(None), udf.call(map(inputCol).attr) as map(outputCol)) + val udf = ScalaUdf(this.createTransformFunc(map), outputDataType, Seq(map(inputCol).attr)) + dataset.select(Star(None), udf as map(outputCol)) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index b98b1755a3584..e0bfb1e484a2e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -21,7 +21,8 @@ import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{IntParam, ParamMap} import org.apache.spark.mllib.feature -import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.linalg.{VectorUDT, Vector} +import org.apache.spark.sql.catalyst.types.DataType /** * :: AlphaComponent :: @@ -39,4 +40,6 @@ class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] { val hashingTF = new feature.HashingTF(paramMap(numFeatures)) hashingTF.transform } + + override protected def outputDataType: DataType = new VectorUDT() } diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 0a6599b64c011..9352f40f372d3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.ParamMap -import org.apache.spark.sql.{DataType, StringType} +import org.apache.spark.sql.{DataType, StringType, ArrayType} /** * :: AlphaComponent :: @@ -36,4 +36,6 @@ class Tokenizer extends UnaryTransformer[String, Seq[String], Tokenizer] { protected override def validateInputType(inputType: DataType): Unit = { require(inputType == StringType, s"Input type must be string type but got $inputType.") } + + override protected def outputDataType: DataType = new ArrayType(StringType, false) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 8fd46aef4b99d..4b4340af543b0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -17,13 +17,12 @@ package org.apache.spark.ml.param -import java.lang.reflect.Modifier - -import org.apache.spark.annotation.AlphaComponent - import scala.annotation.varargs import scala.collection.mutable +import java.lang.reflect.Modifier + +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.Identifiable /** @@ -221,7 +220,9 @@ class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) exten /** * Puts a list of param pairs (overwrites if the input params exists). 
+ * Not usable from Java */ + @varargs def put(paramPairs: ParamPair[_]*): this.type = { paramPairs.foreach { p => put(p.param.asInstanceOf[Param[Any]], p.value) @@ -282,6 +283,7 @@ class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) exten * where the latter overwrites this if there exists conflicts. */ def ++(other: ParamMap): ParamMap = { + // TODO: Provide a better method name for Java users. new ParamMap(this.map ++ other.map) } @@ -290,6 +292,7 @@ class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) exten * Adds all parameters from the input param map into this param map. */ def ++=(other: ParamMap): this.type = { + // TODO: Provide a better method name for Java users. this.map ++= other.map this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 8c4c9c6cf6ae2..9fed513becddc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -96,7 +96,9 @@ private[spark] object BLAS extends Serializable with Logging { * dot(x, y) */ def dot(x: Vector, y: Vector): Double = { - require(x.size == y.size) + require(x.size == y.size, + "BLAS.dot(x: Vector, y:Vector) was given Vectors with non-matching sizes:" + + " x.size = " + x.size + ", y.size = " + y.size) (x, y) match { case (dx: DenseVector, dy: DenseVector) => dot(dx, dy) From 7e758d709286e73d2c878d4a2d2b4606386142c7 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Thu, 4 Dec 2014 20:16:35 +0800 Subject: [PATCH 65/82] [FIX][DOC] Fix broken links in ml-guide.md and some minor changes in ScalaDoc. 
Author: Xiangrui Meng Closes #3601 from mengxr/SPARK-4575-fix and squashes the following commits: c559768 [Xiangrui Meng] minor code update ce94da8 [Xiangrui Meng] Java Bean -> JavaBean 0b5c182 [Xiangrui Meng] fix links in ml-guide --- docs/ml-guide.md | 8 ++++---- .../spark/examples/ml/JavaCrossValidatorExample.java | 1 - .../apache/spark/examples/ml/JavaSimpleParamsExample.java | 2 +- .../src/main/scala/org/apache/spark/ml/param/params.scala | 1 - 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/ml-guide.md b/docs/ml-guide.md index 012fbd91e698b..1c2e27341473b 100644 --- a/docs/ml-guide.md +++ b/docs/ml-guide.md @@ -31,7 +31,7 @@ E.g., a learning algorithm is an `Estimator` which trains on a dataset and produ * **[`Pipeline`](ml-guide.html#pipeline)**: A `Pipeline` chains multiple `Transformer`s and `Estimator`s together to specify an ML workflow. -* **[`Param`](ml-guide.html#param)**: All `Transformer`s and `Estimator`s now share a common API for specifying parameters. +* **[`Param`](ml-guide.html#parameters)**: All `Transformer`s and `Estimator`s now share a common API for specifying parameters. ## ML Dataset @@ -134,7 +134,7 @@ Each stage's `transform()` method updates the dataset and passes it to the next Spark ML `Estimator`s and `Transformer`s use a uniform API for specifying parameters. A [`Param`](api/scala/index.html#org.apache.spark.ml.param.Param) is a named parameter with self-contained documentation. -A [`ParamMap`](api/scala/index.html#org.apache.spark.ml.param.ParamMap)] is a set of (parameter, value) pairs. +A [`ParamMap`](api/scala/index.html#org.apache.spark.ml.param.ParamMap) is a set of (parameter, value) pairs. There are two main ways to pass parameters to an algorithm: @@ -148,7 +148,7 @@ This is useful if there are two algorithms with the `maxIter` parameter in a `Pi # Code Examples This section gives code examples illustrating the functionality discussed above. 
-There is not yet documentation for specific algorithms in Spark ML. For more info, please refer to the [API Documentation](api/scala/index.html). Spark ML algorithms are currently wrappers for MLlib algorithms, and the [MLlib programming guide](mllib-guide.html) has details on specific algorithms. +There is not yet documentation for specific algorithms in Spark ML. For more info, please refer to the [API Documentation](api/scala/index.html#org.apache.spark.ml.package). Spark ML algorithms are currently wrappers for MLlib algorithms, and the [MLlib programming guide](mllib-guide.html) has details on specific algorithms. ## Example: Estimator, Transformer, and Param @@ -492,7 +492,7 @@ The `ParamMap` which produces the best evaluation metric (averaged over the `$k$ `CrossValidator` finally fits the `Estimator` using the best `ParamMap` and the entire dataset. The following example demonstrates using `CrossValidator` to select from a grid of parameters. -To help construct the parameter grid, we use the [`ParamGridBuilder`](api/scala/index.html#org.apache.spark.ml.tuning.ParamGridGuilder) utility. +To help construct the parameter grid, we use the [`ParamGridBuilder`](api/scala/index.html#org.apache.spark.ml.tuning.ParamGridBuilder) utility. Note that cross-validation over a grid of parameters is expensive. E.g., in the example below, the parameter grid has 3 values for `hashingTF.numFeatures` and 2 values for `lr.regParam`, and `CrossValidator` uses 2 folds. This multiplies out to `$(3 \times 2) \times 2 = 12$` different models being trained. 
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java index 3b156fa0482fc..f4b4f8d8c7b2f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java @@ -23,7 +23,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.ml.Model; import org.apache.spark.ml.Pipeline; import org.apache.spark.ml.PipelineStage; import org.apache.spark.ml.classification.LogisticRegression; diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java index cf58f4dfaa15b..e25b271777ed4 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java @@ -47,7 +47,7 @@ public static void main(String[] args) { JavaSQLContext jsql = new JavaSQLContext(jsc); // Prepare training data. - // We use LabeledPoint, which is a case class. Spark SQL can convert RDDs of Java Beans + // We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans // into SchemaRDDs, where it uses the bean metadata to infer the schema. 
List localTraining = Lists.newArrayList( new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)), diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 4b4340af543b0..04f9cfb1bfc2f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -220,7 +220,6 @@ class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) exten /** * Puts a list of param pairs (overwrites if the input params exists). - * Not usable from Java */ @varargs def put(paramPairs: ParamPair[_]*): this.type = { From 28c7acacef974fdabd2b9ecc20d0d6cf6c58728f Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Thu, 4 Dec 2014 10:21:03 -0800 Subject: [PATCH 66/82] [SPARK-4683][SQL] Add a beeline.cmd to run on Windows Tested locally with a Win7 VM. Connected to a Spark SQL Thrift server instance running on Mac OS X with the following command line: ``` bin\beeline.cmd -u jdbc:hive2://10.0.2.2:10000 -n lian ``` [Review on Reviewable](https://reviewable.io/reviews/apache/spark/3599) Author: Cheng Lian Closes #3599 from liancheng/beeline.cmd and squashes the following commits: 79092e7 [Cheng Lian] Windows script for BeeLine --- bin/beeline.cmd | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 bin/beeline.cmd diff --git a/bin/beeline.cmd b/bin/beeline.cmd new file mode 100644 index 0000000000000..8293f311029dd --- /dev/null +++ b/bin/beeline.cmd @@ -0,0 +1,21 @@ +@echo off + +rem +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. 
You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. +rem + +set SPARK_HOME=%~dp0.. +cmd /V /E /C %SPARK_HOME%\bin\spark-class.cmd org.apache.hive.beeline.BeeLine %* From 8106b1e36b2c2b9f5dc5d7252540e48cc3fc96d5 Mon Sep 17 00:00:00 2001 From: WangTaoTheTonic Date: Thu, 4 Dec 2014 11:52:47 -0800 Subject: [PATCH 67/82] [SPARK-4253] Ignore spark.driver.host in yarn-cluster and standalone-cluster modes In yarn-cluster and standalone-cluster modes, we don't know where driver will run until it is launched. If the `spark.driver.host` property is set on the submitting machine and propagated to the driver through SparkConf then this will lead to errors when the driver launches. This patch fixes this issue by dropping the `spark.driver.host` property in SparkSubmit when running in a cluster deploy mode. 
Author: WangTaoTheTonic Author: WangTao Closes #3112 from WangTaoTheTonic/SPARK4253 and squashes the following commits: ed1a25c [WangTaoTheTonic] revert unrelated formatting issue 02c4e49 [WangTao] add comment 32a3f3f [WangTaoTheTonic] ingore it in SparkSubmit instead of SparkContext 667cf24 [WangTaoTheTonic] document fix ff8d5f7 [WangTaoTheTonic] also ignore it in standalone cluster mode 2286e6b [WangTao] ignore spark.driver.host in yarn-cluster mode --- .../src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 5 +++++ .../spark/deploy/yarn/ApplicationMasterArguments.scala | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0c7d247519447..955cbd6dab96d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -281,6 +281,11 @@ object SparkSubmit { sysProps.getOrElseUpdate(k, v) } + // Ignore invalid spark.driver.host in cluster modes. + if (deployMode == CLUSTER) { + sysProps -= ("spark.driver.host") + } + // Resolve paths in certain spark properties val pathConfigs = Seq( "spark.jars", diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala index 8b32c76d14037..d76a63276d752 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala @@ -36,7 +36,7 @@ class ApplicationMasterArguments(val args: Array[String]) { var args = inputArgs - while (! args.isEmpty) { + while (!args.isEmpty) { // --num-workers, --worker-memory, and --worker-cores are deprecated since 1.0, // the properties with executor in their names are preferred. 
args match { From 8dae26f83818ee0f5ce8e5b083625170d2e901c5 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Thu, 4 Dec 2014 12:11:41 -0800 Subject: [PATCH 68/82] [HOTFIX] Fixing two issues with the release script. 1. The version replacement was still producing some false changes. 2. Uploads to the staging repo specifically. Author: Patrick Wendell Closes #3608 from pwendell/release-script and squashes the following commits: 3c63294 [Patrick Wendell] Fixing two issues with the release script: --- dev/create-release/create-release.sh | 31 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index e0aca467ac949..3b89aaba29609 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -39,7 +39,6 @@ RC_NAME=${RC_NAME:-rc2} M2_REPO=~/.m2/repository SPARK_REPO=$M2_REPO/org/apache/spark NEXUS_ROOT=https://repository.apache.org/service/local/staging -NEXUS_UPLOAD=$NEXUS_ROOT/deploy/maven2 NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads if [ -z "$JAVA_HOME" ]; then @@ -64,19 +63,28 @@ if [[ ! "$@" =~ --package-only ]]; then # NOTE: This is done "eagerly" i.e. we don't check if we can succesfully build # or before we coin the release commit. This helps avoid races where # other people add commits to this branch while we are in the middle of building. - old=" ${RELEASE_VERSION}-SNAPSHOT<\/version>" - new=" ${RELEASE_VERSION}<\/version>" - find . -name pom.xml -o -name package.scala | grep -v dev | xargs -I {} sed -i \ - -e "s/$old/$new/" {} + cur_ver="${RELEASE_VERSION}-SNAPSHOT" + rel_ver="${RELEASE_VERSION}" + next_ver="${NEXT_VERSION}-SNAPSHOT" + + old="^\( \{2,4\}\)${cur_ver}<\/version>$" + new="\1${rel_ver}<\/version>" + find . -name pom.xml | grep -v dev | xargs -I {} sed -i \ + -e "s/${old}/${new}/" {} + find . 
-name package.scala | grep -v dev | xargs -I {} sed -i \ + -e "s/${old}/${new}/" {} + git commit -a -m "Preparing Spark release $GIT_TAG" echo "Creating tag $GIT_TAG at the head of $GIT_BRANCH" git tag $GIT_TAG - old=" ${RELEASE_VERSION}<\/version>" - new=" ${NEXT_VERSION}-SNAPSHOT<\/version>" - find . -name pom.xml -o -name package.scala | grep -v dev | xargs -I {} sed -i \ + old="^\( \{2,4\}\)${rel_ver}<\/version>$" + new="\1${next_ver}<\/version>" + find . -name pom.xml | grep -v dev | xargs -I {} sed -i \ -e "s/$old/$new/" {} - git commit -a -m "Preparing development version ${NEXT_VERSION}-SNAPSHOT" + find . -name package.scala | grep -v dev | xargs -I {} sed -i \ + -e "s/${old}/${new}/" {} + git commit -a -m "Preparing development version $next_ver" git push origin $GIT_TAG git push origin HEAD:$GIT_BRANCH git checkout -f $GIT_TAG @@ -118,12 +126,13 @@ if [[ ! "$@" =~ --package-only ]]; then gpg --print-md SHA1 $file > $file.sha1 done - echo "Uplading files to $NEXUS_UPLOAD" + nexus_upload=$NEXUS_ROOT/deployByRepositoryId/$staged_repo_id + echo "Uplading files to $nexus_upload" for file in $(find . -type f) do # strip leading ./ file_short=$(echo $file | sed -e "s/\.\///") - dest_url="$NEXUS_UPLOAD/org/apache/spark/$file_short" + dest_url="$nexus_upload/org/apache/spark/$file_short" echo " Uploading $file_short" curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url done From 794f3aec24acb578e258532ad0590554d07958ba Mon Sep 17 00:00:00 2001 From: alexdebrie Date: Thu, 4 Dec 2014 14:13:59 -0800 Subject: [PATCH 69/82] [SPARK-4745] Fix get_existing_cluster() function with multiple security groups The current get_existing_cluster() function would only find an instance belonged to a cluster if the instance's security groups == cluster_name + "-master" (or "-slaves"). This fix allows for multiple security groups by checking if the cluster_name + "-master" security group is in the list of groups for a particular instance. 
Author: alexdebrie Closes #3596 from alexdebrie/master and squashes the following commits: 9d51232 [alexdebrie] Fix get_existing_cluster() function with multiple security groups --- ec2/spark_ec2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index b83decadc2988..5f9e484212635 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -504,9 +504,9 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): active = [i for i in res.instances if is_active(i)] for inst in active: group_names = [g.name for g in inst.groups] - if group_names == [cluster_name + "-master"]: + if (cluster_name + "-master") in group_names: master_nodes.append(inst) - elif group_names == [cluster_name + "-slaves"]: + elif (cluster_name + "-slaves") in group_names: slave_nodes.append(inst) if any((master_nodes, slave_nodes)): print "Found %d master(s), %d slaves" % (len(master_nodes), len(slave_nodes)) From 743a889d2778f797aabc3b1e8146e7aa32b62a48 Mon Sep 17 00:00:00 2001 From: Saldanha Date: Thu, 4 Dec 2014 14:22:09 -0800 Subject: [PATCH 70/82] [SPARK-4459] Change groupBy type parameter from K to U Please see https://issues.apache.org/jira/browse/SPARK-4459 Author: Saldanha Closes #3327 from alokito/master and squashes the following commits: 54b1095 [Saldanha] [SPARK-4459] changed type parameter for keyBy from K to U d5f73c3 [Saldanha] [SPARK-4459] added keyBy test 316ad77 [Saldanha] SPARK-4459 changed type parameter for groupBy from K to U. 
62ddd4b [Saldanha] SPARK-4459 added failing unit test --- .../apache/spark/api/java/JavaRDDLike.scala | 17 ++++---- .../java/org/apache/spark/JavaAPISuite.java | 41 +++++++++++++++++++ 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index ac42294d56def..bd451634e53d2 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -211,8 +211,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements * mapping to that key. */ - def groupBy[K](f: JFunction[T, K]): JavaPairRDD[K, JIterable[T]] = { - implicit val ctagK: ClassTag[K] = fakeClassTag + def groupBy[U](f: JFunction[T, U]): JavaPairRDD[U, JIterable[T]] = { + // The type parameter is U instead of K in order to work around a compiler bug; see SPARK-4459 + implicit val ctagK: ClassTag[U] = fakeClassTag implicit val ctagV: ClassTag[JList[T]] = fakeClassTag JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f)(fakeClassTag))) } @@ -221,10 +222,11 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements * mapping to that key. 
*/ - def groupBy[K](f: JFunction[T, K], numPartitions: Int): JavaPairRDD[K, JIterable[T]] = { - implicit val ctagK: ClassTag[K] = fakeClassTag + def groupBy[U](f: JFunction[T, U], numPartitions: Int): JavaPairRDD[U, JIterable[T]] = { + // The type parameter is U instead of K in order to work around a compiler bug; see SPARK-4459 + implicit val ctagK: ClassTag[U] = fakeClassTag implicit val ctagV: ClassTag[JList[T]] = fakeClassTag - JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f, numPartitions)(fakeClassTag[K]))) + JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f, numPartitions)(fakeClassTag[U]))) } /** @@ -458,8 +460,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Creates tuples of the elements in this RDD by applying `f`. */ - def keyBy[K](f: JFunction[T, K]): JavaPairRDD[K, T] = { - implicit val ctag: ClassTag[K] = fakeClassTag + def keyBy[U](f: JFunction[T, U]): JavaPairRDD[U, T] = { + // The type parameter is U instead of K in order to work around a compiler bug; see SPARK-4459 + implicit val ctag: ClassTag[U] = fakeClassTag JavaPairRDD.fromRDD(rdd.keyBy(f)) } diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index 59c86eecac5e8..3ad4f2f193af4 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -323,6 +323,47 @@ public Boolean call(Integer x) { Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds } + @Test + public void groupByOnPairRDD() { + // Regression test for SPARK-4459 + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); + Function, Boolean> areOdd = + new Function, Boolean>() { + @Override + public Boolean call(Tuple2 x) { + return (x._1() % 2 == 0) && (x._2() % 2 == 0); + } + }; + JavaPairRDD pairRDD = rdd.zip(rdd); + JavaPairRDD>> oddsAndEvens = pairRDD.groupBy(areOdd); + Assert.assertEquals(2, 
oddsAndEvens.count()); + Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens + Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds + + oddsAndEvens = pairRDD.groupBy(areOdd, 1); + Assert.assertEquals(2, oddsAndEvens.count()); + Assert.assertEquals(2, Iterables.size(oddsAndEvens.lookup(true).get(0))); // Evens + Assert.assertEquals(5, Iterables.size(oddsAndEvens.lookup(false).get(0))); // Odds + } + + @SuppressWarnings("unchecked") + @Test + public void keyByOnPairRDD() { + // Regression test for SPARK-4459 + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); + Function, String> sumToString = + new Function, String>() { + @Override + public String call(Tuple2 x) { + return String.valueOf(x._1() + x._2()); + } + }; + JavaPairRDD pairRDD = rdd.zip(rdd); + JavaPairRDD> keyed = pairRDD.keyBy(sumToString); + Assert.assertEquals(7, keyed.count()); + Assert.assertEquals(1, (long) keyed.lookup("2").get(0)._1()); + } + @SuppressWarnings("unchecked") @Test public void cogroup() { From ab8177da2defab1ecd8bc0cd5a21f07be5b8d2c5 Mon Sep 17 00:00:00 2001 From: lewuathe Date: Thu, 4 Dec 2014 15:14:36 -0800 Subject: [PATCH 71/82] [SPARK-4652][DOCS] Add docs about spark-git-repo option There might be some cases when WIPS spark version need to be run on EC2 cluster. In order to setup this type of cluster more easily, add --spark-git-repo option description to ec2 documentation. Author: lewuathe Author: Josh Rosen Closes #3513 from Lewuathe/doc-for-development-spark-cluster and squashes the following commits: 6dae8ee [lewuathe] Wrap consistent with other descriptions cfaf9be [lewuathe] Add docs about spark-git-repo option (Editing / cleanup by Josh Rosen) --- docs/ec2-scripts.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md index 66bf5f1a855ed..ed51d0abb3a45 100644 --- a/docs/ec2-scripts.md +++ b/docs/ec2-scripts.md @@ -85,6 +85,11 @@ another. 
specified version of Spark. The `` can be a version number (e.g. "0.7.3") or a specific git hash. By default, a recent version will be used. +- `--spark-git-repo=` will let you run a custom version of + Spark that is built from the given git repository. By default, the + [Apache Github mirror](https://github.com/apache/spark) will be used. + When using a custom Spark version, `--spark-version` must be set to git + commit hash, such as 317e114, instead of a version number. - If one of your launches fails due to e.g. not having the right permissions on your private key file, you can run `launch` with the `--resume` option to restart the setup process on an existing cluster. From ed92b47e83c2882f0e76da78dc268577df820382 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 4 Dec 2014 16:32:20 -0800 Subject: [PATCH 72/82] [SPARK-4397] Move object RDD to the front of RDD.scala. I ran into multiple cases that SBT/Scala compiler was confused by the implicits in continuous compilation mode. Adding explicit return types fixes the problem. Author: Reynold Xin Closes #3580 from rxin/rdd-implicit and squashes the following commits: ee32fcd [Reynold Xin] Move object RDD to the end of the file. b8562c9 [Reynold Xin] Merge branch 'master' of github.com:apache/spark into rdd-implicit d4e9f85 [Reynold Xin] Code review. a836a37 [Reynold Xin] Move object RDD to the front of RDD.scala. --- .../scala/org/apache/spark/SparkContext.scala | 8 ++--- .../main/scala/org/apache/spark/rdd/RDD.scala | 29 ++++++++++++++----- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 532f292952f05..aded7c12e274e 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1758,7 +1758,7 @@ object SparkContext extends Logging { @deprecated("Replaced by implicit functions in WritableConverter. 
This is kept here only for " + "backward compatibility.", "1.3.0") - def writableWritableConverter[T <: Writable]() = + def writableWritableConverter[T <: Writable](): WritableConverter[T] = WritableConverter.writableWritableConverter() /** @@ -2017,15 +2017,15 @@ object WritableConverter { simpleWritableConverter[Boolean, BooleanWritable](_.get) implicit def bytesWritableConverter(): WritableConverter[Array[Byte]] = { - simpleWritableConverter[Array[Byte], BytesWritable](bw => + simpleWritableConverter[Array[Byte], BytesWritable] { bw => // getBytes method returns array which is longer then data to be returned Arrays.copyOfRange(bw.getBytes, 0, bw.getLength) - ) + } } implicit def stringWritableConverter(): WritableConverter[String] = simpleWritableConverter[String, Text](_.toString) - implicit def writableWritableConverter[T <: Writable]() = + implicit def writableWritableConverter[T <: Writable](): WritableConverter[T] = new WritableConverter[T](_.runtimeClass.asInstanceOf[Class[T]], _.asInstanceOf[T]) } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 0bd616ec24fcb..214f22bc5b603 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1398,6 +1398,13 @@ abstract class RDD[T: ClassTag]( } } + +/** + * Defines implicit functions that provide extra functionalities on RDDs of specific types. + * + * For example, [[RDD.rddToPairRDDFunctions]] converts an RDD into a [[PairRDDFunctions]] for + * key-value-pair RDDs, and enabling extra functionalities such as [[PairRDDFunctions.reduceByKey]]. + */ object RDD { // The following implicit functions were in SparkContext before 1.2 and users had to @@ -1406,22 +1413,30 @@ object RDD { // compatibility and forward to the following functions directly. 
implicit def rddToPairRDDFunctions[K, V](rdd: RDD[(K, V)]) - (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K] = null) = { + (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K] = null): PairRDDFunctions[K, V] = { new PairRDDFunctions(rdd) } - implicit def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]) = new AsyncRDDActions(rdd) + implicit def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]): AsyncRDDActions[T] = { + new AsyncRDDActions(rdd) + } implicit def rddToSequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable: ClassTag]( - rdd: RDD[(K, V)]) = + rdd: RDD[(K, V)]): SequenceFileRDDFunctions[K, V] = { new SequenceFileRDDFunctions(rdd) + } - implicit def rddToOrderedRDDFunctions[K : Ordering : ClassTag, V: ClassTag]( - rdd: RDD[(K, V)]) = + implicit def rddToOrderedRDDFunctions[K : Ordering : ClassTag, V: ClassTag](rdd: RDD[(K, V)]) + : OrderedRDDFunctions[K, V, (K, V)] = { new OrderedRDDFunctions[K, V, (K, V)](rdd) + } - implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = new DoubleRDDFunctions(rdd) + implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]): DoubleRDDFunctions = { + new DoubleRDDFunctions(rdd) + } - implicit def numericRDDToDoubleRDDFunctions[T](rdd: RDD[T])(implicit num: Numeric[T]) = + implicit def numericRDDToDoubleRDDFunctions[T](rdd: RDD[T])(implicit num: Numeric[T]) + : DoubleRDDFunctions = { new DoubleRDDFunctions(rdd.map(x => num.toDouble(x))) + } } From ddfc09c36381a0880dfa6778be2ca0bc7d80febf Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Thu, 4 Dec 2014 18:14:36 -0800 Subject: [PATCH 73/82] [SPARK-4421] Wrong link in spark-standalone.html Modified the link of building Spark. Author: Masayoshi TSUZUKI Closes #3279 from tsudukim/feature/SPARK-4421 and squashes the following commits: 56e31c1 [Masayoshi TSUZUKI] Modified the link of building Spark. 
--- docs/spark-standalone.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index a3028aa86dc45..d503bc961e200 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -10,7 +10,7 @@ In addition to running on the Mesos or YARN cluster managers, Spark also provide # Installing Spark Standalone to a Cluster -To install Spark Standalone mode, you simply place a compiled version of Spark on each node on the cluster. You can obtain pre-built versions of Spark with each release or [build it yourself](index.html#building). +To install Spark Standalone mode, you simply place a compiled version of Spark on each node on the cluster. You can obtain pre-built versions of Spark with each release or [build it yourself](building-spark.html). # Starting a Cluster Manually From 15cf3b0125fe238dea2ce13e703034ba7cef477f Mon Sep 17 00:00:00 2001 From: Andy Konwinski Date: Thu, 4 Dec 2014 18:27:02 -0800 Subject: [PATCH 74/82] Fix typo in Spark SQL docs. Author: Andy Konwinski Closes #3611 from andyk/patch-3 and squashes the following commits: 7bab333 [Andy Konwinski] Fix typo in Spark SQL docs. --- docs/sql-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 85d446b9da0e7..be284fbe217a5 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -278,7 +278,7 @@ performed on JSON files. from pyspark.sql import SQLContext, Row sqlContext = SQLContext(sc) -# Load a text file and convert each line to a dictionary. +# Load a text file and convert each line to a Row. 
lines = sc.textFile("examples/src/main/resources/people.txt") parts = lines.map(lambda l: l.split(",")) people = parts.map(lambda p: Row(name=p[0], age=int(p[1]))) From ca379039f701e423fa07933db4e063cb85d0236a Mon Sep 17 00:00:00 2001 From: Masayoshi TSUZUKI Date: Thu, 4 Dec 2014 19:33:02 -0800 Subject: [PATCH 75/82] [SPARK-4464] Description about configuration options need to be modified in docs. Added description about -h and -host. Modified description about -i and -ip which are now deprecated. Added description about --properties-file. Author: Masayoshi TSUZUKI Closes #3329 from tsudukim/feature/SPARK-4464 and squashes the following commits: 6c07caf [Masayoshi TSUZUKI] [SPARK-4464] Description about configuration options need to be modified in docs. --- docs/spark-standalone.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index d503bc961e200..ae7b81d5bb71f 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -34,8 +34,12 @@ Finally, the following configuration options can be passed to the master and wor - - + + + + + + @@ -57,6 +61,10 @@ Finally, the following configuration options can be passed to the master and wor + + + +
Argument | Meaning
-i IP, --ip IP | IP address or DNS name to listen on
-h HOST, --host HOST | Hostname to listen on
-i HOST, --ip HOST | Hostname to listen on (deprecated, use -h or --host)
-p PORT, --port PORT
-d DIR, --work-dir DIR | Directory to use for scratch space and job output logs (default: SPARK_HOME/work); only on worker
--properties-file FILE | Path to a custom Spark properties file to load (default: conf/spark-defaults.conf)
From 87437df036305c6f467dc6e0bf7891d27d266a6b Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 4 Dec 2014 21:53:38 -0800 Subject: [PATCH 76/82] Revert "[HOT FIX] [YARN] Check whether `/lib` exists before listing its files" This reverts commit 90ec643e9af4c8bbb9000edca08c07afb17939c7. --- .../apache/spark/deploy/yarn/ClientBase.scala | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 290d9943a5077..8e4360ea4476b 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -243,21 +243,18 @@ private[spark] trait ClientBase extends Logging { val libsURI = new URI(libsDir) val jarLinks = ListBuffer.empty[String] if (libsURI.getScheme != LOCAL_SCHEME) { - val localPath = getQualifiedLocalPath(libsURI) - val localFs = FileSystem.get(localPath.toUri, hadoopConf) - if (localFs.exists(localPath)) { - val jars = localFs.listFiles(localPath, /* recursive */ false) - while (jars.hasNext) { - val jar = jars.next() - val name = jar.getPath.getName - if (name.startsWith("datanucleus-")) { - // copy to remote and add to classpath - val src = jar.getPath - val destPath = copyFileToRemote(dst, src, replication) - distCacheMgr.addResource(localFs, hadoopConf, destPath, - localResources, LocalResourceType.FILE, name, statCache) - jarLinks += name - } + val localURI = getQualifiedLocalPath(libsURI).toUri() + val jars = FileSystem.get(localURI, hadoopConf).listFiles(new Path(localURI.getPath), false) + while (jars.hasNext) { + val jar = jars.next() + val name = jar.getPath.getName + if (name.startsWith("datanucleus-")) { + // copy to remote and add to classpath + val src = jar.getPath + val destPath = copyFileToRemote(dst, src, replication) + distCacheMgr.addResource(fs, hadoopConf, 
destPath, + localResources, LocalResourceType.FILE, name, statCache) + jarLinks += name } } } else { From fd8525334c443fbdc7dd58a62cb1aecae51b460b Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 4 Dec 2014 21:53:49 -0800 Subject: [PATCH 77/82] Revert "SPARK-2624 add datanucleus jars to the container in yarn-cluster" This reverts commit a975dc32799bb8a14f9e1c76defaaa7cfbaf8b53. --- docs/running-on-yarn.md | 15 ---- .../apache/spark/deploy/yarn/ClientBase.scala | 66 ---------------- .../spark/deploy/yarn/ClientBaseSuite.scala | 76 ------------------- 3 files changed, 157 deletions(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index e97ac9f0c4a00..16897dbb65311 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -139,21 +139,6 @@ Most of the configs are the same for Spark on YARN as for other deployment modes The maximum number of threads to use in the application master for launching executor containers. - - spark.yarn.datanucleus.dir - $SPARK_HOME/lib - - The location of the DataNucleus jars, in case overriding the default location is desired. - By default, Spark on YARN will use the DataNucleus jars installed at - $SPARK_HOME/lib, but the jars can also be in a world-readable location on HDFS. - This allows YARN to cache it on nodes so that it doesn't need to be distributed each time an - application runs. To point to a directory on HDFS, for example, set this configuration to - "hdfs:///some/path". - - This is required because the datanucleus jars cannot be packaged into the - assembly jar due to metadata conflicts (involving plugin.xml.) 
- - # Launching Spark on YARN diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 8e4360ea4476b..f95d72379171c 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -18,7 +18,6 @@ package org.apache.spark.deploy.yarn import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException} -import java.io.{File, FilenameFilter} import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} @@ -224,48 +223,10 @@ private[spark] trait ClientBase extends Logging { } } } - if (cachedSecondaryJarLinks.nonEmpty) { sparkConf.set(CONF_SPARK_YARN_SECONDARY_JARS, cachedSecondaryJarLinks.mkString(",")) } - /** - * Do the same for datanucleus jars, if they exist in spark home. Find all datanucleus-* jars, - * copy them to the remote fs, and add them to the class path. - * - * This is necessary because the datanucleus jars cannot be included in the assembly jar due - * to metadata conflicts involving plugin.xml. At the time of writing, these are the only - * jars that cannot be distributed with the uber jar and have to be treated differently. 
- * - * For more details, see SPARK-2624, and https://github.com/apache/spark/pull/3238 - */ - for (libsDir <- dataNucleusJarsDir(sparkConf)) { - val libsURI = new URI(libsDir) - val jarLinks = ListBuffer.empty[String] - if (libsURI.getScheme != LOCAL_SCHEME) { - val localURI = getQualifiedLocalPath(libsURI).toUri() - val jars = FileSystem.get(localURI, hadoopConf).listFiles(new Path(localURI.getPath), false) - while (jars.hasNext) { - val jar = jars.next() - val name = jar.getPath.getName - if (name.startsWith("datanucleus-")) { - // copy to remote and add to classpath - val src = jar.getPath - val destPath = copyFileToRemote(dst, src, replication) - distCacheMgr.addResource(fs, hadoopConf, destPath, - localResources, LocalResourceType.FILE, name, statCache) - jarLinks += name - } - } - } else { - jarLinks += libsURI.toString + Path.SEPARATOR + "*" - } - - if (jarLinks.nonEmpty) { - sparkConf.set(CONF_SPARK_DATANUCLEUS_JARS, jarLinks.mkString(",")) - } - } - localResources } @@ -590,13 +551,6 @@ private[spark] object ClientBase extends Logging { // Internal config to propagate the location of the user's jar to the driver/executors val CONF_SPARK_USER_JAR = "spark.yarn.user.jar" - // Location of the datanucleus jars - val CONF_SPARK_DATANUCLEUS_DIR = "spark.yarn.datanucleus.dir" - - // Internal config to propagate the locations of datanucleus jars found to add to the - // classpath of the executors. Value should be a comma-separated list of paths to each jar. - val CONF_SPARK_DATANUCLEUS_JARS = "spark.yarn.datanucleus.jars" - // Internal config to propagate the locations of any extra jars to add to the classpath // of the executors val CONF_SPARK_YARN_SECONDARY_JARS = "spark.yarn.secondary.jars" @@ -629,19 +583,6 @@ private[spark] object ClientBase extends Logging { } } - /** - * Find the user-defined provided jars directory if configured, or return SPARK_HOME/lib if not. 
- * - * This method first looks for $CONF_SPARK_DATANUCLEUS_DIR inside the SparkConf, then looks for - * Spark home inside the the SparkConf and the user environment. - */ - private def dataNucleusJarsDir(conf: SparkConf): Option[String] = { - conf.getOption(CONF_SPARK_DATANUCLEUS_DIR).orElse { - val sparkHome = conf.getOption("spark.home").orElse(sys.env.get("SPARK_HOME")) - sparkHome.map(path => path + Path.SEPARATOR + "lib") - } - } - /** * Return the path to the given application's staging directory. */ @@ -743,13 +684,6 @@ private[spark] object ClientBase extends Logging { addUserClasspath(args, sparkConf, env) } - // Add datanucleus jars to classpath - for (entries <- sparkConf.getOption(CONF_SPARK_DATANUCLEUS_JARS)) { - entries.split(",").filter(_.nonEmpty).foreach { entry => - addFileToClasspath(entry, null, env) - } - } - // Append all jar files under the working directory to the classpath. addClasspathEntry(Environment.PWD.$() + Path.SEPARATOR + "*", env) } diff --git a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala index b055e9b72dc61..17b79ae1d82c4 100644 --- a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala +++ b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala @@ -21,7 +21,6 @@ import java.io.File import java.net.URI import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.MRJobConfig import org.apache.hadoop.yarn.api.ApplicationConstants.Environment @@ -105,81 +104,6 @@ class ClientBaseSuite extends FunSuite with Matchers { cp should not contain (ClientBase.APP_JAR) } - test("DataNucleus in classpath") { - val dnJars = "local:/dn/core.jar,/dn/api.jar" - val conf = new Configuration() - val sparkConf = new SparkConf() - .set(ClientBase.CONF_SPARK_JAR, SPARK) - 
.set(ClientBase.CONF_SPARK_DATANUCLEUS_JARS, dnJars) - val env = new MutableHashMap[String, String]() - val args = new ClientArguments(Array("--jar", USER, "--addJars", ADDED), sparkConf) - - ClientBase.populateClasspath(args, conf, sparkConf, env) - - val cp = env("CLASSPATH").split(File.pathSeparator) - s"$dnJars".split(",").foreach({ entry => - val uri = new URI(entry) - if (ClientBase.LOCAL_SCHEME.equals(uri.getScheme())) { - cp should contain (uri.getPath()) - } else { - cp should not contain (uri.getPath()) - } - }) - } - - test("DataNucleus using local:") { - val dnDir = "local:/datanucleus" - val conf = new Configuration() - val sparkConf = new SparkConf() - .set(ClientBase.CONF_SPARK_JAR, SPARK) - .set(ClientBase.CONF_SPARK_DATANUCLEUS_DIR, dnDir) - val yarnConf = new YarnConfiguration() - val args = new ClientArguments(Array("--jar", USER, "--addJars", ADDED), sparkConf) - - val client = spy(new DummyClient(args, conf, sparkConf, yarnConf)) - doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), - any(classOf[Path]), anyShort(), anyBoolean()) - - val tempDir = Utils.createTempDir() - try { - client.prepareLocalResources(tempDir.getAbsolutePath()) - val jars = sparkConf.get(ClientBase.CONF_SPARK_DATANUCLEUS_JARS).split(",") - val uri = new URI(dnDir) - jars should contain (uri.toString + Path.SEPARATOR + "*") - } finally { - Utils.deleteRecursively(tempDir) - } - } - - test("DataNucleus using file:") { - val dnDir = Utils.createTempDir() - val tempDir = Utils.createTempDir() - - try { - // create mock datanucleus jar - val tempJar = File.createTempFile("datanucleus-", null, dnDir) - - val conf = new Configuration() - val sparkConf = new SparkConf() - .set(ClientBase.CONF_SPARK_JAR, SPARK) - .set(ClientBase.CONF_SPARK_DATANUCLEUS_DIR, dnDir.toURI.toString) - val yarnConf = new YarnConfiguration() - val args = new ClientArguments(Array("--jar", USER, "--addJars", ADDED), sparkConf) - - val client = spy(new DummyClient(args, conf, 
sparkConf, yarnConf)) - doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), - any(classOf[Path]), anyShort(), anyBoolean()) - - client.prepareLocalResources(tempDir.getAbsolutePath()) - - val jars = sparkConf.get(ClientBase.CONF_SPARK_DATANUCLEUS_JARS).split(",") - jars should contain (tempJar.getName) - } finally { - Utils.deleteRecursively(dnDir) - Utils.deleteRecursively(tempDir) - } - } - test("Jar path propagation through SparkConf") { val conf = new Configuration() val sparkConf = new SparkConf().set(ClientBase.CONF_SPARK_JAR, SPARK) From f5801e813f3c2573ebaf1af839341489ddd3ec78 Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Thu, 4 Dec 2014 22:25:21 -0800 Subject: [PATCH 78/82] [SPARK-4753][SQL] Use catalyst for partition pruning in newParquet. Author: Michael Armbrust Closes #3613 from marmbrus/parquetPartitionPruning and squashes the following commits: 4f138f8 [Michael Armbrust] Use catalyst for partition pruning in newParquet. --- .../apache/spark/sql/parquet/newParquet.scala | 58 +++++++++---------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 14f8659f15b3f..2e0c6c51c00e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce.{JobContext, InputSplit, Job} +import org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate import parquet.hadoop.ParquetInputFormat import parquet.hadoop.util.ContextUtil @@ -31,8 +32,8 @@ import org.apache.spark.{Partition => SparkPartition, Logging} import org.apache.spark.rdd.{NewHadoopPartition, RDD} import 
org.apache.spark.sql.{SQLConf, Row, SQLContext} -import org.apache.spark.sql.catalyst.expressions.{SpecificMutableRow, And, Expression, Attribute} -import org.apache.spark.sql.catalyst.types.{IntegerType, StructField, StructType} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.types.{StringType, IntegerType, StructField, StructType} import org.apache.spark.sql.sources._ import scala.collection.JavaConversions._ @@ -151,8 +152,6 @@ case class ParquetRelation2(path: String)(@transient val sqlContext: SQLContext) override def buildScan(output: Seq[Attribute], predicates: Seq[Expression]): RDD[Row] = { // This is mostly a hack so that we can use the existing parquet filter code. val requiredColumns = output.map(_.name) - // TODO: Parquet filters should be based on data sources API, not catalyst expressions. - val filters = DataSourceStrategy.selectFilters(predicates) val job = new Job(sparkContext.hadoopConfiguration) ParquetInputFormat.setReadSupportClass(job, classOf[RowReadSupport]) @@ -160,35 +159,34 @@ case class ParquetRelation2(path: String)(@transient val sqlContext: SQLContext) val requestedSchema = StructType(requiredColumns.map(schema(_))) - // TODO: Make folder based partitioning a first class citizen of the Data Sources API. 
- val partitionFilters = filters.collect { - case e @ EqualTo(attr, value) if partitionKeys.contains(attr) => - logInfo(s"Parquet scan partition filter: $attr=$value") - (p: Partition) => p.partitionValues(attr) == value - - case e @ In(attr, values) if partitionKeys.contains(attr) => - logInfo(s"Parquet scan partition filter: $attr IN ${values.mkString("{", ",", "}")}") - val set = values.toSet - (p: Partition) => set.contains(p.partitionValues(attr)) - - case e @ GreaterThan(attr, value) if partitionKeys.contains(attr) => - logInfo(s"Parquet scan partition filter: $attr > $value") - (p: Partition) => p.partitionValues(attr).asInstanceOf[Int] > value.asInstanceOf[Int] - - case e @ GreaterThanOrEqual(attr, value) if partitionKeys.contains(attr) => - logInfo(s"Parquet scan partition filter: $attr >= $value") - (p: Partition) => p.partitionValues(attr).asInstanceOf[Int] >= value.asInstanceOf[Int] + val partitionKeySet = partitionKeys.toSet + val rawPredicate = + predicates + .filter(_.references.map(_.name).toSet.subsetOf(partitionKeySet)) + .reduceOption(And) + .getOrElse(Literal(true)) + + // Translate the predicate so that it reads from the information derived from the + // folder structure + val castedPredicate = rawPredicate transform { + case a: AttributeReference => + val idx = partitionKeys.indexWhere(a.name == _) + BoundReference(idx, IntegerType, nullable = true) + } - case e @ LessThan(attr, value) if partitionKeys.contains(attr) => - logInfo(s"Parquet scan partition filter: $attr < $value") - (p: Partition) => p.partitionValues(attr).asInstanceOf[Int] < value.asInstanceOf[Int] + val inputData = new GenericMutableRow(partitionKeys.size) + val pruningCondition = InterpretedPredicate(castedPredicate) - case e @ LessThanOrEqual(attr, value) if partitionKeys.contains(attr) => - logInfo(s"Parquet scan partition filter: $attr <= $value") - (p: Partition) => p.partitionValues(attr).asInstanceOf[Int] <= value.asInstanceOf[Int] - } + val selectedPartitions = + if 
(partitionKeys.nonEmpty && predicates.nonEmpty) { + partitions.filter { part => + inputData(0) = part.partitionValues.values.head + pruningCondition(inputData) + } + } else { + partitions + } - val selectedPartitions = partitions.filter(p => partitionFilters.forall(_(p))) val fs = FileSystem.get(new java.net.URI(path), sparkContext.hadoopConfiguration) val selectedFiles = selectedPartitions.flatMap(_.files).map(f => fs.makeQualified(f.getPath)) // FileInputFormat cannot handle empty lists. From 6f61e1f961826a6c9e98a66d10b271b7e3c7dd55 Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Fri, 5 Dec 2014 10:27:40 -0800 Subject: [PATCH 79/82] [SPARK-4761][SQL] Enables Kryo by default in Spark SQL Thrift server Enables Kryo and disables reference tracking by default in Spark SQL Thrift server. Configurations explicitly defined by users in `spark-defaults.conf` are respected (the Thrift server is started by `spark-submit`, which handles configuration properties properly). [Review on Reviewable](https://reviewable.io/reviews/apache/spark/3621) Author: Cheng Lian Closes #3621 from liancheng/kryo-by-default and squashes the following commits: 70c2775 [Cheng Lian] Enables Kryo by default in Spark SQL Thrift server --- .../spark/sql/hive/thriftserver/SparkSQLEnv.scala | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala index 89732c939b0ec..158c225159720 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala @@ -32,11 +32,21 @@ private[hive] object SparkSQLEnv extends Logging { def init() { if (hiveContext == null) { - val sparkConf = new SparkConf() + val sparkConf = new SparkConf(loadDefaults = true) + val 
maybeSerializer = sparkConf.getOption("spark.serializer") + val maybeKryoReferenceTracking = sparkConf.getOption("spark.kryo.referenceTracking") + + sparkConf .setAppName(s"SparkSQL::${java.net.InetAddress.getLocalHost.getHostName}") .set("spark.sql.hive.version", HiveShim.version) - sparkContext = new SparkContext(sparkConf) + .set( + "spark.serializer", + maybeSerializer.getOrElse("org.apache.spark.serializer.KryoSerializer")) + .set( + "spark.kryo.referenceTracking", + maybeKryoReferenceTracking.getOrElse("false")) + sparkContext = new SparkContext(sparkConf) sparkContext.addSparkListener(new StatsReportListener()) hiveContext = new HiveContext(sparkContext) From 98a7d09978eeb775600ff41f9cc6ae8622026b71 Mon Sep 17 00:00:00 2001 From: "Zhang, Liye" Date: Fri, 5 Dec 2014 12:00:32 -0800 Subject: [PATCH 80/82] [SPARK-4005][CORE] handle message replies in receive instead of in the individual private methods In BlockManagerMasterActor, when handling the UpdateBlockInfo message type, the replies are sent from individual private methods; they should instead be sent from Akka's receive method.
Author: Zhang, Liye Closes #2853 from liyezhang556520/akkaRecv and squashes the following commits: 9b06f0a [Zhang, Liye] remove the unreachable code bf518cd [Zhang, Liye] change the indent 242166b [Zhang, Liye] modified accroding to the comments d4b929b [Zhang, Liye] [SPARK-4005][CORE] handle message replies in receive instead of in the individual private methods --- .../spark/storage/BlockManagerMasterActor.scala | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala index 685b2e11440fb..9cbda41223a8b 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala @@ -73,9 +73,8 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus case UpdateBlockInfo( blockManagerId, blockId, storageLevel, deserializedSize, size, tachyonSize) => - // TODO: Ideally we want to handle all the message replies in receive instead of in the - // individual private methods. - updateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size, tachyonSize) + sender ! updateBlockInfo( + blockManagerId, blockId, storageLevel, deserializedSize, size, tachyonSize) case GetLocations(blockId) => sender ! getLocations(blockId) @@ -355,23 +354,21 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus storageLevel: StorageLevel, memSize: Long, diskSize: Long, - tachyonSize: Long) { + tachyonSize: Long): Boolean = { if (!blockManagerInfo.contains(blockManagerId)) { if (blockManagerId.isDriver && !isLocal) { // We intentionally do not register the master (except in local mode), // so we should not indicate failure. - sender ! true + return true } else { - sender ! 
false + return false } - return } if (blockId == null) { blockManagerInfo(blockManagerId).updateLastSeenMs() - sender ! true - return + return true } blockManagerInfo(blockManagerId).updateBlockInfo( @@ -395,7 +392,7 @@ class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus if (locations.size == 0) { blockLocations.remove(blockId) } - sender ! true + true } private def getLocations(blockId: BlockId): Seq[BlockManagerId] = { From 6eb1b6f6204ea3c8083af3fb9cd990d9f3dac89d Mon Sep 17 00:00:00 2001 From: CrazyJvm Date: Fri, 5 Dec 2014 13:42:13 -0800 Subject: [PATCH 81/82] Streaming doc : do you mean inadvertently? Author: CrazyJvm Closes #3620 from CrazyJvm/streaming-foreachRDD and squashes the following commits: b72886b [CrazyJvm] do you mean inadvertently? --- docs/streaming-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 44a1f3ad7560b..5ebe834a32d31 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -1081,7 +1081,7 @@ Some of the common mistakes to avoid are as follows. - Often writing data to external system requires creating a connection object (e.g. TCP connection to a remote server) and using it to send data to a remote system. -For this purpose, a developer may inadvertantly try creating a connection object at +For this purpose, a developer may inadvertently try creating a connection object at the Spark driver, but try to use it in a Spark worker to save records in the RDDs. 
For example (in Scala), From e895e0cbecbbec1b412ff21321e57826d2d0a982 Mon Sep 17 00:00:00 2001 From: GuoQiang Li Date: Sat, 6 Dec 2014 00:56:51 -0800 Subject: [PATCH 82/82] [SPARK-3623][GraphX] GraphX should support the checkpoint operation Author: GuoQiang Li Closes #2631 from witgo/SPARK-3623 and squashes the following commits: a70c500 [GuoQiang Li] Remove java related 4d1e249 [GuoQiang Li] Add comments e682724 [GuoQiang Li] Graph should support the checkpoint operation --- .../scala/org/apache/spark/graphx/Graph.scala | 8 +++++++ .../apache/spark/graphx/impl/GraphImpl.scala | 5 +++++ .../org/apache/spark/graphx/GraphSuite.scala | 21 +++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala index 637791543514c..23538b71562de 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala @@ -96,6 +96,14 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab */ def cache(): Graph[VD, ED] + /** + * Mark this Graph for checkpointing. It will be saved to a file inside the checkpoint + * directory set with SparkContext.setCheckpointDir() and all references to its parent + * RDDs will be removed. It is strongly recommended that this Graph is persisted in + * memory, otherwise saving it on a file will require recomputation. + */ + def checkpoint(): Unit + /** * Uncaches only the vertices of this graph, leaving the edges alone. This is useful in iterative * algorithms that modify the vertex attributes but reuse the edges. 
This method can be used to diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala index 0eae2a673874a..a617d84aea9d4 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -65,6 +65,11 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( this } + override def checkpoint(): Unit = { + vertices.checkpoint() + replicatedVertexView.edges.checkpoint() + } + override def unpersistVertices(blocking: Boolean = true): Graph[VD, ED] = { vertices.unpersist(blocking) // TODO: unpersist the replicated vertices in `replicatedVertexView` but leave the edges alone diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala index a05d1ddb21295..9da0064104fb6 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.graphx import org.scalatest.FunSuite +import com.google.common.io.Files + import org.apache.spark.SparkContext import org.apache.spark.graphx.Graph._ import org.apache.spark.graphx.PartitionStrategy._ @@ -365,4 +367,23 @@ class GraphSuite extends FunSuite with LocalSparkContext { } } + test("checkpoint") { + val checkpointDir = Files.createTempDir() + checkpointDir.deleteOnExit() + withSpark { sc => + sc.setCheckpointDir(checkpointDir.getAbsolutePath) + val ring = (0L to 100L).zip((1L to 99L) :+ 0L).map { case (a, b) => Edge(a, b, 1)} + val rdd = sc.parallelize(ring) + val graph = Graph.fromEdges(rdd, 1.0F) + graph.checkpoint() + graph.edges.map(_.attr).count() + graph.vertices.map(_._2).count() + + val edgesDependencies = graph.edges.partitionsRDD.dependencies + val verticesDependencies = graph.vertices.partitionsRDD.dependencies + 
assert(edgesDependencies.forall(_.rdd.isInstanceOf[CheckpointRDD[_]])) + assert(verticesDependencies.forall(_.rdd.isInstanceOf[CheckpointRDD[_]])) + } + } + }