Skip to content

Commit

Permalink
Bump hadoop client version
Browse files Browse the repository at this point in the history
  • Loading branch information
istreeter committed Aug 13, 2023
1 parent 9828cc0 commit 13a2960
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 118 deletions.
3 changes: 3 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,15 @@ lazy val core: Project = project
.in(file("modules/core"))
.settings(BuildSettings.commonSettings ++ BuildSettings.logSettings)
.settings(libraryDependencies ++= Dependencies.coreDependencies)
.settings(excludeDependencies ++= Dependencies.commonExclusions)
.dependsOn(streams)

// sbt sub-project for the Azure build, rooted at modules/azure.
// Combines the common and Azure-specific build settings, adds the Azure
// library dependencies, and builds on top of the `core` and `kafka` projects.
lazy val azure: Project = project
.in(file("modules/azure"))
.settings(BuildSettings.commonSettings)
.settings(BuildSettings.azureSettings)
.settings(libraryDependencies ++= Dependencies.azureDependencies)
// Drop the artifacts listed in Dependencies.commonExclusions from this project's dependency graph.
.settings(excludeDependencies ++= Dependencies.commonExclusions)
.dependsOn(core, kafka)
.enablePlugins(BuildInfoPlugin, JavaAppPackaging, SnowplowDockerPlugin)

Expand All @@ -57,6 +59,7 @@ lazy val gcp: Project = project
.settings(BuildSettings.commonSettings)
.settings(BuildSettings.gcpSettings)
.settings(libraryDependencies ++= Dependencies.gcpDependencies)
.settings(excludeDependencies ++= Dependencies.commonExclusions)
.dependsOn(core, pubsub)
.enablePlugins(BuildInfoPlugin, JavaAppPackaging, SnowplowDockerPlugin)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,134 +203,134 @@ private[processing] object Transform {
event.ti_price_base.traverse(doubleToDecimal)
).mapN { case (trTotal, trTax, trShipping, tiPrice, trTotalBase, trTaxBase, trShippingBase, tiPriceBase) =>
List[Any](
event.app_id.orNull,
event.platform.orNull,
event.etl_tstamp.map(Timestamp.from).orNull,
event.app_id.getOrElse(null),
event.platform.getOrElse(null),
event.etl_tstamp.map(Timestamp.from).getOrElse(null),
Timestamp.from(event.collector_tstamp),
event.dvce_created_tstamp,
event.event.orNull,
event.dvce_created_tstamp.getOrElse(null),
event.event.getOrElse(null),
event.event_id.toString,
event.txn_id.getOrElse(null),
event.name_tracker.orNull,
event.v_tracker.orNull,
event.name_tracker.getOrElse(null),
event.v_tracker.getOrElse(null),
event.v_collector,
event.v_etl,
event.user_id,
event.user_ipaddress,
event.user_fingerprint,
event.domain_userid.orNull,
event.user_id.getOrElse(null),
event.user_ipaddress.getOrElse(null),
event.user_fingerprint.getOrElse(null),
event.domain_userid.getOrElse(null),
event.domain_sessionidx.getOrElse(null),
event.network_userid.orNull,
event.geo_country.orNull,
event.geo_region.orNull,
event.geo_city.orNull,
event.geo_zipcode.orNull,
event.network_userid.getOrElse(null),
event.geo_country.getOrElse(null),
event.geo_region.getOrElse(null),
event.geo_city.getOrElse(null),
event.geo_zipcode.getOrElse(null),
event.geo_latitude.getOrElse(null),
event.geo_longitude.getOrElse(null),
event.geo_region_name.orNull,
event.ip_isp.orNull,
event.ip_organization.orNull,
event.ip_domain.orNull,
event.ip_netspeed.orNull,
event.page_url.orNull,
event.page_title.orNull,
event.page_referrer.orNull,
event.page_urlscheme.orNull,
event.page_urlhost.orNull,
event.geo_region_name.getOrElse(null),
event.ip_isp.getOrElse(null),
event.ip_organization.getOrElse(null),
event.ip_domain.getOrElse(null),
event.ip_netspeed.getOrElse(null),
event.page_url.getOrElse(null),
event.page_title.getOrElse(null),
event.page_referrer.getOrElse(null),
event.page_urlscheme.getOrElse(null),
event.page_urlhost.getOrElse(null),
event.page_urlport.getOrElse(null),
event.page_urlpath.orNull,
event.page_urlquery.orNull,
event.page_urlfragment.orNull,
event.refr_urlscheme.orNull,
event.refr_urlhost.orNull,
event.page_urlpath.getOrElse(null),
event.page_urlquery.getOrElse(null),
event.page_urlfragment.getOrElse(null),
event.refr_urlscheme.getOrElse(null),
event.refr_urlhost.getOrElse(null),
event.refr_urlport.getOrElse(null),
event.refr_urlpath.orNull,
event.refr_urlquery.orNull,
event.refr_urlfragment.orNull,
event.refr_medium.orNull,
event.refr_source.orNull,
event.refr_term.orNull,
event.mkt_medium.orNull,
event.mkt_source.orNull,
event.mkt_term.orNull,
event.mkt_content.orNull,
event.mkt_campaign.orNull,
event.se_category.orNull,
event.se_action.orNull,
event.se_label.orNull,
event.se_property.orNull,
event.refr_urlpath.getOrElse(null),
event.refr_urlquery.getOrElse(null),
event.refr_urlfragment.getOrElse(null),
event.refr_medium.getOrElse(null),
event.refr_source.getOrElse(null),
event.refr_term.getOrElse(null),
event.mkt_medium.getOrElse(null),
event.mkt_source.getOrElse(null),
event.mkt_term.getOrElse(null),
event.mkt_content.getOrElse(null),
event.mkt_campaign.getOrElse(null),
event.se_category.getOrElse(null),
event.se_action.getOrElse(null),
event.se_label.getOrElse(null),
event.se_property.getOrElse(null),
event.se_value.getOrElse(null),
event.tr_orderid.orNull,
event.tr_affiliation.orNull,
trTotal.orNull,
trTax.orNull,
trShipping.orNull,
event.tr_city.orNull,
event.tr_state.orNull,
event.tr_country.orNull,
event.ti_orderid.orNull,
event.ti_sku.orNull,
event.ti_name.orNull,
event.ti_category.orNull,
tiPrice.orNull,
event.tr_orderid.getOrElse(null),
event.tr_affiliation.getOrElse(null),
trTotal.getOrElse(null),
trTax.getOrElse(null),
trShipping.getOrElse(null),
event.tr_city.getOrElse(null),
event.tr_state.getOrElse(null),
event.tr_country.getOrElse(null),
event.ti_orderid.getOrElse(null),
event.ti_sku.getOrElse(null),
event.ti_name.getOrElse(null),
event.ti_category.getOrElse(null),
tiPrice.getOrElse(null),
event.ti_quantity.getOrElse(null),
event.pp_xoffset_min.getOrElse(null),
event.pp_xoffset_max.getOrElse(null),
event.pp_yoffset_min.getOrElse(null),
event.pp_yoffset_max.getOrElse(null),
event.useragent.orNull,
event.br_name.orNull,
event.br_family.orNull,
event.br_version.orNull,
event.br_type.orNull,
event.br_renderengine.orNull,
event.br_lang.orNull,
event.useragent.getOrElse(null),
event.br_name.getOrElse(null),
event.br_family.getOrElse(null),
event.br_version.getOrElse(null),
event.br_type.getOrElse(null),
event.br_renderengine.getOrElse(null),
event.br_lang.getOrElse(null),
event.br_features_pdf.getOrElse(null),
event.br_features_flash.getOrElse(null),
event.br_features_java.getOrElse(null),
event.br_features_director.orNull,
event.br_features_quicktime.orNull,
event.br_features_realplayer.orNull,
event.br_features_windowsmedia.orNull,
event.br_features_gears.orNull,
event.br_features_silverlight.orNull,
event.br_cookies.orNull,
event.br_colordepth.orNull,
event.br_viewwidth.orNull,
event.br_viewheight.orNull,
event.os_name.orNull,
event.os_family.orNull,
event.os_manufacturer.orNull,
event.os_timezone.orNull,
event.dvce_type.orNull,
event.dvce_ismobile.orNull,
event.dvce_screenwidth.orNull,
event.dvce_screenheight.orNull,
event.doc_charset.orNull,
event.doc_width.orNull,
event.doc_height.orNull,
event.tr_currency.orNull,
trTotalBase.orNull,
trTaxBase.orNull,
trShippingBase.orNull,
event.ti_currency.orNull,
tiPriceBase.orNull,
event.base_currency.orNull,
event.geo_timezone.orNull,
event.mkt_clickid.orNull,
event.mkt_network.orNull,
event.etl_tags.orNull,
event.dvce_sent_tstamp.map(Timestamp.from).orNull,
event.refr_domain_userid.orNull,
event.refr_dvce_tstamp.map(Timestamp.from).orNull,
event.domain_sessionid.orNull,
event.derived_tstamp.map(Timestamp.from).orNull,
event.event_vendor.orNull,
event.event_name.orNull,
event.event_format.orNull,
event.event_version.orNull,
event.event_fingerprint.orNull,
event.true_tstamp.map(Timestamp.from).orNull
event.br_features_director.getOrElse(null),
event.br_features_quicktime.getOrElse(null),
event.br_features_realplayer.getOrElse(null),
event.br_features_windowsmedia.getOrElse(null),
event.br_features_gears.getOrElse(null),
event.br_features_silverlight.getOrElse(null),
event.br_cookies.getOrElse(null),
event.br_colordepth.getOrElse(null),
event.br_viewwidth.getOrElse(null),
event.br_viewheight.getOrElse(null),
event.os_name.getOrElse(null),
event.os_family.getOrElse(null),
event.os_manufacturer.getOrElse(null),
event.os_timezone.getOrElse(null),
event.dvce_type.getOrElse(null),
event.dvce_ismobile.getOrElse(null),
event.dvce_screenwidth.getOrElse(null),
event.dvce_screenheight.getOrElse(null),
event.doc_charset.getOrElse(null),
event.doc_width.getOrElse(null),
event.doc_height.getOrElse(null),
event.tr_currency.getOrElse(null),
trTotalBase.getOrElse(null),
trTaxBase.getOrElse(null),
trShippingBase.getOrElse(null),
event.ti_currency.getOrElse(null),
tiPriceBase.getOrElse(null),
event.base_currency.getOrElse(null),
event.geo_timezone.getOrElse(null),
event.mkt_clickid.getOrElse(null),
event.mkt_network.getOrElse(null),
event.etl_tags.getOrElse(null),
event.dvce_sent_tstamp.map(Timestamp.from).getOrElse(null),
event.refr_domain_userid.getOrElse(null),
event.refr_dvce_tstamp.map(Timestamp.from).getOrElse(null),
event.domain_sessionid.getOrElse(null),
event.derived_tstamp.map(Timestamp.from).getOrElse(null),
event.event_vendor.getOrElse(null),
event.event_name.getOrElse(null),
event.event_format.getOrElse(null),
event.event_version.getOrElse(null),
event.event_fingerprint.getOrElse(null),
event.true_tstamp.map(Timestamp.from).getOrElse(null)
)
}.leftMap(castErrorToLoaderIgluError(AtomicFields.schemaKey, _))

Expand Down
33 changes: 24 additions & 9 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ object Dependencies {
val spark = "3.4.1"
val delta = "2.4.0"
val iceberg = "1.3.0"
val hadoop = "3.3.5"
val hadoop = "3.3.6"
val gcsConnector = "2.2.15"
val biglakeIceberg = "0.1.0"
val hiveCommon = "3.1.3"
Expand All @@ -49,6 +49,7 @@ object Dependencies {
// Transitive overrides
val protobuf = "3.19.6"
val snappy = "1.1.10.2"
val thrift = "0.18.1"

// tests
val specs2 = "4.20.0"
Expand All @@ -74,22 +75,25 @@ object Dependencies {

// spark and hadoop
val sparkCore = "org.apache.spark" %% "spark-core" % V.spark
val sparkSql = "org.apache.spark" %% "spark-sql" % V.spark
val sparkSql = ("org.apache.spark" %% "spark-sql" % V.spark)
val delta = "io.delta" %% "delta-core" % V.delta
val iceberg = "org.apache.iceberg" %% "iceberg-spark-runtime-3.4" % V.iceberg
val hadoopClient = "org.apache.hadoop" % "hadoop-client" % V.hadoop
val hadoopAzure = "org.apache.hadoop" % "hadoop-azure" % V.hadoop
val gcsConnector = "com.google.cloud.bigdataoss" % "gcs-connector" % s"${V.gcsConnector}-hadoop3" from s"https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v${V.gcsConnector}/gcs-connector-hadoop3-${V.gcsConnector}-shaded.jar"
val hiveCommon = "org.apache.hive" % "hive-common" % V.hiveCommon
val hadoopClient = "org.apache.hadoop" % "hadoop-client-runtime" % V.hadoop
val hadoopAzure = "org.apache.hadoop" % "hadoop-azure" % V.hadoop
val gcsConnector = "com.google.cloud.bigdataoss" % "gcs-connector" % s"${V.gcsConnector}-hadoop3" from s"https://github.com/GoogleCloudDataproc/hadoop-connectors/releases/download/v${V.gcsConnector}/gcs-connector-hadoop3-${V.gcsConnector}-shaded.jar"
val hiveCommon = ("org.apache.hive" % "hive-common" % V.hiveCommon)
.exclude("com.github.joshelser", "dropwizard-metrics-hadoop-metrics2-reporter")

// java
val slf4j = "org.slf4j" % "slf4j-simple" % V.slf4j
val azureIdentity = "com.azure" % "azure-identity" % V.azureSdk
val sentry = "io.sentry" % "sentry" % V.sentry

// transitive overrides
val protobuf = "com.google.protobuf" % "protobuf-java" % V.protobuf
val snappy = "org.xerial.snappy" % "snappy-java" % V.snappy
val protobuf = "com.google.protobuf" % "protobuf-java" % V.protobuf
val snappy = "org.xerial.snappy" % "snappy-java" % V.snappy
val hadoopYarn = "org.apache.hadoop" % "hadoop-yarn-server-resourcemanager" % V.hadoop
val thrift = "org.apache.thrift" % "libthrift" % V.thrift

// snowplow: Note jackson-databind 2.14.x is incompatible with Spark
val badrows = "com.snowplowanalytics" %% "snowplow-badrows" % V.badrows
Expand Down Expand Up @@ -169,7 +173,18 @@ object Dependencies {

val gcpDependencies = Seq(
gcsConnector % Runtime,
hiveCommon % Runtime
hiveCommon % Runtime,
hadoopYarn % Runtime,
thrift % Runtime,
) ++ commonRuntimeDependencies

// Exclusion rules appended to `excludeDependencies` by the build's projects.
// Each rule names one organization/artifact pair (zookeeper, three jetty
// modules, and two hadoop-yarn-server modules) to exclude.
// NOTE(review): presumably these are transitive dependencies that are not
// needed at runtime — confirm before adding or removing entries.
val commonExclusions = Seq(
ExclusionRule(organization = "org.apache.zookeeper", name = "zookeeper"),
ExclusionRule(organization = "org.eclipse.jetty", name = "jetty-client"),
ExclusionRule(organization = "org.eclipse.jetty", name = "jetty-server"),
ExclusionRule(organization = "org.eclipse.jetty", name = "jetty-http"),
ExclusionRule(organization = "org.apache.hadoop", name = "hadoop-yarn-server-applicationhistoryservice"),
ExclusionRule(organization = "org.apache.hadoop", name = "hadoop-yarn-server-common"),
)

}

0 comments on commit 13a2960

Please sign in to comment.