From bf5822ed4673163b8073f186a61490e222e11f5e Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:06:08 +0100 Subject: [PATCH 01/12] updated operator-rs --- rust/crd/Cargo.toml | 2 +- rust/operator-binary/Cargo.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/crd/Cargo.toml b/rust/crd/Cargo.toml index fcb16252..9acc7f26 100644 --- a/rust/crd/Cargo.toml +++ b/rust/crd/Cargo.toml @@ -13,6 +13,6 @@ indoc = "1.0.8" serde = "1.0" serde_json = "1.0" snafu = "0.7" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.30.1" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.30.2" } strum = { version = "0.24", features = ["derive"] } tracing = "0.1" diff --git a/rust/operator-binary/Cargo.toml b/rust/operator-binary/Cargo.toml index f2922d5a..d15e8a8b 100644 --- a/rust/operator-binary/Cargo.toml +++ b/rust/operator-binary/Cargo.toml @@ -19,12 +19,12 @@ serde = "1.0" serde_json = "1.0" snafu = "0.7" stackable-hive-crd = { path = "../crd" } -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.30.1" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.30.2" } strum = { version = "0.24", features = ["derive"] } tokio = { version = "1.23", features = ["full"] } tracing = "0.1" [build-dependencies] built = { version = "0.5", features = ["chrono", "git2"] } -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.30.1" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.30.2" } stackable-hive-crd = { path = "../crd" } From 9a6f3d58fb5da6f0c9e44b0b6b03f971267f34cb Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:14:48 +0100 Subject: [PATCH 02/12] introduced top level cluster config --- Cargo.lock | 8 ++--- rust/crd/src/lib.rs | 45 ++++++++++++++++++-------- 
rust/operator-binary/src/controller.rs | 22 +++++++------ 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 691400b1..ab2e0e59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1799,8 +1799,8 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.30.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.30.1#34cc76200b6432f7b143ca1350e665ddb169df88" +version = "0.30.2" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.30.2#133db3918fb3af191a4203106a5056d77cc9579f" dependencies = [ "chrono", "clap", @@ -1833,8 +1833,8 @@ dependencies = [ [[package]] name = "stackable-operator-derive" -version = "0.30.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.30.1#34cc76200b6432f7b143ca1350e665ddb169df88" +version = "0.30.2" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.30.2#133db3918fb3af191a4203106a5056d77cc9579f" dependencies = [ "darling", "proc-macro2", diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 5547413b..3d4af79b 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -70,17 +70,28 @@ pub enum Error { ) )] pub struct HiveClusterSpec { - /// Emergency stop button, if `true` then all pods are stopped without affecting configuration (as setting `replicas` to `0` would) - #[serde(default, skip_serializing_if = "Option::is_none")] - pub stopped: Option, + /// General Hive metastore cluster settings + pub cluster_config: HiveClusterConfig, /// The Hive metastore image to use pub image: ProductImage, #[serde(default, skip_serializing_if = "Option::is_none")] pub metastore: Option>, + /// Emergency stop button, if `true` then all pods are stopped without affecting configuration (as setting `replicas` to `0` would) #[serde(default, skip_serializing_if = "Option::is_none")] - pub s3: Option, + pub stopped: Option, +} + +#[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] +#[serde(rename_all = 
"camelCase")] +pub struct HiveClusterConfig { + /// Database connection specification + pub database: DatabaseConnectionSpec, + /// HDFS connection specification #[serde(default, skip_serializing_if = "Option::is_none")] pub hdfs: Option, + /// S3 connection specification + #[serde(default, skip_serializing_if = "Option::is_none")] + pub s3: Option, /// Specify the type of the created kubernetes service. /// This attribute will be removed in a future release when listener-operator is finished. /// Use with caution. @@ -95,7 +106,7 @@ pub struct HdfsConnection { pub config_map: String, } -#[derive(strum::Display)] +#[derive(Display)] #[strum(serialize_all = "camelCase")] pub enum HiveRole { #[strum(serialize = "metastore")] @@ -150,8 +161,6 @@ pub struct MetastoreStorageConfig { #[serde(rename_all = "camelCase")] pub struct MetaStoreConfig { pub warehouse_dir: Option, - #[serde(default)] - pub database: DatabaseConnectionSpec, pub resources: Option>, } @@ -283,20 +292,20 @@ impl Configuration for MetaStoreConfig { fn compute_cli( &self, - _resource: &Self::Configurable, + resource: &Self::Configurable, _role_name: &str, ) -> Result>, ConfigError> { let mut result = BTreeMap::new(); result.insert( Self::DB_TYPE_CLI.to_string(), - Some(self.database.db_type.to_string()), + Some(resource.spec.cluster_config.database.db_type.to_string()), ); Ok(result) } fn compute_files( &self, - _resource: &Self::Configurable, + resource: &Self::Configurable, _role_name: &str, _file: &str, ) -> Result>, ConfigError> { @@ -310,19 +319,27 @@ impl Configuration for MetaStoreConfig { } result.insert( Self::CONNECTION_URL.to_string(), - Some(self.database.conn_string.clone()), + Some(resource.spec.cluster_config.database.conn_string.clone()), ); result.insert( Self::CONNECTION_USER_NAME.to_string(), - Some(self.database.user.clone()), + Some(resource.spec.cluster_config.database.user.clone()), ); result.insert( Self::CONNECTION_PASSWORD.to_string(), - Some(self.database.password.clone()), 
+ Some(resource.spec.cluster_config.database.password.clone()), ); result.insert( Self::CONNECTION_DRIVER_NAME.to_string(), - Some(self.database.db_type.get_jdbc_driver_class().to_string()), + Some( + resource + .spec + .cluster_config + .database + .db_type + .get_jdbc_driver_class() + .to_string(), + ), ); result.insert( diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 89267428..e35b4cb5 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -174,15 +174,16 @@ pub async fn reconcile_hive(hive: Arc, ctx: Arc) -> Result = if let Some(s3) = &hive.spec.s3 { - Some( - s3.resolve(client, hive.namespace().as_deref().unwrap()) - .await - .context(ResolveS3ConnectionSnafu)?, - ) - } else { - None - }; + let s3_connection_spec: Option = + if let Some(s3) = &hive.spec.cluster_config.s3 { + Some( + s3.resolve(client, hive.namespace().as_deref().unwrap()) + .await + .context(ResolveS3ConnectionSnafu)?, + ) + } else { + None + }; let validated_config = validate_all_roles_and_groups_config( &resolved_product_image.product_version, @@ -348,6 +349,7 @@ pub fn build_metastore_role_service( selector: Some(role_selector_labels(hive, APP_NAME, &role_name)), type_: Some( hive.spec + .cluster_config .service_type .clone() .unwrap_or_default() @@ -563,7 +565,7 @@ fn build_metastore_rolegroup_statefulset( } } - if let Some(hdfs) = &hive.spec.hdfs { + if let Some(hdfs) = &hive.spec.cluster_config.hdfs { pod_builder.add_volume( VolumeBuilder::new("hdfs-site") .with_config_map(&hdfs.config_map) From 464792af44e801a330bda55376b0f90491f2cb97 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:19:28 +0100 Subject: [PATCH 03/12] adapted examples --- examples/simple-hive-cluster-postgres-s3.yaml | 29 +++++++++---------- examples/simple-hive-cluster.yaml | 11 +++---- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/examples/simple-hive-cluster-postgres-s3.yaml 
b/examples/simple-hive-cluster-postgres-s3.yaml index 03c7568d..46644092 100644 --- a/examples/simple-hive-cluster-postgres-s3.yaml +++ b/examples/simple-hive-cluster-postgres-s3.yaml @@ -19,26 +19,23 @@ spec: image: productVersion: 3.1.3 stackableVersion: 0.2.0 - s3: - inline: - host: test-minio - port: 9000 - accessStyle: Path - credentials: - secretClass: simple-hive-s3-secret-class + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/hive;create=true + user: APP + password: mine + dbType: derby + s3: + inline: + host: test-minio + port: 9000 + accessStyle: Path + credentials: + secretClass: simple-hive-s3-secret-class metastore: roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 - config: - database: - connString: jdbc:postgresql://hive-postgresql.default.svc.cluster.local:5432/hive - user: hive - password: hive - dbType: postgres --- apiVersion: secrets.stackable.tech/v1alpha1 kind: SecretClass diff --git a/examples/simple-hive-cluster.yaml b/examples/simple-hive-cluster.yaml index f1ecec6a..9006e09b 100644 --- a/examples/simple-hive-cluster.yaml +++ b/examples/simple-hive-cluster.yaml @@ -7,16 +7,17 @@ spec: image: productVersion: 2.3.9 stackableVersion: 0.6.0 + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/hive;create=true + user: APP + password: mine + dbType: derby metastore: roleGroups: default: replicas: 1 config: - database: - connString: jdbc:derby:;databaseName=/tmp/hive;create=true - user: APP - password: mine - dbType: derby resources: storage: data: From e5a251874ae86fd24bd754d1d63bab10b403b596 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:24:54 +0100 Subject: [PATCH 04/12] adapted docs --- docs/modules/ROOT/pages/usage.adoc | 50 +++++++++---------- .../examples/code/hive-postgres-s3.yaml | 19 +++---- 2 files changed, 31 insertions(+), 38 deletions(-) diff --git a/docs/modules/ROOT/pages/usage.adoc b/docs/modules/ROOT/pages/usage.adoc index 
de754a40..f86fc627 100644 --- a/docs/modules/ROOT/pages/usage.adoc +++ b/docs/modules/ROOT/pages/usage.adoc @@ -9,12 +9,13 @@ Please open an https://github.com/stackabletech/hive-operator/issues[issue] if y == S3 Support Hive supports creating tables in S3 compatible object stores. -To use this feature you need to provide connection details for the object store using the xref:home:concepts:s3.adoc[S3Connection]. +To use this feature you need to provide connection details for the object store using the xref:home:concepts:s3.adoc[S3Connection] in the top level `clusterConfig`. An example usage can look like this: [source,yaml] ---- +clusterConfig: s3: inline: host: minio @@ -27,10 +28,11 @@ An example usage can look like this: == Apache HDFS Support As well as S3, Hive also supports creating tables in HDFS. -You can add the HDFS connection as follows: +You can add the HDFS connection in the top level `clusterConfig` as follows: [source,yaml] ---- +clusterConfig: hdfs: configMap: my-hdfs-cluster # Name of the HdfsCluster ---- @@ -185,19 +187,16 @@ spec: image: productVersion: 3.1.3 stackableVersion: 0.2.0 + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/metastore_db;create=true + user: APP + password: mine + dbType: derby metastore: roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 - config: - database: - connString: jdbc:derby:;databaseName=/tmp/metastore_db;create=true - user: APP - password: mine - dbType: derby ---- To create a single node Apache Hive Metastore (v2.3.9) cluster with derby and S3 access, deploy a minio (or use any available S3 bucket): @@ -229,26 +228,23 @@ spec: image: productVersion: 3.1.3 stackableVersion: 0.2.0 - s3: - inline: - host: minio - port: 9000 - accessStyle: Path - credentials: - secretClass: simple-hive-s3-secret-class + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/stackable/metastore_db;create=true + user: APP + password: mine + dbType: derby + s3: + 
inline: + host: minio + port: 9000 + accessStyle: Path + credentials: + secretClass: simple-hive-s3-secret-class metastore: roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 - config: - database: - connString: jdbc:derby:;databaseName=/stackable/metastore_db;create=true - user: APP - password: mine - dbType: derby --- apiVersion: secrets.stackable.tech/v1alpha1 kind: SecretClass diff --git a/docs/modules/getting_started/examples/code/hive-postgres-s3.yaml b/docs/modules/getting_started/examples/code/hive-postgres-s3.yaml index d44ec38a..2035d1be 100644 --- a/docs/modules/getting_started/examples/code/hive-postgres-s3.yaml +++ b/docs/modules/getting_started/examples/code/hive-postgres-s3.yaml @@ -7,18 +7,15 @@ spec: image: productVersion: 3.1.3 stackableVersion: 0.2.0 - s3: - reference: minio + clusterConfig: + database: + connString: jdbc:postgresql://postgresql:5432/hive + user: hive + password: hive + dbType: postgres + s3: + reference: minio metastore: roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 - config: - database: - connString: jdbc:postgresql://postgresql:5432/hive - user: hive - password: hive - dbType: postgres From 85c20a1ed3893ac1bc8f6340cf4f9ebeb670fc91 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:27:41 +0100 Subject: [PATCH 05/12] regenerated charts --- deploy/helm/hive-operator/crds/crds.yaml | 307 +++++++++++------------ deploy/manifests/crds.yaml | 307 +++++++++++------------ 2 files changed, 284 insertions(+), 330 deletions(-) diff --git a/deploy/helm/hive-operator/crds/crds.yaml b/deploy/helm/hive-operator/crds/crds.yaml index 9dbae6c0..77e0eaa0 100644 --- a/deploy/helm/hive-operator/crds/crds.yaml +++ b/deploy/helm/hive-operator/crds/crds.yaml @@ -24,14 +24,150 @@ spec: properties: spec: properties: - hdfs: - nullable: true + clusterConfig: + description: General Hive metastore cluster settings properties: - configMap: - description: Name of the 
discovery-configmap providing information about the HDFS cluster + database: + description: Database connection specification + properties: + connString: + type: string + dbType: + enum: + - derby + - mysql + - postgres + - oracle + - mssql + type: string + password: + type: string + user: + type: string + required: + - connString + - dbType + - password + - user + type: object + hdfs: + description: HDFS connection specification + nullable: true + properties: + configMap: + description: Name of the discovery-configmap providing information about the HDFS cluster + type: string + required: + - configMap + type: object + s3: + description: S3 connection specification + nullable: true + oneOf: + - required: + - inline + - required: + - reference + properties: + inline: + description: S3 connection definition as CRD. + properties: + accessStyle: + description: Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on + enum: + - Path + - VirtualHosted + nullable: true + type: string + credentials: + description: If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient. 
+ nullable: true + properties: + scope: + description: '[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)' + nullable: true + properties: + node: + default: false + type: boolean + pod: + default: false + type: boolean + services: + default: [] + items: + type: string + type: array + type: object + secretClass: + description: '[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials' + type: string + required: + - secretClass + type: object + host: + description: Hostname of the S3 server without any protocol or port + nullable: true + type: string + port: + description: Port the S3 server listens on. If not specified the products will determine the port to use. + format: uint16 + minimum: 0.0 + nullable: true + type: integer + tls: + description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting. + nullable: true + properties: + verification: + description: The verification method used to verify the certificates of the server and/or the client + oneOf: + - required: + - none + - required: + - server + properties: + none: + description: Use TLS but don't verify certificates + type: object + server: + description: Use TLS and ca certificate to verify the server + properties: + caCert: + description: Ca cert to verify the server + oneOf: + - required: + - webPki + - required: + - secretClass + properties: + secretClass: + description: Name of the SecretClass which will provide the ca cert. Note that a SecretClass does not need to have a key but can also work with just a ca cert. So if you got provided with a ca cert but don't have access to the key you can still use this method. + type: string + webPki: + description: Use TLS and the ca certificates trusted by the common web browsers to verify the server. This can be useful when you e.g. 
use public AWS S3 or other public available services. + type: object + type: object + required: + - caCert + type: object + type: object + required: + - verification + type: object + type: object + reference: + type: string + type: object + serviceType: + description: Specify the type of the created kubernetes service. This attribute will be removed in a future release when listener-operator is finished. Use with caution. + enum: + - NodePort + - ClusterIP + nullable: true type: string required: - - configMap + - database type: object image: anyOf: @@ -87,33 +223,6 @@ spec: config: default: {} properties: - database: - default: - connString: '' - user: '' - password: '' - dbType: derby - properties: - connString: - type: string - dbType: - enum: - - derby - - mysql - - postgres - - oracle - - mssql - type: string - password: - type: string - user: - type: string - required: - - connString - - dbType - - password - - user - type: object resources: nullable: true properties: @@ -338,33 +447,6 @@ spec: config: default: {} properties: - database: - default: - connString: '' - user: '' - password: '' - dbType: derby - properties: - connString: - type: string - dbType: - enum: - - derby - - mysql - - postgres - - oracle - - mssql - type: string - password: - type: string - user: - type: string - required: - - connString - - dbType - - password - - user - type: object resources: nullable: true properties: @@ -619,117 +701,12 @@ spec: required: - roleGroups type: object - s3: - description: Operators are expected to define fields for this type in order to work with S3 connections. - nullable: true - oneOf: - - required: - - inline - - required: - - reference - properties: - inline: - description: S3 connection definition as CRD. - properties: - accessStyle: - description: Which access style to use. Defaults to virtual hosted-style as most of the data products out there. 
Have a look at the official documentation on - enum: - - Path - - VirtualHosted - nullable: true - type: string - credentials: - description: If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient. - nullable: true - properties: - scope: - description: '[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)' - nullable: true - properties: - node: - default: false - type: boolean - pod: - default: false - type: boolean - services: - default: [] - items: - type: string - type: array - type: object - secretClass: - description: '[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials' - type: string - required: - - secretClass - type: object - host: - description: Hostname of the S3 server without any protocol or port - nullable: true - type: string - port: - description: Port the S3 server listens on. If not specified the products will determine the port to use. - format: uint16 - minimum: 0.0 - nullable: true - type: integer - tls: - description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting. - nullable: true - properties: - verification: - description: The verification method used to verify the certificates of the server and/or the client - oneOf: - - required: - - none - - required: - - server - properties: - none: - description: Use TLS but don't verify certificates - type: object - server: - description: Use TLS and ca certificate to verify the server - properties: - caCert: - description: Ca cert to verify the server - oneOf: - - required: - - webPki - - required: - - secretClass - properties: - secretClass: - description: Name of the SecretClass which will provide the ca cert. 
Note that a SecretClass does not need to have a key but can also work with just a ca cert. So if you got provided with a ca cert but don't have access to the key you can still use this method. - type: string - webPki: - description: Use TLS and the ca certificates trusted by the common web browsers to verify the server. This can be useful when you e.g. use public AWS S3 or other public available services. - type: object - type: object - required: - - caCert - type: object - type: object - required: - - verification - type: object - type: object - reference: - type: string - type: object - serviceType: - description: Specify the type of the created kubernetes service. This attribute will be removed in a future release when listener-operator is finished. Use with caution. - enum: - - NodePort - - ClusterIP - nullable: true - type: string stopped: description: Emergency stop button, if `true` then all pods are stopped without affecting configuration (as setting `replicas` to `0` would) nullable: true type: boolean required: + - clusterConfig - image type: object status: diff --git a/deploy/manifests/crds.yaml b/deploy/manifests/crds.yaml index 0fa81862..468251bf 100644 --- a/deploy/manifests/crds.yaml +++ b/deploy/manifests/crds.yaml @@ -25,14 +25,150 @@ spec: properties: spec: properties: - hdfs: - nullable: true + clusterConfig: + description: General Hive metastore cluster settings properties: - configMap: - description: Name of the discovery-configmap providing information about the HDFS cluster + database: + description: Database connection specification + properties: + connString: + type: string + dbType: + enum: + - derby + - mysql + - postgres + - oracle + - mssql + type: string + password: + type: string + user: + type: string + required: + - connString + - dbType + - password + - user + type: object + hdfs: + description: HDFS connection specification + nullable: true + properties: + configMap: + description: Name of the discovery-configmap providing 
information about the HDFS cluster + type: string + required: + - configMap + type: object + s3: + description: S3 connection specification + nullable: true + oneOf: + - required: + - inline + - required: + - reference + properties: + inline: + description: S3 connection definition as CRD. + properties: + accessStyle: + description: Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on + enum: + - Path + - VirtualHosted + nullable: true + type: string + credentials: + description: If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient. + nullable: true + properties: + scope: + description: '[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)' + nullable: true + properties: + node: + default: false + type: boolean + pod: + default: false + type: boolean + services: + default: [] + items: + type: string + type: array + type: object + secretClass: + description: '[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials' + type: string + required: + - secretClass + type: object + host: + description: Hostname of the S3 server without any protocol or port + nullable: true + type: string + port: + description: Port the S3 server listens on. If not specified the products will determine the port to use. + format: uint16 + minimum: 0.0 + nullable: true + type: integer + tls: + description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting. 
+ nullable: true + properties: + verification: + description: The verification method used to verify the certificates of the server and/or the client + oneOf: + - required: + - none + - required: + - server + properties: + none: + description: Use TLS but don't verify certificates + type: object + server: + description: Use TLS and ca certificate to verify the server + properties: + caCert: + description: Ca cert to verify the server + oneOf: + - required: + - webPki + - required: + - secretClass + properties: + secretClass: + description: Name of the SecretClass which will provide the ca cert. Note that a SecretClass does not need to have a key but can also work with just a ca cert. So if you got provided with a ca cert but don't have access to the key you can still use this method. + type: string + webPki: + description: Use TLS and the ca certificates trusted by the common web browsers to verify the server. This can be useful when you e.g. use public AWS S3 or other public available services. + type: object + type: object + required: + - caCert + type: object + type: object + required: + - verification + type: object + type: object + reference: + type: string + type: object + serviceType: + description: Specify the type of the created kubernetes service. This attribute will be removed in a future release when listener-operator is finished. Use with caution. 
+ enum: + - NodePort + - ClusterIP + nullable: true type: string required: - - configMap + - database type: object image: anyOf: @@ -88,33 +224,6 @@ spec: config: default: {} properties: - database: - default: - connString: '' - user: '' - password: '' - dbType: derby - properties: - connString: - type: string - dbType: - enum: - - derby - - mysql - - postgres - - oracle - - mssql - type: string - password: - type: string - user: - type: string - required: - - connString - - dbType - - password - - user - type: object resources: nullable: true properties: @@ -339,33 +448,6 @@ spec: config: default: {} properties: - database: - default: - connString: '' - user: '' - password: '' - dbType: derby - properties: - connString: - type: string - dbType: - enum: - - derby - - mysql - - postgres - - oracle - - mssql - type: string - password: - type: string - user: - type: string - required: - - connString - - dbType - - password - - user - type: object resources: nullable: true properties: @@ -620,117 +702,12 @@ spec: required: - roleGroups type: object - s3: - description: Operators are expected to define fields for this type in order to work with S3 connections. - nullable: true - oneOf: - - required: - - inline - - required: - - reference - properties: - inline: - description: S3 connection definition as CRD. - properties: - accessStyle: - description: Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on - enum: - - Path - - VirtualHosted - nullable: true - type: string - credentials: - description: If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient. 
- nullable: true - properties: - scope: - description: '[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)' - nullable: true - properties: - node: - default: false - type: boolean - pod: - default: false - type: boolean - services: - default: [] - items: - type: string - type: array - type: object - secretClass: - description: '[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials' - type: string - required: - - secretClass - type: object - host: - description: Hostname of the S3 server without any protocol or port - nullable: true - type: string - port: - description: Port the S3 server listens on. If not specified the products will determine the port to use. - format: uint16 - minimum: 0.0 - nullable: true - type: integer - tls: - description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting. - nullable: true - properties: - verification: - description: The verification method used to verify the certificates of the server and/or the client - oneOf: - - required: - - none - - required: - - server - properties: - none: - description: Use TLS but don't verify certificates - type: object - server: - description: Use TLS and ca certificate to verify the server - properties: - caCert: - description: Ca cert to verify the server - oneOf: - - required: - - webPki - - required: - - secretClass - properties: - secretClass: - description: Name of the SecretClass which will provide the ca cert. Note that a SecretClass does not need to have a key but can also work with just a ca cert. So if you got provided with a ca cert but don't have access to the key you can still use this method. - type: string - webPki: - description: Use TLS and the ca certificates trusted by the common web browsers to verify the server. This can be useful when you e.g. 
use public AWS S3 or other public available services. - type: object - type: object - required: - - caCert - type: object - type: object - required: - - verification - type: object - type: object - reference: - type: string - type: object - serviceType: - description: Specify the type of the created kubernetes service. This attribute will be removed in a future release when listener-operator is finished. Use with caution. - enum: - - NodePort - - ClusterIP - nullable: true - type: string stopped: description: Emergency stop button, if `true` then all pods are stopped without affecting configuration (as setting `replicas` to `0` would) nullable: true type: boolean required: + - clusterConfig - image type: object status: From 2e76b70fc63457fece5200d090c2b9050b63f44a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:41:01 +0100 Subject: [PATCH 06/12] adapted tests --- .../01-install-hive.yaml.j2 | 18 ++++++------------ .../03-remove-role-group.yaml.j2 | 6 ++++++ .../04-change-rolegroup.yaml | 12 ++++++------ .../kuttl/resources/10-install-hive.yaml.j2 | 17 ++++++----------- .../kuttl/smoke/10-install-hive.yaml.j2 | 19 ++++++++----------- 5 files changed, 32 insertions(+), 40 deletions(-) diff --git a/tests/templates/kuttl/orphaned-resources/01-install-hive.yaml.j2 b/tests/templates/kuttl/orphaned-resources/01-install-hive.yaml.j2 index dcbf4672..9f20a16e 100644 --- a/tests/templates/kuttl/orphaned-resources/01-install-hive.yaml.j2 +++ b/tests/templates/kuttl/orphaned-resources/01-install-hive.yaml.j2 @@ -7,21 +7,15 @@ spec: image: productVersion: "{{ test_scenario['values']['hive-latest'].split('-stackable')[0] }}" stackableVersion: "{{ test_scenario['values']['hive-latest'].split('-stackable')[1] }}" + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/hive;create=true + user: APP + password: mine + dbType: derby metastore: roleGroups: default: replicas: 1 - config: - database: - connString: 
jdbc:derby:;databaseName=/tmp/hive;create=true - user: APP - password: mine - dbType: derby remove: replicas: 1 - config: - database: - connString: jdbc:derby:;databaseName=/tmp/hive;create=true - user: APP - password: mine - dbType: derby diff --git a/tests/templates/kuttl/orphaned-resources/03-remove-role-group.yaml.j2 b/tests/templates/kuttl/orphaned-resources/03-remove-role-group.yaml.j2 index 0e444610..b4c6cd23 100644 --- a/tests/templates/kuttl/orphaned-resources/03-remove-role-group.yaml.j2 +++ b/tests/templates/kuttl/orphaned-resources/03-remove-role-group.yaml.j2 @@ -7,6 +7,12 @@ spec: image: productVersion: "{{ test_scenario['values']['hive-latest'].split('-stackable')[0] }}" stackableVersion: "{{ test_scenario['values']['hive-latest'].split('-stackable')[1] }}" + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/hive;create=true + user: APP + password: mine + dbType: derby metastore: roleGroups: remove: null diff --git a/tests/templates/kuttl/orphaned-resources/04-change-rolegroup.yaml b/tests/templates/kuttl/orphaned-resources/04-change-rolegroup.yaml index 01482e3e..8e70c973 100644 --- a/tests/templates/kuttl/orphaned-resources/04-change-rolegroup.yaml +++ b/tests/templates/kuttl/orphaned-resources/04-change-rolegroup.yaml @@ -4,14 +4,14 @@ kind: HiveCluster metadata: name: test-hive spec: + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/hive;create=true + user: APP + password: mine + dbType: derby metastore: roleGroups: default: null newrolegroup: replicas: 1 - config: - database: - connString: jdbc:derby:;databaseName=/tmp/hive;create=true - user: APP - password: mine - dbType: derby diff --git a/tests/templates/kuttl/resources/10-install-hive.yaml.j2 b/tests/templates/kuttl/resources/10-install-hive.yaml.j2 index 03f4c205..fc5c8f28 100644 --- a/tests/templates/kuttl/resources/10-install-hive.yaml.j2 +++ b/tests/templates/kuttl/resources/10-install-hive.yaml.j2 @@ -7,6 +7,12 @@ spec: image: productVersion: 
"{{ test_scenario['values']['hive'].split('-stackable')[0] }}" stackableVersion: "{{ test_scenario['values']['hive'].split('-stackable')[1] }}" + clusterConfig: + database: + connString: jdbc:derby:;databaseName=/tmp/hive;create=true + user: APP + password: mine + dbType: derby metastore: config: resources: @@ -21,20 +27,9 @@ spec: roleGroups: resources-from-role: replicas: 1 - config: - database: - connString: jdbc:derby:;databaseName=/tmp/hive;create=true - user: APP - password: mine - dbType: derby resources-from-role-group: replicas: 1 config: - database: - connString: jdbc:derby:;databaseName=/tmp/hive;create=true - user: APP - password: mine - dbType: derby resources: storage: data: diff --git a/tests/templates/kuttl/smoke/10-install-hive.yaml.j2 b/tests/templates/kuttl/smoke/10-install-hive.yaml.j2 index 17c0de73..610d1d19 100644 --- a/tests/templates/kuttl/smoke/10-install-hive.yaml.j2 +++ b/tests/templates/kuttl/smoke/10-install-hive.yaml.j2 @@ -7,21 +7,18 @@ spec: image: productVersion: "{{ test_scenario['values']['hive'].split('-stackable')[0] }}" stackableVersion: "{{ test_scenario['values']['hive'].split('-stackable')[1] }}" - s3: - reference: minio + clusterConfig: + database: + connString: jdbc:postgresql://hive-postgresql:5432/hive + user: hive + password: hive + dbType: postgres + s3: + reference: minio metastore: roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 - config: - database: - connString: jdbc:postgresql://hive-postgresql:5432/hive - user: hive - password: hive - dbType: postgres --- apiVersion: s3.stackable.tech/v1alpha1 kind: S3Connection From 59b36bf82a96e880f63aaedc4856ebf7c4c0545a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:41:57 +0100 Subject: [PATCH 07/12] adapted last docs --- docs/modules/ROOT/pages/usage.adoc | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/modules/ROOT/pages/usage.adoc b/docs/modules/ROOT/pages/usage.adoc index 
f86fc627..55e8f69d 100644 --- a/docs/modules/ROOT/pages/usage.adoc +++ b/docs/modules/ROOT/pages/usage.adoc @@ -199,6 +199,8 @@ spec: replicas: 1 ---- +WARNING: You should not use the `Derby` database with more than one replica or in production. Derby stores data locally and therefore the data not shared between different metastore Pods and lost after Pod restarts. + To create a single node Apache Hive Metastore (v2.3.9) cluster with derby and S3 access, deploy a minio (or use any available S3 bucket): [source,bash] ---- @@ -298,17 +300,14 @@ spec: image: productVersion: 3.1.3 stackableVersion: 0.2.0 + clusterConfig: + database: + connString: jdbc:postgresql://hive-postgresql.default.svc.cluster.local:5432/hive + user: hive + password: hive + dbType: postgres metastore: roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 - config: - database: - connString: jdbc:postgresql://hive-postgresql.default.svc.cluster.local:5432/hive - user: hive - password: hive - dbType: postgres ---- From 7744353b5b0462f271247f60e80f5082fa72e716 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:49:30 +0100 Subject: [PATCH 08/12] adapted changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 386d30c8..eef16d11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,12 +11,15 @@ All notable changes to this project will be documented in this file. - Consolidated security context user, group and fs group ([#277]). - [BREAKING] Use Product image selection instead of version. `spec.version` has been replaced by `spec.image` ([#280]). - Fix role group node selector ([#283]). +- [BREAKING] Moved `database` specification from role / role-group level to top-level `clusterConfig` ([#292]). +- [BREAKING] Moved `s3`, `serviceType` and `hdfs` discovery to top-level `clusterConfig` ([#292]). 
[#271]: https://github.com/stackabletech/hive-operator/pull/271 [#274]: https://github.com/stackabletech/hive-operator/pull/274 [#277]: https://github.com/stackabletech/hive-operator/pull/277 [#280]: https://github.com/stackabletech/hive-operator/pull/280 [#283]: https://github.com/stackabletech/hive-operator/pull/283 +[#283]: https://github.com/stackabletech/hive-operator/pull/292 ## [0.8.0] - 2022-11-07 From 4d90c28dc3da0f366e7b1c4e0ec9e38f5177913f Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:51:32 +0100 Subject: [PATCH 09/12] fixed changelog reference --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eef16d11..1bf2d456 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ All notable changes to this project will be documented in this file. [#277]: https://github.com/stackabletech/hive-operator/pull/277 [#280]: https://github.com/stackabletech/hive-operator/pull/280 [#283]: https://github.com/stackabletech/hive-operator/pull/283 -[#283]: https://github.com/stackabletech/hive-operator/pull/292 +[#292]: https://github.com/stackabletech/hive-operator/pull/292 ## [0.8.0] - 2022-11-07 From d91d6b294ba4a94460e760ffeebdd5fbdf127095 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 15:54:32 +0100 Subject: [PATCH 10/12] fixes --- CHANGELOG.md | 1 + docs/modules/ROOT/pages/usage.adoc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bf2d456..d4c6baec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ All notable changes to this project will be documented in this file. - Fix role group node selector ([#283]). - [BREAKING] Moved `database` specification from role / role-group level to top-level `clusterConfig` ([#292]). - [BREAKING] Moved `s3`, `serviceType` and `hdfs` discovery to top-level `clusterConfig` ([#292]).
+- `operator-rs` `0.30.1` -> `0.30.2` ([#292]). [#271]: https://github.com/stackabletech/hive-operator/pull/271 [#274]: https://github.com/stackabletech/hive-operator/pull/274 diff --git a/docs/modules/ROOT/pages/usage.adoc b/docs/modules/ROOT/pages/usage.adoc index 55e8f69d..88f099ad 100644 --- a/docs/modules/ROOT/pages/usage.adoc +++ b/docs/modules/ROOT/pages/usage.adoc @@ -199,7 +199,7 @@ spec: replicas: 1 ---- -WARNING: You should not use the `Derby` database with more than one replica or in production. Derby stores data locally and therefore the data not shared between different metastore Pods and lost after Pod restarts. +WARNING: You should not use the `Derby` database with more than one replica or in production. Derby stores data locally and therefore the data is not shared between different metastore Pods and is lost after Pod restarts. To create a single node Apache Hive Metastore (v2.3.9) cluster with derby and S3 access, deploy a minio (or use any available S3 bucket): [source,bash] From 829842816eafabb999385d2ba27807500770b252 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 16:23:35 +0100 Subject: [PATCH 11/12] adapted to pr review --- rust/crd/src/lib.rs | 19 ++++++++++--------- rust/operator-binary/src/controller.rs | 11 ++++++++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 3d4af79b..b13924dc 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -160,6 +160,8 @@ pub struct MetastoreStorageConfig { #[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct MetaStoreConfig { + /// The location of default database for the Hive warehouse. + /// Maps to the `hive.metastore.warehouse.dir` setting.
pub warehouse_dir: Option, pub resources: Option>, } @@ -272,7 +274,7 @@ impl Configuration for MetaStoreConfig { fn compute_env( &self, - _resource: &Self::Configurable, + _hive: &Self::Configurable, _role_name: &str, ) -> Result>, ConfigError> { let mut result = BTreeMap::new(); @@ -292,20 +294,20 @@ impl Configuration for MetaStoreConfig { fn compute_cli( &self, - resource: &Self::Configurable, + hive: &Self::Configurable, _role_name: &str, ) -> Result>, ConfigError> { let mut result = BTreeMap::new(); result.insert( Self::DB_TYPE_CLI.to_string(), - Some(resource.spec.cluster_config.database.db_type.to_string()), + Some(hive.spec.cluster_config.database.db_type.to_string()), ); Ok(result) } fn compute_files( &self, - resource: &Self::Configurable, + hive: &Self::Configurable, _role_name: &str, _file: &str, ) -> Result>, ConfigError> { @@ -319,21 +321,20 @@ impl Configuration for MetaStoreConfig { } result.insert( Self::CONNECTION_URL.to_string(), - Some(resource.spec.cluster_config.database.conn_string.clone()), + Some(hive.spec.cluster_config.database.conn_string.clone()), ); result.insert( Self::CONNECTION_USER_NAME.to_string(), - Some(resource.spec.cluster_config.database.user.clone()), + Some(hive.spec.cluster_config.database.user.clone()), ); result.insert( Self::CONNECTION_PASSWORD.to_string(), - Some(resource.spec.cluster_config.database.password.clone()), + Some(hive.spec.cluster_config.database.password.clone()), ); result.insert( Self::CONNECTION_DRIVER_NAME.to_string(), Some( - resource - .spec + hive.spec .cluster_config .database .db_type diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index e35b4cb5..21e48491 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -66,6 +66,8 @@ pub struct Ctx { #[strum_discriminants(derive(strum::IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { + #[snafu(display("object defines no namespace"))] + 
ObjectHasNoNamespace, #[snafu(display("object defines no metastore role"))] NoMetaStoreRole, #[snafu(display("failed to calculate global service name"))] @@ -177,9 +179,12 @@ pub async fn reconcile_hive(hive: Arc, ctx: Arc) -> Result = if let Some(s3) = &hive.spec.cluster_config.s3 { Some( - s3.resolve(client, hive.namespace().as_deref().unwrap()) - .await - .context(ResolveS3ConnectionSnafu)?, + s3.resolve( + client, + &hive.namespace().ok_or(Error::ObjectHasNoNamespace)?, + ) + .await + .context(ResolveS3ConnectionSnafu)?, ) } else { None From 07708ed7bbb9751dde84ff597f6c97efc96ee1ba Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 10 Jan 2023 16:27:50 +0100 Subject: [PATCH 12/12] regenerated charts --- deploy/helm/hive-operator/crds/crds.yaml | 2 ++ deploy/manifests/crds.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/deploy/helm/hive-operator/crds/crds.yaml b/deploy/helm/hive-operator/crds/crds.yaml index 77e0eaa0..98d30e9d 100644 --- a/deploy/helm/hive-operator/crds/crds.yaml +++ b/deploy/helm/hive-operator/crds/crds.yaml @@ -421,6 +421,7 @@ spec: type: object type: object warehouseDir: + description: The location of default database for the Hive warehouse. Maps to the `hive.metastore.warehouse.dir` setting. nullable: true type: string type: object @@ -645,6 +646,7 @@ spec: type: object type: object warehouseDir: + description: The location of default database for the Hive warehouse. Maps to the `hive.metastore.warehouse.dir` setting. nullable: true type: string type: object diff --git a/deploy/manifests/crds.yaml b/deploy/manifests/crds.yaml index 468251bf..ca479721 100644 --- a/deploy/manifests/crds.yaml +++ b/deploy/manifests/crds.yaml @@ -422,6 +422,7 @@ spec: type: object type: object warehouseDir: + description: The location of default database for the Hive warehouse. Maps to the `hive.metastore.warehouse.dir` setting. 
nullable: true type: string type: object @@ -646,6 +647,7 @@ spec: type: object type: object warehouseDir: + description: The location of default database for the Hive warehouse. Maps to the `hive.metastore.warehouse.dir` setting. nullable: true type: string type: object