From 48851c3eb302dc6aeb694dc738cfd4e0bb57c781 Mon Sep 17 00:00:00 2001 From: zwzhang0107 Date: Wed, 15 Nov 2023 17:06:57 +0800 Subject: [PATCH] add hadoop-yarn chart Signed-off-by: zwzhang0107 --- charts/hadoop-yarn | 1 + hadoop-yarn/3.3.2-v1.0/Chart.yaml | 6 + hadoop-yarn/3.3.2-v1.0/templates/NOTES.txt | 19 + hadoop-yarn/3.3.2-v1.0/templates/_helpers.tpl | 51 +++ .../templates/hadoop-configmap.yaml | 325 ++++++++++++++++++ hadoop-yarn/3.3.2-v1.0/templates/rbac.yaml | 36 ++ .../templates/yarn-nm-statefulset.yaml | 166 +++++++++ .../3.3.2-v1.0/templates/yarn-nm-svc.yaml | 23 ++ .../templates/yarn-rm-deployment.yaml | 108 ++++++ .../3.3.2-v1.0/templates/yarn-rm-svc.yaml | 33 ++ hadoop-yarn/3.3.2-v1.0/values.yaml | 82 +++++ 11 files changed, 850 insertions(+) create mode 120000 charts/hadoop-yarn create mode 100644 hadoop-yarn/3.3.2-v1.0/Chart.yaml create mode 100644 hadoop-yarn/3.3.2-v1.0/templates/NOTES.txt create mode 100644 hadoop-yarn/3.3.2-v1.0/templates/_helpers.tpl create mode 100644 hadoop-yarn/3.3.2-v1.0/templates/hadoop-configmap.yaml create mode 100644 hadoop-yarn/3.3.2-v1.0/templates/rbac.yaml create mode 100644 hadoop-yarn/3.3.2-v1.0/templates/yarn-nm-statefulset.yaml create mode 100644 hadoop-yarn/3.3.2-v1.0/templates/yarn-nm-svc.yaml create mode 100644 hadoop-yarn/3.3.2-v1.0/templates/yarn-rm-deployment.yaml create mode 100644 hadoop-yarn/3.3.2-v1.0/templates/yarn-rm-svc.yaml create mode 100644 hadoop-yarn/3.3.2-v1.0/values.yaml diff --git a/charts/hadoop-yarn b/charts/hadoop-yarn new file mode 120000 index 0000000..cc93c0a --- /dev/null +++ b/charts/hadoop-yarn @@ -0,0 +1 @@ +../hadoop-yarn/3.3.2-v1.0 \ No newline at end of file diff --git a/hadoop-yarn/3.3.2-v1.0/Chart.yaml b/hadoop-yarn/3.3.2-v1.0/Chart.yaml new file mode 100644 index 0000000..1b36301 --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: hadoop-yarn +description: A Helm chart for hadoop yarn. +type: application +version: 3.3.2 +appVersion: "3.3.2" diff --git a/hadoop-yarn/3.3.2-v1.0/templates/NOTES.txt b/hadoop-yarn/3.3.2-v1.0/templates/NOTES.txt new file mode 100644 index 0000000..59f6969 --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/templates/NOTES.txt @@ -0,0 +1,19 @@ +0. Check running state of yarn pods + kubectl get pod -n {{ .Values.installation.namespace }} + +1. You can list the yarn nodes by running this command: + kubectl exec -n {{ .Values.installation.namespace }} -it ${yarn-rm-pod-name} -- /opt/hadoop/bin/yarn node -list + +2. Create a port-forward to the yarn resource manager UI: + kubectl port-forward -n {{ .Values.installation.namespace }} service/{{ .Values.yarn.resourceManager.serviceName }} {{ .Values.yarn.resourceManager.webPort}}:{{.Values.yarn.resourceManager.webPort}} + + Then open the ui in your browser: + + open http://localhost:{{.Values.yarn.resourceManager.webPort}} + +3. You can run included hadoop tests like this: + kubectl exec -n {{ .Values.installation.namespace }} -it ${yarn-rm-pod-name} yarn-rm -- /opt/hadoop/bin/hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-{{ .Values.hadoopVersion }}-tests.jar TestDFSIO -write -nrFiles 5 -fileSize 128MB -resFile /tmp/TestDFSIOwrite.txt + kubectl exec -n {{ .Values.installation.namespace }} -it ${yarn-rm-pod-name} yarn-rm -- /opt/spark/bin/spark-submit --master yarn --deploy-mode cluster --class org.apache.spark.examples.SparkPi /opt/spark/examples/jars/spark-examples_2.12-3.3.3.jar 1000 + +4. 
You can list the mapreduce jobs like this: + kubectl exec -n {{ .Values.installation.namespace }} -it ${yarn-rm-pod-name} -- /opt/hadoop/bin/mapred job -list diff --git a/hadoop-yarn/3.3.2-v1.0/templates/_helpers.tpl b/hadoop-yarn/3.3.2-v1.0/templates/_helpers.tpl new file mode 100644 index 0000000..2b436ca --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "hadoop-yarn.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "hadoop-yarn.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "hadoop-yarn.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "hadoop-yarn.labels" -}} +helm.sh/chart: {{ include "hadoop-yarn.chart" . }} +{{ include "hadoop-yarn.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "hadoop-yarn.selectorLabels" -}} +app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/hadoop-yarn/3.3.2-v1.0/templates/hadoop-configmap.yaml b/hadoop-yarn/3.3.2-v1.0/templates/hadoop-configmap.yaml new file mode 100644 index 0000000..b6d8d1b --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/templates/hadoop-configmap.yaml @@ -0,0 +1,325 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ .Values.installation.namespace }} + name: {{ include "hadoop-yarn.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + helm.sh/chart: {{ include "hadoop-yarn.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} +data: + bootstrap.sh: | + #!/bin/bash -x + + echo Starting + + : ${HADOOP_HOME:=/opt/hadoop} + + echo Using ${HADOOP_HOME} as HADOOP_HOME + + . 
$HADOOP_HOME/etc/hadoop/hadoop-env.sh + + # ------------------------------------------------------ + # Directory to find config artifacts + # ------------------------------------------------------ + + CONFIG_DIR="/tmp/hadoop-config" + + # ------------------------------------------------------ + # Copy config files from volume mount + # ------------------------------------------------------ + + for f in slaves core-site.xml mapred-site.xml yarn-site.xml hdfs-site.xml; do + if [[ -e ${CONFIG_DIR}/$f ]]; then + cp ${CONFIG_DIR}/$f $HADOOP_HOME/etc/hadoop/$f + else + echo "ERROR: Could not find $f in $CONFIG_DIR" + exit 1 + fi + done + + # ------------------------------------------------------ + # Start RESOURCE MANAGER and PROXY SERVER as daemons + # ------------------------------------------------------ + if [[ "${YARN_ROLE}" =~ "yarn-rm" ]]; then + $HADOOP_HOME/bin/yarn --loglevel {{ .Values.logLevel }} --daemon start resourcemanager + $HADOOP_HOME/bin/yarn --loglevel {{ .Values.logLevel }} --daemon start proxyserver + fi + + # ------------------------------------------------------ + # Start NODE MANAGER + # ------------------------------------------------------ + if [[ "${YARN_ROLE}" =~ "yarn-nm" ]]; then + useradd hadoop + chown root:root $HADOOP_HOME/etc/ + chown root:root $HADOOP_HOME/etc/hadoop/ + chown root:root $HADOOP_HOME/etc/hadoop/container-executor.cfg + echo 'banned.users=bin + allowed.system.users=root,nobody,impala,hive,hdfs,yarn,hadoop + feature.tc.enabled=0 + min.user.id=0 + yarn.nodemanager.linux-container-executor.group=hadoop' > $HADOOP_HOME/etc/hadoop/container-executor.cfg + + chown root:hadoop $HADOOP_HOME/bin/container-executor + chmod 6050 $HADOOP_HOME/bin/container-executor + + sed -i '/<\/configuration>/d' $HADOOP_HOME/etc/hadoop/yarn-site.xml + cat >> $HADOOP_HOME/etc/hadoop/yarn-site.xml <<- EOM + + yarn.nodemanager.resource.memory-mb + ${NM_INIT_MEMORY_MB:-1024} + + + yarn.nodemanager.resource.cpu-vcores + ${NM_INIT_CPU_CORES:-1} + + + yarn.nodemanager.address + ${HOSTNAME}:8041 + + EOM + + # annotate nm id on pod + kubectl annotate pod -n ${POD_NAMESPACE} ${POD_NAME} yarn.hadoop.apache.org/node-id=${HOSTNAME}:8041 + + echo '' >> $HADOOP_HOME/etc/hadoop/yarn-site.xml + + # Wait with timeout for resourcemanager + TMP_URL="http://{{ .Values.yarn.resourceManager.serviceName }}:{{ .Values.yarn.resourceManager.webPort }}/ws/v1/cluster/info" + if timeout 5m bash -c "until curl -sf $TMP_URL; do echo Waiting for $TMP_URL; sleep 5; done"; then + $HADOOP_HOME/bin/yarn nodemanager --loglevel {{ .Values.logLevel }} + else + echo "$0: Timeout waiting for $TMP_URL, exiting." 
+ exit 1 + fi + fi + + # ------------------------------------------------------ + # Start HDFS NAME NODE + # ------------------------------------------------------ + if [[ "${YARN_ROLE}" =~ "hdfs-nn" ]]; then + mkdir -p /tmp/hadoop-root/dfs/name + $HADOOP_HOME/bin/hdfs namenode -format + $HADOOP_HOME/bin/hdfs --loglevel {{ .Values.logLevel }} --daemon start namenode + fi + + # ------------------------------------------------------ + # Start HDFS DATA NODE + # ------------------------------------------------------ + if [[ "${YARN_ROLE}" =~ "hdfs-dn" ]]; then + $HADOOP_HOME/bin/hdfs --loglevel {{ .Values.logLevel }} --daemon start datanode + fi + + # ------------------------------------------------------ + # Tail logfiles for daemonized workloads (parameter -d) + # ------------------------------------------------------ + if [[ $1 == "-d" ]]; then + until find ${HADOOP_HOME}/logs -mmin -1 | egrep -q '.*'; echo "`date`: Waiting for logs..." ; do sleep 2 ; done + tail -F ${HADOOP_HOME}/logs/* & + while true; do sleep 1000; done + fi + + # ------------------------------------------------------ + # Start bash if requested (parameter -bash) + # ------------------------------------------------------ + if [[ $1 == "-bash" ]]; then + /bin/bash + fi + + core-site.xml: | + + + + + fs.defaultFS + hdfs://{{ .Values.yarn.resourceManager.serviceName }}:9000/ + + + + hdfs-site.xml: | + + + + + dfs.namenode.rpc-address + {{ .Values.yarn.resourceManager.serviceName }}:9000 + + + dfs.namenode.rpc-bind-host + 0.0.0.0 + + + dfs.namenode.servicerpc-bind-host + 0.0.0.0 + + + dfs.namenode.datanode.registration.ip-hostname-check + false + + + + mapred-site.xml: | + + + + + + mapreduce.framework.name + yarn + + + mapreduce.jobhistory.address + {{ .Values.yarn.resourceManager.serviceName }}:10020 + + + mapreduce.jobhistory.webapp.address + {{ .Values.yarn.resourceManager.serviceName }}:19888 + + + + slaves: | + localhost + + yarn-site.xml: | + + + + + + yarn.resourcemanager.hostname + {{ .Values.yarn.resourceManager.serviceName }} + + + + + yarn.web-proxy.address + {{ .Values.yarn.resourceManager.serviceName }}:{{ .Values.yarn.resourceManager.webProxy.port}} + + + + + yarn.scheduler.maximum-allocation-vcores + 48 + + + yarn.scheduler.maximum-allocation-mb + 20480 + + + + + yarn.resourcemanager.webapp.address + {{ .Values.yarn.resourceManager.serviceName }}:{{ .Values.yarn.resourceManager.webPort }} + + + + + yarn.log-aggregation-enable + true + + + + yarn.log-aggregation.retain-seconds + 86400 + + + + + yarn.resourcemanager.bind-host + 0.0.0.0 + + + yarn.nodemanager.bind-host + 0.0.0.0 + + + yarn.timeline-service.bind-host + 0.0.0.0 + + + yarn.web-proxy.bind-host + 0.0.0.0 + + + + + yarn.nodemanager.vmem-check-enabled + false + + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + + yarn.nodemanager.aux-services.mapreduce_shuffle.class + org.apache.hadoop.mapred.ShuffleHandler + + + + List of directories to store localized files in. + yarn.nodemanager.local-dirs + /var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir + + + + Where to store container logs. + yarn.nodemanager.log-dirs + /var/log/hadoop-yarn/containers + + + + Where to aggregate logs to. 
+ yarn.nodemanager.remote-app-log-dir + /var/log/hadoop-yarn/apps + + + + yarn.application.classpath + + /opt/hadoop/etc/hadoop, + /opt/hadoop/share/hadoop/common/*, + /opt/hadoop/share/hadoop/common/lib/*, + /opt/hadoop/share/hadoop/hdfs/*, + /opt/hadoop/share/hadoop/hdfs/lib/*, + /opt/hadoop/share/hadoop/mapreduce/*, + /opt/hadoop/share/hadoop/mapreduce/lib/*, + /opt/hadoop/share/hadoop/yarn/*, + /opt/hadoop/share/hadoop/yarn/lib/* + + + + + yarn.nodemanager.container-executor.class + org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor + + + yarn.nodemanager.linux-container-executor.resources-handler.class + org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler + + + yarn.nodemanager.linux-container-executor.group + hadoop + + + yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user + hadoop + + + yarn.nodemanager.linux-container-executor.cgroups.mount + false + + + yarn.nodemanager.linux-container-executor.cgroups.mount-path + /host-cgroup/ + + + yarn.nodemanager.linux-container-executor.cgroups.hierarchy + {{ .Values.yarn.config.yarnSite.cgroupsHierarchy }} + + + yarn.nodemanager.resource.memory.enabled + true + + diff --git a/hadoop-yarn/3.3.2-v1.0/templates/rbac.yaml b/hadoop-yarn/3.3.2-v1.0/templates/rbac.yaml new file mode 100644 index 0000000..91c460d --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/templates/rbac.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: yarn-nodemanager + namespace: {{ .Values.installation.namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: yarn-nodemanager-role + namespace: {{ .Values.installation.namespace }} +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - patch + - update + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: yarn-nodemanager-rolebinding + namespace : {{ .Values.installation.namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: yarn-nodemanager-role +subjects: + - kind: ServiceAccount + name: yarn-nodemanager + namespace: {{ .Values.installation.namespace }} \ No newline at end of file diff --git a/hadoop-yarn/3.3.2-v1.0/templates/yarn-nm-statefulset.yaml b/hadoop-yarn/3.3.2-v1.0/templates/yarn-nm-statefulset.yaml new file mode 100644 index 0000000..6f7090d --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/templates/yarn-nm-statefulset.yaml @@ -0,0 +1,166 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + namespace: {{ .Values.installation.namespace }} + name: {{ include "hadoop-yarn.fullname" . }}-nm + annotations: + checksum/config: {{ include (print $.Template.BasePath "/hadoop-configmap.yaml") . | sha256sum }} + labels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + helm.sh/chart: {{ include "hadoop-yarn.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: node-manager +spec: + selector: + matchLabels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: node-manager + minReadySeconds: 10 + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 20% + replicas: 2 + serviceName: {{ .Values.yarn.nodeManager.serviceName }} + template: + metadata: + labels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: node-manager +{{- if .Values.yarn.nodeManager.useBatchResource }} + koordinator.sh/qosClass: BE +{{- end }} + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: "kubernetes.io/hostname" + labelSelector: + matchLabels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: node-manager + serviceAccountName: yarn-nodemanager + setHostnameAsFQDN: true + terminationGracePeriodSeconds: 0 + dnsPolicy: ClusterFirst +{{- if .Values.yarn.nodeManager.useBatchResource }} + priorityClassName: koord-batch +{{- end }} + hostNetwork: false + nodeSelector: +{{ toYaml .Values.yarn.nodeManager.nodeSelector | indent 8 }} + containers: + - name: yarn-nm + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + command: + - "/bin/bash" + - "/tmp/hadoop-config/bootstrap.sh" + - "-d" + resources: +{{- if .Values.yarn.nodeManager.useBatchResource }} + requests: + kubernetes.io/batch-cpu: {{ .Values.yarn.nodeManager.resources.requests.cpu }}k + kubernetes.io/batch-memory: {{ .Values.yarn.nodeManager.resources.requests.memory }} + limits: + kubernetes.io/batch-cpu: {{ .Values.yarn.nodeManager.resources.limits.cpu }}k + kubernetes.io/batch-memory: {{ .Values.yarn.nodeManager.resources.limits.memory }} +{{- else }} + requests: + cpu: {{ .Values.yarn.nodeManager.resources.requests.cpu }} + memory: {{ .Values.yarn.nodeManager.resources.requests.memory }} + limits: + cpu: {{ .Values.yarn.nodeManager.resources.limits.cpu }} + memory: {{ .Values.yarn.nodeManager.resources.limits.memory }} +{{- end }} + readinessProbe: + httpGet: + path: /node + port: 8042 + initialDelaySeconds: 10 + timeoutSeconds: 2 + securityContext: + privileged: true + livenessProbe: + httpGet: + path: /node + port: 8042 + initialDelaySeconds: 10 + timeoutSeconds: 2 + env: + - name: YARN_ROLE + value: yarn-nm + - name: NM_INIT_CPU_CORES + value: "{{ .Values.yarn.nodeManager.initCPUVCores }}" + - name: NM_INIT_MEMORY_MB + value: "{{ .Values.yarn.nodeManager.initMemoryMB }}" + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - name: hadoop-config + mountPath: /tmp/hadoop-config + - name: host-cgroup-root + mountPath: /host-cgroup/ +{{- if .Values.hdfs.enabled }} + - name: hdfs-dn + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + command: + - "/bin/bash" + - "/tmp/hadoop-config/bootstrap.sh" + - "-d" + resources: +{{- if .Values.yarn.nodeManager.useBatchResource }} + requests: + kubernetes.io/batch-cpu: {{ .Values.hdfs.dataNode.resources.requests.cpu }}k + kubernetes.io/batch-memory: {{ .Values.hdfs.dataNode.resources.requests.memory }} + limits: + kubernetes.io/batch-cpu: {{ .Values.hdfs.dataNode.resources.limits.cpu }}k + kubernetes.io/batch-memory: {{ .Values.hdfs.dataNode.resources.limits.memory }} +{{- else }} + requests: + cpu: {{ .Values.hdfs.dataNode.resources.requests.cpu }} + memory: {{ .Values.hdfs.dataNode.resources.requests.memory }} + limits: + cpu: {{ .Values.hdfs.dataNode.resources.limits.cpu }} + memory: {{ .Values.hdfs.dataNode.resources.limits.memory }} +{{- end }} + readinessProbe: + httpGet: + path: / + port: 8042 + initialDelaySeconds: 10 + 
timeoutSeconds: 2 + securityContext: + privileged: true + livenessProbe: + httpGet: + path: / + port: 9864 + initialDelaySeconds: 10 + timeoutSeconds: 2 + env: + - name: YARN_ROLE + value: hdfs-dn + volumeMounts: + - name: hadoop-config + mountPath: /tmp/hadoop-config +{{- end }} + volumes: + - name: hadoop-config + configMap: + name: {{ include "hadoop-yarn.fullname" . }} + - name: host-cgroup-root + hostPath: + # set k8s/besteffort for yarn task, mount root dir here since the format is different betweent cgroup fs and systemd + path: /sys/fs/cgroup/ + type: "" \ No newline at end of file diff --git a/hadoop-yarn/3.3.2-v1.0/templates/yarn-nm-svc.yaml b/hadoop-yarn/3.3.2-v1.0/templates/yarn-nm-svc.yaml new file mode 100644 index 0000000..7959a23 --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/templates/yarn-nm-svc.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + namespace: {{ .Values.installation.namespace }} + name: {{ .Values.yarn.nodeManager.serviceName }} + labels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + helm.sh/chart: {{ include "hadoop-yarn.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: node-manager +spec: + ports: + - port: 8088 + name: web + - port: 8082 + name: web2 + - port: 8042 + name: api + clusterIP: None + selector: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: node-manager \ No newline at end of file diff --git a/hadoop-yarn/3.3.2-v1.0/templates/yarn-rm-deployment.yaml b/hadoop-yarn/3.3.2-v1.0/templates/yarn-rm-deployment.yaml new file mode 100644 index 0000000..b934a6b --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/templates/yarn-rm-deployment.yaml @@ -0,0 +1,108 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + controle-plane: koordinator + name: {{ .Values.installation.namespace }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "hadoop-yarn.fullname" . }}-rm + namespace: {{ .Values.installation.namespace }} + annotations: + checksum/config: {{ include (print $.Template.BasePath "/hadoop-configmap.yaml") . | sha256sum }} + labels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + helm.sh/chart: {{ include "hadoop-yarn.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: yarn-rm +spec: + selector: + matchLabels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: yarn-rm + replicas: 1 + template: + metadata: + labels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: yarn-rm + spec: + terminationGracePeriodSeconds: 0 + dnsPolicy: ClusterFirst + # hostNetwork: true + # Let rm pod schedule to node with specific label. 
+ nodeSelector: +{{ toYaml .Values.yarn.resourceManager.nodeSelector | indent 8 }} + containers: + - name: yarn-rm + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + ports: + - containerPort: {{ .Values.yarn.resourceManager.webPort }} + name: web + command: + - "/bin/bash" + - "/tmp/hadoop-config/bootstrap.sh" + - "-d" + resources: +{{ toYaml .Values.yarn.resourceManager.resources | indent 10 }} + env: + - name: YARN_ROLE + value: yarn-rm + readinessProbe: + httpGet: + path: /ws/v1/cluster/info + port: {{ .Values.yarn.resourceManager.webPort }} + initialDelaySeconds: 5 + timeoutSeconds: 2 + livenessProbe: + failureThreshold: 5 + httpGet: + path: /ws/v1/cluster/info + port: {{ .Values.yarn.resourceManager.webPort }} + initialDelaySeconds: 10 + timeoutSeconds: 2 + volumeMounts: + - name: hadoop-config + mountPath: /tmp/hadoop-config +{{- if .Values.hdfs.enabled }} + - name: hdfs-nn + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + ports: + - containerPort: {{ .Values.hdfs.nameNode.httpPort }} + name: web + command: + - "/bin/bash" + - "/tmp/hadoop-config/bootstrap.sh" + - "-d" + resources: +{{ toYaml .Values.hdfs.nameNode.resources | indent 10 }} + env: + - name: YARN_ROLE + value: hdfs-nn + readinessProbe: + httpGet: + path: / + port: {{ .Values.hdfs.nameNode.httpPort }} + initialDelaySeconds: 5 + timeoutSeconds: 2 + livenessProbe: + failureThreshold: 5 + httpGet: + path: / + port: {{ .Values.hdfs.nameNode.httpPort }} + initialDelaySeconds: 10 + timeoutSeconds: 2 + volumeMounts: + - name: hadoop-config + mountPath: /tmp/hadoop-config +{{- end }} + volumes: + - name: hadoop-config + configMap: + name: {{ include "hadoop-yarn.fullname" . }} \ No newline at end of file diff --git a/hadoop-yarn/3.3.2-v1.0/templates/yarn-rm-svc.yaml b/hadoop-yarn/3.3.2-v1.0/templates/yarn-rm-svc.yaml new file mode 100644 index 0000000..f355a2d --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/templates/yarn-rm-svc.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: Service +metadata: + namespace: {{ .Values.installation.namespace }} + name: {{ .Values.yarn.resourceManager.serviceName }} + labels: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + helm.sh/chart: {{ include "hadoop-yarn.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: yarn-rm-service +spec: + ports: + - port: {{ .Values.yarn.resourceManager.webPort }} + name: web + - port: 8033 + name: admin-addr + - port: 8031 + name: res-tracker-addr + - port: 8032 + name: addr + - port: 8030 + name: scheduler-addr +{{- if .Values.hdfs.enabled }} + - port: 9000 + name: hdfs-namenode + - port: {{ .Values.hdfs.nameNode.httpPort }} + name: hdfs-http +{{- end }} + type: {{ .Values.yarn.resourceManager.serviceType }} + selector: + app.kubernetes.io/name: {{ include "hadoop-yarn.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/component: yarn-rm \ No newline at end of file diff --git a/hadoop-yarn/3.3.2-v1.0/values.yaml b/hadoop-yarn/3.3.2-v1.0/values.yaml new file mode 100644 index 0000000..a0aa6e9 --- /dev/null +++ b/hadoop-yarn/3.3.2-v1.0/values.yaml @@ -0,0 +1,82 @@ +installation: + namespace: hadoop-yarn + +image: + repository: registry.cn-hangzhou.aliyuncs.com/koordinator-sh/apache-hadoop + tag: 3.3.2-v1.0 + pullPolicy: Always + +# The version of the hadoop libraries being used in the image. 
+hadoopVersion: 3.3.2 +logLevel: INFO + +yarn: + resourceManager: + webPort: 8088 + + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: + cpu: "2" + memory: "4Gi" + + webProxy: + port: 8054 + + nodeSelector: + # yarn.hadoop.apache.org/node-role: yarn-rm + + serviceType: ClusterIP + serviceName: resource-manager + + nodeManager: + # CPU and memory resources allocated to each node manager pod. + # This should be tuned to fit your workload. + resources: + requests: + cpu: "1" + memory: "1Gi" + limits: + cpu: "2" + memory: "2Gi" + + # use batch cpu and batch memory for nm pod + useBatchResource: true + + nodeSelector: + # yarn.hadoop.apache.org/node-role: yarn-nm + + serviceName: node-manager + + # initial cpu and memory of nm reported + initCPUVCores: 1 + initMemoryMB: 1024 + + config: + yarnSite: + cgroupsHierarchy: /kubepods.slice/kubepods-besteffort.slice/hadoop-yarn + +# hdfs components only used for demo, will run as independent containers inside yarn pods +hdfs: + enabled: true + nameNode: + httpPort: 9870 + + resources: + requests: + cpu: "1" + memory: "1Gi" + limits: + cpu: "2" + memory: "4Gi" + + dataNode: + resources: + requests: + cpu: "1" + memory: "1Gi" + limits: + cpu: "1" + memory: "1Gi" \ No newline at end of file
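
A minimal install sketch for the chart added above (assuming Helm 3; the release name "my-yarn" and the --set overrides are illustrative only, and the chart is consumed through the charts/hadoop-yarn symlink created by this patch):

    # Render the templates locally to sanity-check the generated manifests.
    helm template my-yarn ./charts/hadoop-yarn

    # Install; the chart creates its own Namespace object (installation.namespace,
    # "hadoop-yarn" by default), so no --create-namespace flag is needed.
    # Setting useBatchResource=false makes the NodeManager pods request plain
    # cpu/memory instead of the koord-batch extended resources.
    helm install my-yarn ./charts/hadoop-yarn \
      --set yarn.nodeManager.useBatchResource=false \
      --set yarn.nodeManager.initCPUVCores=2 \
      --set yarn.nodeManager.initMemoryMB=2048

    # Check the pods, then follow the commands printed from templates/NOTES.txt
    # (e.g. "yarn node -list" against the resource manager pod).
    kubectl get pod -n hadoop-yarn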