forked from apache/airflow-ci
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
132 lines (114 loc) · 4.08 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT. AT
# THIS POINT, THIS IS ONLY INTENDED FOR USE IN AUTOMATED TESTS.
# Base image; build steps run as root until the USER switch at the end of the file.
FROM ubuntu:xenial
USER root

# Stop apt/dpkg from asking interactive questions while packages install.
ENV DEBIAN_FRONTEND=noninteractive

# Every locale category set here uses en_US.UTF-8.
ENV LANGUAGE=en_US.UTF-8 \
    LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
    LC_CTYPE=en_US.UTF-8 \
    LC_MESSAGES=en_US.UTF-8

# HADOOP_DISTRO is set in its own, earlier instruction than HADOOP_HOME:
# variables assigned in an ENV instruction are not visible to substitutions
# inside that same instruction.
ENV HADOOP_VERSION=2.6.0 \
    HADOOP_DISTRO=cdh

# Install locations used by the download steps further down, plus the JDK path.
ENV HADOOP_HOME=/tmp/hadoop-${HADOOP_DISTRO} \
    HIVE_HOME=/tmp/hive \
    JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
# Create the Hadoop, Hive and MiniCluster directories and the Hive warehouse
# path, then make the Hive home and the /user tree writable by every user.
RUN mkdir "${HADOOP_HOME}" "${HIVE_HOME}" /tmp/minicluster \
    && mkdir -p /user/hive/warehouse \
    && chmod -R 777 "${HIVE_HOME}" /user/
# Add the NodeSource (Node.js 8.x) apt repository and its signing key.
# COPY replaces ADD: the key is a plain file from the build context, so none of
# ADD's extra behaviour (archive extraction, URL fetching) is wanted.
COPY nodesource.gpg.key /tmp/nodesource.gpg.key
# Trust the key and write both repository entries in a single layer.
RUN apt-key add /tmp/nodesource.gpg.key \
    && printf '%s\n' \
        'deb http://deb.nodesource.com/node_8.x xenial main' \
        'deb-src http://deb.nodesource.com/node_8.x xenial main' \
        > /etc/apt/sources.list.d/nodesource.list
# Install the OS-level toolchain, language runtimes and database/Kerberos/SASL
# clients in one layer. The index refresh and the install share a RUN so a
# cached, stale package index is never reused, and the apt lists are removed in
# the same layer so they do not add to the image.
# Packages are listed one per line in alphabetical order; the set is the same as
# before except that the duplicated libsasl2-dev entry appears only once.
RUN apt-get update && apt-get install --no-install-recommends -y \
        curl \
        g++ \
        gcc \
        git \
        krb5-user \
        ldap-utils \
        less \
        libkrb5-dev \
        libmysqlclient-dev \
        libsasl2-2 \
        libsasl2-dev \
        libsasl2-modules \
        locales \
        make \
        mysql-client-5.7 \
        mysql-client-core-5.7 \
        nodejs \
        openjdk-8-jdk \
        openssh-client \
        openssh-server \
        postgresql-client \
        python-dev \
        python-pip \
        python-pkg-resources \
        python-selinux \
        python-setuptools \
        python-virtualenv \
        python3-dev \
        python3-pip \
        python3-pkg-resources \
        python3-setuptools \
        python3-venv \
        sasl2-bin \
        sqlite3 \
        sudo \
        unzip \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Enable the en_US.UTF-8 entry in /etc/locale.gen by uncommenting it, generate
# the locale, and set LANG and LC_ALL through update-locale.
RUN sed --in-place --expression='s/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen && \
    locale-gen && \
    update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8
# Install Hadoop: fetch the CDH tarball into /tmp, unpack it into HADOOP_HOME
# without its top-level directory, then delete the archive in the same layer.
# --absolute-names is a workaround for https://github.com/docker/hub-feedback/issues/727
RUN HADOOP_TARBALL="hadoop-${HADOOP_VERSION}-cdh5.11.0.tar.gz" \
    && wget -q -P /tmp "https://archive.cloudera.com/cdh5/cdh/5/${HADOOP_TARBALL}" \
    && tar --extract --gzip --file "/tmp/${HADOOP_TARBALL}" \
        --absolute-names --strip-components 1 --directory "${HADOOP_HOME}" \
    && rm "/tmp/${HADOOP_TARBALL}"
# Install Hive: fetch the CDH tarball into /tmp, unpack it into HIVE_HOME
# without its top-level directory, then delete the archive in the same layer.
RUN HIVE_TARBALL="hive-1.1.0-cdh5.11.0.tar.gz" \
    && wget -q -P /tmp "https://archive.cloudera.com/cdh5/cdh/5/${HIVE_TARBALL}" \
    && tar --extract --gzip --file "/tmp/${HIVE_TARBALL}" \
        --strip-components 1 --directory "${HIVE_HOME}" \
    && rm "/tmp/${HIVE_TARBALL}"
# Install MiniCluster: download the release zip into /tmp, extract it there,
# and delete the zip in the same layer.
RUN MINICLUSTER_ZIP="minicluster-1.1-SNAPSHOT-bin.zip" \
    && wget -q -P /tmp "https://github.com/bolkedebruin/minicluster/releases/download/1.1/${MINICLUSTER_ZIP}" \
    && unzip "/tmp/${MINICLUSTER_ZIP}" -d /tmp \
    && rm "/tmp/${MINICLUSTER_ZIP}"
# Create the unprivileged "airflow" account and give it passwordless sudo.
# --disabled-password and an empty --gecos stop adduser from prompting for a
# password and user details, which cannot be answered during an image build.
RUN adduser --disabled-password --gecos "" airflow \
    && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
    # sudoers drop-in files are expected to be read-only (0440).
    && chmod 0440 /etc/sudoers.d/airflow
# Install Python requirements (pip, wheel and tox) for both Python 2 and Python 3.
# - The build already runs as root here, so no sudo wrapper is needed.
# - pip is invoked as a module so each step uses the pip belonging to that
#   interpreter, including right after pip has upgraded itself.
# - pip is capped below 21 because pip 21.0 dropped Python 2.7 and 3.5, the
#   interpreter versions Ubuntu xenial provides.
# - --no-cache-dir keeps the download cache out of the layer, so no cleanup
#   step is required afterwards.
RUN python -m pip install --no-cache-dir --upgrade 'pip<21' \
    && python -m pip install --no-cache-dir wheel tox \
    && python3 -m pip install --no-cache-dir --upgrade 'pip<21' \
    && python3 -m pip install --no-cache-dir wheel tox
# Runtime configuration for the test container.
# Port 8080 is declared as the container's listening port (documentation only;
# EXPOSE does not publish it).
EXPOSE 8080
WORKDIR /home/airflow
# Put the Hive binaries on PATH. ADDITIONAL_PATH is appended only when it is set
# and non-empty: this file never defines it, and appending an empty value would
# leave a trailing ':' on PATH, which shells treat as the current directory.
ENV PATH="${PATH}:/tmp/hive/bin${ADDITIONAL_PATH:+:${ADDITIONAL_PATH}}"
# Drop root privileges for everything that runs in the container.
USER airflow