# Dockerfile.neuronx
# Base image: Ubuntu 20.04 pulled from the public ECR mirror of the Docker library.
FROM public.ecr.aws/docker/library/ubuntu:20.04

# Image metadata labels.
LABEL dlc_major_version="1" \
      maintainer="Amazon AI" \
      com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

# Neuron SDK component versions installed with pip later in this file.
ARG NEURONX_CC_VERSION=2.15.143.0
ARG NEURONX_DISTRIBUTED_VERSION=0.9.0
ARG NEURONX_FRAMEWORK_VERSION=1.13.1.1.16.0
ARG NEURONX_TRANSFORMERS_VERSION=0.12.313

# Neuron SDK component versions installed with apt-get later in this file.
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.22.33.0-d2128d1aa
ARG NEURONX_RUNTIME_LIB_VERSION=2.22.19.0-5856c0b42
ARG NEURONX_TOOLS_VERSION=2.19.0.0

# Python interpreter name (passed to the OSS compliance script) and the exact
# version requested from conda.
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12

# Serving stack and installer versions.
ARG MAMBA_VERSION=23.1.0-4
ARG SM_TOOLKIT_VERSION=2.0.21
ARG TORCHSERVE_VERSION=0.11.0
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Library search path for the Neuron runtime, system libraries and conda.
# The previous value ended in ":$LD_LIBRARY_PATH". Nothing earlier in this file
# defines LD_LIBRARY_PATH (and, as far as I know, neither does the ubuntu:20.04
# base image), so that expanded to a trailing ":" -- an empty entry, which the
# dynamic loader interprets as the current working directory. The directories
# are therefore listed explicitly with no trailing separator.
ENV LD_LIBRARY_PATH=/opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/
# Put the conda and Neuron binaries ahead of the system ones.
ENV PATH=/opt/conda/bin:/opt/aws/neuron/bin:$PATH
# Module entry point exported for the SageMaker serving toolkit installed below.
ENV SAGEMAKER_SERVING_MODULE=sagemaker_pytorch_serving_container.serving:main
# Temp directory under the model-server home; the directory itself is created
# by the useradd/mkdir step later in this file.
ENV TEMP=/home/model-server/tmp
# Base OS packages.
# software-properties-common goes in first so that add-apt-repository is
# available to register the OpenJDK PPA; the package index is then refreshed
# before the main install. apt lists and temp files are removed in the same
# layer so they do not persist in the image.
RUN apt-get update \
 && apt-get upgrade -y \
 && apt-get install -y --no-install-recommends software-properties-common \
 && add-apt-repository ppa:openjdk-r/ppa \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      apt-transport-https \
      build-essential \
      ca-certificates \
      cmake \
      curl \
      emacs \
      git \
      gnupg2 \
      gpg-agent \
      jq \
      libcap-dev \
      libgl1-mesa-glx \
      libglib2.0-0 \
      libsm6 \
      libxext6 \
      libxrender-dev \
      openjdk-11-jdk \
      unzip \
      vim \
      wget \
      zlib1g-dev \
 && rm -rf /var/lib/apt/lists/* /tmp/tmp* \
 && apt-get clean
# Register the AWS Neuron apt repository (focal) and trust its signing key.
# The key is downloaded to a file first rather than piped into apt-key: RUN
# uses /bin/sh without pipefail, so in "wget ... | apt-key add -" the exit
# status of wget was never checked. With a file, a failed download stops the
# build at the wget step.
RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list \
 && wget -qO /tmp/neuron-apt-key.pub https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
 && apt-key add /tmp/neuron-apt-key.pub \
 && rm -f /tmp/neuron-apt-key.pub
# Install the Neuron collectives, runtime library and tools at the exact
# versions pinned by the ARGs at the top of this file, then drop the apt lists
# and temp files in the same layer.
RUN apt-get update \
 && apt-get install -y \
      aws-neuronx-collectives=${NEURONX_COLLECTIVES_LIB_VERSION} \
      aws-neuronx-runtime-lib=${NEURONX_RUNTIME_LIB_VERSION} \
      aws-neuronx-tools=${NEURONX_TOOLS_VERSION} \
 && rm -rf /var/lib/apt/lists/* /tmp/tmp* \
 && apt-get clean
# Java CA-certificates workaround: re-import the trust store as a JKS keystore,
# move it over the original cacerts file, then re-run the ca-certificates-java
# post-install configuration.
# Background: https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
# NOTE(review): the three commands are separated by ";" rather than "&&", so
# only the exit status of the last one decides whether this step fails --
# confirm that is intentional.
RUN keytool -importkeystore \
      -srckeystore /etc/ssl/certs/java/cacerts \
      -destkeystore /etc/ssl/certs/java/cacerts.jks \
      -deststoretype JKS \
      -srcstorepass changeit \
      -deststorepass changeit \
      -noprompt; \
    mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
    /var/lib/dpkg/info/ca-certificates-java.postinst configure
# Download the Mambaforge installer for MAMBA_VERSION, install it into
# /opt/conda in batch mode, delete the installer, update conda, and install the
# pinned Python version plus base packages from conda-forge.
# conda-content-trust and charset-normalizer are included in the miniconda base
# but not in mamba, so they are installed explicitly here.
RUN curl -L \
      -o ~/mambaforge.sh \
      https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-x86_64.sh \
 && chmod +x ~/mambaforge.sh \
 && ~/mambaforge.sh -b -p /opt/conda \
 && rm ~/mambaforge.sh \
 && /opt/conda/bin/conda update -y conda \
 && /opt/conda/bin/conda install -y -c conda-forge \
      charset-normalizer \
      conda-content-trust \
      cython \
      mkl \
      mkl-include \
      parso \
      pyopenssl \
      python=$PYTHON_VERSION \
      typing \
 && /opt/conda/bin/conda clean --yes --all
# Extra conda-forge packages, a pip upgrade, a pip3 alias, and a few pip
# packages.
# - "-y" is passed to conda install so the step never depends on how conda
#   handles its confirmation prompt in a non-interactive build (the earlier
#   conda install in this file already passes -y).
# - "--no-cache-dir" is passed to every pip call so downloaded wheels are not
#   kept in this layer, matching the other pip steps in this file.
RUN conda install -y -c conda-forge \
      scikit-learn \
      h5py \
      requests \
 && conda clean -ya \
 && pip install --no-cache-dir --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
 && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
 && pip install --no-cache-dir \
      packaging \
      enum-compat \
      ipython
# General-purpose Python dependencies.
# Every requirement that contains a shell metacharacter is quoted. Previously
# opencv-python>=4.8.1.78 was unquoted, so /bin/sh parsed ">=4.8.1.78" as a
# redirection: pip received a bare "opencv-python" with no version floor and
# its stdout was written to a file named "=4.8.1.78" in the working directory.
# pandas==1.* is quoted as well so the "*" can never be glob-expanded.
RUN pip install --no-cache-dir -U \
      "opencv-python>=4.8.1.78" \
      "numpy<1.24,>1.21" \
      "scipy>=1.8.0" \
      six \
      "pillow>=10.0.1" \
      "awscli<2" \
      "pandas==1.*" \
      boto3 \
      cryptography
# Neuron compiler/framework packages (from the Neuron pip repository),
# TorchServe, torchvision and neuronx_distributed at the versions pinned by the
# ARGs at the top of this file.
# - torchvision and neuronx_distributed are installed with --no-deps, so pip
#   does not add or change their dependencies.
# - "--no-cache-dir" is applied to every pip call (it was previously only on
#   the torchvision one) so downloaded wheels are not kept in this layer.
# - "torchvision==0.14.*" is quoted so the shell can never glob-expand the "*".
RUN pip install --no-cache-dir -U --extra-index-url https://pip.repos.neuron.amazonaws.com \
      neuronx-cc==$NEURONX_CC_VERSION \
      torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
      transformers-neuronx==$NEURONX_TRANSFORMERS_VERSION \
 && pip install --no-cache-dir -U \
      "protobuf>=3.18.3,<4" \
      torchserve==${TORCHSERVE_VERSION} \
      torch-model-archiver==${TORCHSERVE_VERSION} \
 && pip install --no-deps --no-cache-dir -U "torchvision==0.14.*" \
 && pip install --no-deps --no-cache-dir -U --extra-index-url https://pip.repos.neuron.amazonaws.com neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION
# Create the model-server account with a home directory, make sure its tmp
# directory and the /opt/ml/model directory exist, and hand ownership of both
# trees to that account.
RUN useradd --create-home model-server \
 && mkdir -p \
      /home/model-server/tmp \
      /opt/ml/model \
 && chown -R model-server \
      /home/model-server \
      /opt/ml/model
# Copy the serving configuration and the entrypoint/helper scripts from the
# build context; the scripts are renamed on the way in and then marked
# executable.
COPY config.properties /home/model-server
COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
COPY torchserve-neuron.sh /usr/local/bin/entrypoint.sh
RUN chmod +x \
      /usr/local/bin/dockerd-entrypoint.py \
      /usr/local/bin/neuron-monitor.sh \
      /usr/local/bin/entrypoint.sh
# Fetch deep_learning_container.py from the aws/deep-learning-containers
# repository into /usr/local/bin and mark it executable.
# NOTE(review): the URL tracks the mutable "master" branch and no checksum is
# verified, so the downloaded content can change between builds -- consider
# pinning to a commit SHA and/or using ADD --checksum.
ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py
RUN chmod +x /usr/local/bin/deep_learning_container.py
# Install the SageMaker PyTorch inference toolkit at the pinned version.
RUN pip install --no-cache-dir "sagemaker-pytorch-inference==${SM_TOOLKIT_VERSION}"
# Patch default_pytorch_inference_handler.py to import torch_neuronx.
# The installed package directory is located by importing the package, then
# "import torch" is rewritten in place with sed.
# sed exits 0 even when nothing matched, so the trailing grep fails the build
# if the file does not mention torch_neuronx afterwards (for example because a
# toolkit upgrade changed the import line). The paths are quoted so a location
# containing whitespace cannot split into several arguments.
RUN DEST_DIR=$(python -c "import os.path, sagemaker_pytorch_serving_container; print(os.path.dirname(sagemaker_pytorch_serving_container.__file__))") \
 && DEST_FILE="${DEST_DIR}/default_pytorch_inference_handler.py" \
 && sed -i "s/import torch/import torch, torch_neuronx/" "${DEST_FILE}" \
 && grep -q "torch_neuronx" "${DEST_FILE}"
# Download and unpack the OSS compliance bundle under /root, install its
# testOSSCompliance script into /usr/local/bin, run
# generate_oss_compliance.sh with the work directory and the PYTHON build
# argument, then remove the bundle.
# conda leaves an empty /root/.cache/conda/notices.cache file which is not
# removed by conda clean -ya, so that cache directory is deleted here as well.
RUN WORK_DIR=/root \
 && curl -o ${WORK_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
 && unzip ${WORK_DIR}/oss_compliance.zip -d ${WORK_DIR}/ \
 && cp ${WORK_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
 && chmod +x /usr/local/bin/testOSSCompliance \
 && chmod +x ${WORK_DIR}/oss_compliance/generate_oss_compliance.sh \
 && ${WORK_DIR}/oss_compliance/generate_oss_compliance.sh ${WORK_DIR} ${PYTHON} \
 && rm -rf ${WORK_DIR}/oss_compliance* \
 && rm -rf ${WORK_DIR}/.cache/conda
# Download the license text into the image root.
# --fail makes curl exit non-zero on an HTTP error (e.g. 403/404) instead of
# saving the server's error body as /license.txt and reporting success.
RUN curl --fail https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.13/license.txt -o /license.txt
# Ports documented as exposed by the container.
# NOTE(review): presumably the model server's inference and management ports --
# confirm against config.properties.
EXPOSE 8080
EXPOSE 8081

# The Python entrypoint wrapper is always run; CMD provides its default
# argument (the entrypoint.sh script copied in above) and can be overridden at
# `docker run` time.
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["/usr/local/bin/entrypoint.sh"]