forked from NVIDIA/Megatron-LM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.ci.lts
86 lines (75 loc) · 2.84 KB
/
Dockerfile.ci.lts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# syntax=docker/dockerfile:1.3-labs
# Base image used by every FROM in this file. It is declared without a
# default, so a value has to be supplied at build time
# (e.g. --build-arg FROM_IMAGE_NAME=<image>).
ARG FROM_IMAGE_NAME
# Stage: build a wheel for causal-conv1d at git ref v1.2.2.post1.
# `pip3 wheel` writes into the working directory (/opt), which is where the
# main stage picks the wheel up.
FROM ${FROM_IMAGE_NAME} AS build_causal_conv1d
WORKDIR /opt
# CAUSAL_CONV1D_FORCE_BUILD is set for this single command only.
# NOTE(review): presumably it makes the package compile from source instead of
# fetching a prebuilt binary -- confirm against the causal-conv1d setup.py.
RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE \
    pip3 wheel -v \
    git+https://github.com/Dao-AILab/causal-conv1d.git@v1.2.2.post1
# Stage: build a wheel for grouped_gemm at git ref v1.1.2.
# `pip3 wheel` writes into the working directory (/opt), which is where the
# main stage picks the wheel up.
FROM ${FROM_IMAGE_NAME} AS build_grouped_gemm
WORKDIR /opt
RUN pip3 wheel -v \
    git+https://github.com/fanshiqing/grouped_gemm@v1.1.2
# Stage: build a wheel for mamba (mamba_ssm) at git ref v2.0.3.
# `pip3 wheel` writes into the working directory (/opt), which is where the
# main stage picks the wheel up.
FROM ${FROM_IMAGE_NAME} AS build_mamba_ssm
WORKDIR /opt
# MAMBA_FORCE_BUILD is set for this single command only.
# NOTE(review): presumably it makes the package compile from source instead of
# fetching a prebuilt binary -- confirm against the mamba setup.py.
RUN MAMBA_FORCE_BUILD=TRUE \
    pip3 wheel -v \
    git+https://github.com/state-spaces/mamba.git@v2.0.3
# Stage: the CI image itself.
FROM ${FROM_IMAGE_NAME} AS main
# NOTE(review): as an ENV this value also persists into containers started
# from the image; it could become an ARG if nothing at runtime relies on it.
ENV DEBIAN_FRONTEND=noninteractive
# Install OS packages, create the /opt/jet virtualenv and fetch yq v4.44.1.
# The apt package lists are removed in the same layer that downloaded them so
# they do not add to the image size.
# NOTE(review): the yq download is not checksum-verified.
RUN apt-get update && \
    apt-get install -y --no-install-recommends gettext python3-venv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    python -m venv /opt/jet && \
    wget https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 -O /usr/local/bin/yq && \
    chmod a+x /usr/local/bin/yq
# Install the Python dependencies together with the three wheels produced by
# the build_* stages.
#
# The wheels are bind-mounted from their build stages for the duration of this
# RUN only, so their contents are never written to an image layer. (A COPY
# followed by `rm` in a later layer would still ship the wheels inside the
# COPY layers, and `rm *.whl` could also delete unrelated wheels that happen
# to sit in the working directory.)
#
# nvidia-modelopt is uninstalled first and reinstalled with the >=0.19.0
# constraint. The extras spec is quoted so the shell cannot interpret
# `[torch]` as a glob pattern.
RUN --mount=type=bind,from=build_causal_conv1d,source=/opt/causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl,target=/tmp/wheels/causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl \
    --mount=type=bind,from=build_grouped_gemm,source=/opt/grouped_gemm-1.1.2-cp310-cp310-linux_x86_64.whl,target=/tmp/wheels/grouped_gemm-1.1.2-cp310-cp310-linux_x86_64.whl \
    --mount=type=bind,from=build_mamba_ssm,source=/opt/mamba_ssm-2.0.3-cp310-cp310-linux_x86_64.whl,target=/tmp/wheels/mamba_ssm-2.0.3-cp310-cp310-linux_x86_64.whl \
    pip3 uninstall -y "nvidia-modelopt[torch]" && \
    pip3 install --extra-index-url https://pypi.nvidia.com --no-cache-dir --upgrade-strategy only-if-needed -v \
        einops \
        flask-restful \
        nltk \
        pytest \
        pytest-cov \
        pytest_mock \
        pytest-random-order \
        sentencepiece \
        tiktoken \
        wrapt \
        zarr \
        wandb \
        triton==2.1.0 \
        /tmp/wheels/causal_conv1d-1.2.2.post1-cp310-cp310-linux_x86_64.whl \
        /tmp/wheels/mamba_ssm-2.0.3-cp310-cp310-linux_x86_64.whl \
        /tmp/wheels/grouped_gemm-1.1.2-cp310-cp310-linux_x86_64.whl \
        tensorstore==0.1.45 \
        "nvidia-modelopt[torch]>=0.19.0"
# Since megatron does not have any dependencies (and isn't a dependency to any other package), we can install it separately to make everything a bit quicker
# Build arguments (no defaults):
#   MCORE_REPO           - git remote both checkouts are fetched from
#   MCORE_REF            - ref checked out into /opt/megatron-lm
#   MCORE_BACKWARDS_REF  - ref checked out into /opt/megatron-lm-<ref>
ARG MCORE_REPO
ARG MCORE_REF
ARG MCORE_BACKWARDS_REF
# The script runs under `bash -exu`: stop at the first failing command, trace
# each command, and treat a reference to an unset variable as an error.
# The heredoc delimiter is quoted, so the script text reaches bash unexpanded
# and bash resolves the variables itself. Every expansion is double-quoted so
# a value containing whitespace or glob characters stays a single argument.
RUN <<"EOF" bash -exu
# Checkout latest
cd /opt
rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm
git init
git remote add origin "${MCORE_REPO}"
# Also fetch merge-request refs so that MCORE_REF may point at one of them.
git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*'
git fetch origin "${MCORE_REF}"
git checkout "${MCORE_REF}"

# Checkout backwards-ref
cd /opt
rm -rf "/opt/megatron-lm-${MCORE_BACKWARDS_REF}"; mkdir "megatron-lm-${MCORE_BACKWARDS_REF}"; cd "megatron-lm-${MCORE_BACKWARDS_REF}"
git init
git remote add origin "${MCORE_REPO}"
git fetch origin "${MCORE_BACKWARDS_REF}"
git checkout "${MCORE_BACKWARDS_REF}"
# Replace the backwards checkout's `megatron` directory with the one from the
# latest checkout; everything else stays at the backwards ref.
rm -rf megatron; cp -a /opt/megatron-lm/megatron ./
EOF
# Editable install of the checkout in /opt/megatron-lm. --no-cache-dir keeps
# pip's download cache out of the layer, matching the dependency install.
RUN pip install --no-cache-dir -e /opt/megatron-lm
# Put the checkout first on PYTHONPATH. The ":" separator is emitted only when
# PYTHONPATH already has a value: an unconditional "/opt/megatron-lm:$PYTHONPATH"
# ends in a bare ":" when the base image defines none, and Python treats that
# empty entry as the current working directory.
ENV PYTHONPATH="/opt/megatron-lm${PYTHONPATH:+:$PYTHONPATH}"
##### For NVIDIANS only #####
# Stage: everything from `main` plus the jet-api / jet-client packages.
FROM main AS jet
# NOTE(review): CACHEBUST is not referenced by any instruction in this stage;
# presumably a new value is passed to force the RUN below to be re-executed
# instead of served from the build cache -- confirm with the CI pipeline.
ARG CACHEBUST=0
# The pip index options are read from the JET_INDEX_URLS build secret, which
# is mounted at /run/secrets/JET_INDEX_URLS for this RUN only.
# $JET_INDEX_URLS is left unquoted on the pip command line so that its content
# is split into separate arguments.
RUN --mount=type=secret,id=JET_INDEX_URLS \
    JET_INDEX_URLS="$(cat /run/secrets/JET_INDEX_URLS)" && \
    pip install --upgrade jet-api jet-client $JET_INDEX_URLS
# Append the /opt/jet virtualenv's bin directory to the search path.
ENV PATH="${PATH}:/opt/jet/bin"
###