-
Notifications
You must be signed in to change notification settings - Fork 56
/
Copy pathDockerfile.pax
153 lines (127 loc) · 5.16 KB
/
Dockerfile.pax
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# syntax=docker/dockerfile:1-labs
ARG BASE_IMAGE=ghcr.io/nvidia/jax-mealkit:jax
ARG URLREF_PAXML=https://github.com/google/paxml.git#main
ARG URLREF_PRAXIS=https://github.com/google/praxis.git#main
ARG URLREF_LINGVO=https://github.com/tensorflow/lingvo.git#master
ARG SRC_PATH_PAXML=/opt/paxml
ARG SRC_PATH_PRAXIS=/opt/praxis
ARG SRC_PATH_LINGVO=/opt/lingvo
###############################################################################
## build tensorflow-text and lingvo, which do not have working arm64 pip wheels
###############################################################################
ARG BASE_IMAGE
FROM ${BASE_IMAGE} as wheel-builder
#------------------------------------------------------------------------------
# build lingvo
#------------------------------------------------------------------------------
# Remove Lingvo build from source when it has py-3.12 wheels for x86/arm64
FROM wheel-builder as lingvo-builder
ARG URLREF_LINGVO
ARG SRC_PATH_LINGVO
ENV USE_BAZEL_VERSION=7.1.2
# build lingvo
RUN <<"EOF" bash -exu -o pipefail
git-clone.sh ${URLREF_LINGVO} ${SRC_PATH_LINGVO}
pushd ${SRC_PATH_LINGVO}
CPU_ARCH="$(dpkg --print-architecture)"
if [[ "${CPU_ARCH}" == "arm64" ]]; then
# Use aarch distribution of protobufs
patch -p1 <<"EOFINNER"
diff --git a/lingvo/repo.bzl b/lingvo/repo.bzl
index ce65822d2..d9c0277aa 100644
--- a/lingvo/repo.bzl
+++ b/lingvo/repo.bzl
@@ -232,9 +232,9 @@ filegroup(
)
""",
urls = [
- "https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip",
+ "https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-aarch_64.zip",
],
- sha256 = "3cd951aff8ce713b94cde55e12378f505f2b89d47bf080508cf77e3934f680b6",
+ sha256 = "a584286dfa8ebb17032ece206ed74d5e9931e2edb9016e427be2a0dab3b21071",
)
def icu():
EOFINNER
fi
pip install tensorflow_datasets==4.9.2 auditwheel tensorflow==2.18.0
for pattern in \
"s|tensorflow=|#tensorflow=|g" \
"s|dataclasses=|#dataclasses=|g" \
"s|==.*||g" \
; do
sed -i "${pattern}" ${SRC_PATH_LINGVO}/docker/dev.requirements.txt
done
# Lingvo support only python < 3.12, so we hack it and update dependencies
# to be able to build for py-3.12
for pattern in \
"s|tensorflow-text~=2.13.0|tensorflow-text~=2.18.1|g" \
"s|tensorflow~=2.13.0|tensorflow~=2.18.0|g" \
"s|python_requires='>=3.8,<3.11'|python_requires='>=3.8,<3.13'|" \
; do
sed -i "${pattern}" ${SRC_PATH_LINGVO}/pip_package/setup.py;
done
pip install -r docker/dev.requirements.txt
# Some tests are flaky right now, so we skip running the tests.
BUILD_ARCH="x86_64"
if [[ "$CPU_ARCH" == "arm64" ]]; then
BUILD_ARCH="aarch64";
fi
sed -i 's/manylinux2014_x86_64/manylinux_2_38_'"${BUILD_ARCH}"'/' pip_package/build.sh
SKIP_TESTS=1 PYTHON_MINOR_VERSION=$(python --version | cut -d ' ' -f 2 | cut -d '.' -f 2) pip_package/build.sh
EOF
###############################################################################
## Pax for AArch64
###############################################################################
ARG BASE_IMAGE
FROM ${BASE_IMAGE} as mealkit
ARG URLREF_PAXML
ARG URLREF_PRAXIS
ARG SRC_PATH_PAXML
ARG SRC_PATH_PRAXIS
# Preserve version information of tensorflow-text and lingvo
COPY --from=lingvo-builder /opt/manifest.d/git-clone.yaml /opt/manifest.d/git-clone.yaml
COPY --from=lingvo-builder /tmp/lingvo/dist/lingvo*-linux*.whl /opt/
RUN echo "lingvo @ file://$(ls /opt/lingvo*.whl)" >> /opt/pip-tools.d/requirements-paxml.in
# paxml + praxis
RUN <<"EOF" bash -ex
echo "tensorflow_datasets==4.9.2" >> /opt/pip-tools.d/requirements-paxml.in
echo "auditwheel" >> /opt/pip-tools.d/requirements-paxml.in
git-clone.sh ${URLREF_PAXML} ${SRC_PATH_PAXML}
git-clone.sh ${URLREF_PRAXIS} ${SRC_PATH_PRAXIS}
echo "-e file://${SRC_PATH_PAXML}[gpu]" >> /opt/pip-tools.d/requirements-paxml.in
echo "-e file://${SRC_PATH_PRAXIS}" >> /opt/pip-tools.d/requirements-paxml.in
for src in ${SRC_PATH_PAXML} ${SRC_PATH_PRAXIS}; do
pushd ${src}
for pattern in \
"s| @ git+https://github.com/google/flax||g" \
"s| @ git+https://github.com/google/jax||g" \
"s| @ git+https://github.com/google/fiddle||g" \
"s|^tensorflow|#tensorflow|" \
"s|^lingvo|#lingvo|" \
"s|^scikit-learn|#scikit-learn|" \
"s|^protobuf|#protobuf|" \
"s|^numpy|#numpy|" \
"s|^orbax-checkpoint|#orbax-checkpoint|" \
"s| @ git+https://github.com/google/CommonLoopUtils||g" \
; do
sed -i "${pattern}" */pip_package/requirements.txt requirements.in
done
if git diff --quiet; then
echo "broken dependencies no longer present in ${src}"
exit 1
else
git commit -a -m "remove broken dependencies from ${src}"
fi
popd
done
sed -i 's/pysimdjson==[0-9.]*/pysimdjson/' ${SRC_PATH_PAXML}/setup.py
EOF
ADD test-pax.sh /usr/local/bin
###############################################################################
## Install accumulated packages from the base image and the previous stage
###############################################################################
FROM mealkit as final
RUN pip-finalize.sh
# When lingvo wheels are published on pypi.org, revert this
# Dockerfile to 5c4b687b918e6569bca43758c346ad8e67460154