Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

jumpstart / git-caching #15900

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 10 additions & 51 deletions test/make_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,60 +51,19 @@ def build_dist():


def download_cache(wait=False):
'''Download pre-built webpack for current git SHA from GitHub

These are produced by .github/workflows/build-dist.yml for every PR and push.
This is a lot faster than having to npm install and run webpack.

Returns True when successful, or False if the download isn't available.
This can happen because dist/ already exists, or the current directory is not a git checkout,
or it is a SHA which is not pushed/PRed.
'''
try:
sha = subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL).decode().strip()
except subprocess.CalledProcessError:
message("make_dist: not a git repository")
return False

if subprocess.call(["git", "diff", "--quiet", "--", ":^test", ":^packit.yaml", ":^.github"]) > 0:
message("make_dist: uncommitted local changes, skipping download")
return False

dist_git_checkout = os.path.join(os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "cockpit-dev", CACHE_REPO + ".git")

if not os.path.exists(dist_git_checkout):
message(f"make_dist: Creating dist cache {dist_git_checkout}")
subprocess.check_call(["git", "init", "--bare", "--quiet", dist_git_checkout])
subprocess.check_call(["git", "--git-dir", dist_git_checkout, "remote", "add", "origin", "https://github.com/" + CACHE_REPO])

tag = "sha-" + sha

retries = 50 if wait else 1 # 25 minutes, once every 30s
while retries > 0:
tries = 50 if wait else 1 # 25 minutes, once every 30s
for retry in range(tries):
try:
subprocess.check_call(["git", "--git-dir", dist_git_checkout, "fetch", "--no-tags", "--depth=1", "origin", "tag", tag])
break
except subprocess.CalledProcessError:
retries -= 1

if retries == 0:
message(f"make_dist: Downloading pre-built dist for SHA {sha} failed")
return False

message(f"make_dist: pre-built dist for {sha} not yet available, waiting...")
subprocess.check_call(["tools/webpack-jumpstart"])
allisonkarlitskaya marked this conversation as resolved.
Show resolved Hide resolved
return True
except subprocess.CalledProcessError as e:
if e.returncode != 2 or not wait:
break
message("make_dist: pre-built dist not yet available, waiting...")
time.sleep(30)

for unpack_path in ["node_modules", "package-lock.json", "dist", "tools/debian/copyright"]:
if os.path.exists(unpack_path):
continue
message(f"make_dist: Extracting cached {unpack_path}...")
p_git = subprocess.Popen(["git", "--git-dir", dist_git_checkout, "archive", tag, unpack_path],
stdout=subprocess.PIPE)
subprocess.check_call(["tar", "-x", "--touch"], stdin=p_git.stdout)
result = p_git.wait()
assert result == 0

return True
message("make_dist: Downloading pre-built dist failed")
return False


def make_dist(download_only=False, wait_download=False):
Expand Down
2 changes: 1 addition & 1 deletion tools/build-debian-copyright
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def module_copyright(moddir):
mod = os.path.basename(moddir)
copyrights = set()
try:
out = subprocess.check_output(['env', '-u', 'LANGUAGE', 'LC_ALL=C.UTF-8', 'grep', '-hri', r'copyright.*\([1-9][0-9]\+\|(c)\)'],
out = subprocess.check_output(['env', '-u', 'LANGUAGE', 'LC_ALL=C.UTF-8', 'grep', '-hri', '--binary-files=without-match', r'copyright.*\([1-9][0-9]\+\|(c)\)'],
cwd=moddir).decode('UTF-8')
for line in out.splitlines():
# weed out some noise
Expand Down
110 changes: 110 additions & 0 deletions tools/git-utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# doesn't do anything on its own. must be sourced.
allisonkarlitskaya marked this conversation as resolved.
Show resolved Hide resolved

# The script which sources this script must set the following variables:
#   GITHUB_REPO = the relative repo name of the submodule on github
#   SUBDIR = the location in the working tree where the submodule goes
# We also expect `set -eu`: with it, the two checks below abort the sourcing
# script when either variable is unset or empty.
[ -n "${GITHUB_REPO}" ]
[ -n "${SUBDIR}" ]

# GITHUB_BASE can be overridden from the environment. Only the part before
# its last "/" is used; GITHUB_REPO is looked up under that same prefix.
GITHUB_BASE="${GITHUB_BASE:-cockpit-project/cockpit}"
GITHUB_REPOSITORY="${GITHUB_BASE%/*}/${GITHUB_REPO}"
HTTPS_REMOTE="https://github.com/${GITHUB_REPOSITORY}"

# Location of the bare cache repository. Use ":-" rather than "-": the XDG
# Base Directory spec says an empty XDG_CACHE_HOME must be treated like an
# unset one, otherwise the cache would end up in /cockpit-dev/...
CACHE_DIR="${XDG_CACHE_HOME:-${HOME}/.cache}/cockpit-dev/${GITHUB_REPOSITORY}.git"

# Verbosity is selected by $V. With V=0, message() prints an aligned
# "<ACTION> <detail>" status line and $quiet holds the flag that silences git.
# With any other value (or V unset), message() does nothing and $quiet is
# empty.
case "${V-}" in
    0)
        message() { printf " %-8s %s\n" "$1" "$2"; }
        quiet='--quiet'
        ;;
    *)
        message() { :; }
        quiet=''
        ;;
esac

# Runs git against the bare cache repository in ${CACHE_DIR}.
# All arguments are handed to git unchanged; the exit status is git's.
git_cache() {
    git --git-dir="${CACHE_DIR}" "$@"
}

# Reads the named gitlink from the current state of the index.
#   $1 = path of the gitlink entry to look up
# Returns (ie: prints) a 40-character commit ID. If none is found, prints an
# error on stderr and calls `exit 1`.
get_index_gitlink() {
    # `grep -E` rather than `egrep`: the latter is deprecated, and GNU grep
    # 3.8+ prints a warning on stderr every time it is used.
    if ! git ls-files -s "$1" | grep -Eo '\<[[:xdigit:]]{40}\>'; then
        echo "*** couldn't read gitlink for file $1 from the index" >&2
        exit 1
    fi
}

# Creates the bare cache repository in ${CACHE_DIR} and points its "origin"
# remote at ${HTTPS_REMOTE}. Does nothing if the directory already exists.
init_cache() {
    if [ -d "${CACHE_DIR}" ]; then
        return 0
    fi

    message INIT "${CACHE_DIR}"
    mkdir -p "${CACHE_DIR}"
    git init --bare --template='' ${quiet} "${CACHE_DIR}"
    git_cache remote add origin "${HTTPS_REMOTE}"
}

# Checks whether the given argument "$1" (already) exists in the cache
# repository. We use git fsck to avoid problems with incomplete fetches: we
# want to make sure the complete commit is there.
# The exit status is that of git fsck; its stderr output is discarded.
check_ref() {
    git_cache fsck --connectivity-only --no-dangling "$1" 2>/dev/null
}

# Fetch a specific commit ID into the cache.
#   $1 = the commit ID to make available
# Either we have this commit available locally (in which case this function
# does nothing beyond initialising the cache), or we need to fetch it. There's
# no chance that the object changed on the server, because we define it by its
# checksum.
fetch_sha_to_cache() {
    # keep this private to the function: a plain assignment would overwrite
    # any `sha` variable of the script which sourced us
    local sha="$1"

    init_cache
    # No "offline mode" here: we either have the commit, or we don't
    if ! check_ref "${sha}"; then
        message FETCH "${SUBDIR} [ref: ${sha}]"
        git_cache fetch --no-tags ${quiet} origin "${sha}"
        # tag it to keep it from being GC'd.
        git_cache tag "sha-${sha}" "${sha}"
    fi
}

# General purpose "fetch" function to be used with tags, refs, or nothing at
# all (to fetch everything). The arguments are passed on to `git fetch origin`.
# Named refs can change on the server at any time, so unlike a fetch by commit
# ID we always ask the server. The exception is "offline" mode (a non-empty
# $OFFLINE), which skips the fetch and leaves the possibly-stale cache as is.
fetch_to_cache() {
    init_cache

    if [ -n "${OFFLINE-}" ]; then
        return 0
    fi

    message FETCH "${SUBDIR} ${1+[ref: $*]}"
    git_cache fetch --prune ${quiet} origin "$@"
}

# Consistency checking: for a given cache commit "$1", check if it contains a
# file "$2" which is equal to the file "$3" present in the working tree.
# The exit status is that of cmp: 0 if the contents are identical, non-zero
# otherwise.
cmp_from_cache() {
    # name stdin explicitly as the second operand: POSIX cmp takes two files,
    # the single-operand form is an extension
    git_cache cat-file blob "$1:$2" | cmp "$3" -
}

# Like `git clone` except that it uses the original origin url and supports
# checking out commit IDs as detached heads.
#   $1 = the ref or commit ID to check out
# The target directory ${SUBDIR} must either be empty, or not exist.
clone_from_cache() {
    message CLONE "${SUBDIR} [ref: $1]"

    # rmdir only succeeds on an empty directory, which enforces the
    # precondition above
    if [ -e "${SUBDIR}" ]; then
        rmdir "${SUBDIR}"
    fi
    mkdir "${SUBDIR}"

    # the copied cache becomes the checkout's .git; it was created with
    # `git init --bare`, so drop that marker before checking out
    cp -a --reflink=auto "${CACHE_DIR}" "${SUBDIR}/.git"
    git --git-dir "${SUBDIR}/.git" config --unset core.bare
    git -c advice.detachedHead=false -C "${SUBDIR}" checkout ${quiet} "$1"
}

# This copies the files without setting up the git repository. The copied
# files are expected to be in a same-named subdirectory inside the cache
# repository.
#   $1 = the ref or commit ID in the cache to take the files from
unpack_from_cache() {
    message "UNPACK" "${SUBDIR} [ref: $1]"
    # "-f -" makes tar read the archive from the pipe; without it tar uses
    # $TAPE or its compiled-in default device, which need not be stdin
    git_cache archive "$1" "${SUBDIR}" | tar -x -f - --touch "${SUBDIR}"
}
18 changes: 11 additions & 7 deletions tools/make-bots
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,19 @@
# Prepare bots by creating ./bots directory
# Specify $COCKPIT_BOTS_REF to checkout non-master branch

set -e
GITHUB_REPO='bots'
SUBDIR='bots'

V="${V-0}" # default to friendly messages

set -eu
cd "$(realpath -m "$0"/../..)"
. tools/git-utils.sh

if [ ! -d bots ]; then
git clone --quiet --reference-if-able ${XDG_CACHE_HOME:-$HOME/.cache}/cockpit-project/bots https://github.com/cockpit-project/bots.git
if [ -n "$COCKPIT_BOTS_REF" ]; then
git -C bots fetch --quiet --depth=1 origin $COCKPIT_BOTS_REF
git -C bots checkout --quiet FETCH_HEAD
fi
echo "checked out bots/ ref $(git -C bots rev-parse HEAD)"
[ -n "${quiet}" ] || set -x
fetch_to_cache # it's small, so keep everything cached
clone_from_cache "${COCKPIT_BOTS_REF-master}"
allisonkarlitskaya marked this conversation as resolved.
Show resolved Hide resolved
else
echo "bots/ already exists, skipping"
fi
54 changes: 10 additions & 44 deletions tools/node-modules
Original file line number Diff line number Diff line change
@@ -1,23 +1,11 @@
#!/bin/sh

set -eu

GITHUB_BASE="${GITHUB_BASE:-cockpit-project/cockpit}"
GITHUB_REPOSITORY="${GITHUB_BASE%/*}/node-cache"
HTTPS_REMOTE="https://github.com/${GITHUB_REPOSITORY}"
GITHUB_REPO='node-cache'
SUBDIR='node_modules'

CACHE_DIR="${XDG_CACHE_HOME-${HOME}/.cache}/cockpit-dev/${GITHUB_REPOSITORY}.git"

message() {
[ "${V-}" != 0 ] || printf " %-8s %s\n" "$1" "$2"
}

get_index_gitlink() {
if ! git ls-files -s "$1" | egrep -o '\<[[:xdigit:]]{40}\>'; then
echo "*** couldn't read gitlink for file $1 from the index" >&2
exit 1
fi
}
set -eu
cd "$(realpath -m "$0"/../..)"
. tools/git-utils.sh

cmd_remove() {
# if we did this for ourselves the rm is enough, but it might be the case
Expand All @@ -32,33 +20,14 @@ cmd_remove() {
cmd_checkout() {
# we default to check out the node_modules corresponding to the gitlink in the index
local sha="${1-$(get_index_gitlink node_modules)}"
local tag="sha-${sha}"

if [ ! -d "${CACHE_DIR}" ]; then
message INIT "${CACHE_DIR}"
mkdir -p "${CACHE_DIR}"
git init --bare ${quiet} "${CACHE_DIR}"
git --git-dir "${CACHE_DIR}" remote add origin "${HTTPS_REMOTE}"
fi

# fetch the tag if we don't already have it
if ! git --git-dir "${CACHE_DIR}" rev-parse --verify --quiet "${tag}" >/dev/null; then
message FETCH "node_modules [sha: ${sha}]"
# fetch by sha to prevent us from downloading something we don't want
git --git-dir "${CACHE_DIR}" fetch ${quiet} origin --no-tags "${sha}"
# create the tag locally for ourselves
git --git-dir "${CACHE_DIR}" tag "sha-${sha}" "${sha}"
fi

# paranoid: double check that the tag is what it claims to be
test "$(git --git-dir "${CACHE_DIR}" rev-parse "${tag}")" = "${sha}"
# fetch by sha to prevent us from downloading something we don't want
fetch_sha_to_cache "${sha}"

# verify that our package.json is equal to the one the cached node_modules
# was created with, unless --force is given
if [ "${1-}" != "--force" ]; then
local our_package_json_sha="$(cat package.json | sha256sum)"
local their_package_json_sha="$(git --git-dir "${CACHE_DIR}" cat-file -p "${sha}":.package.json | sha256sum)"
if [ "${our_package_json_sha}" != "${their_package_json_sha}" ]; then
if ! cmp_from_cache "${sha}" '.package.json' 'package.json'; then
cat >&2 <<EOF

*** node_modules ${sha} doesn't match our package.json
Expand All @@ -82,8 +51,7 @@ EOF

# and check out the new one
# we need to use the tag name here, unfortunately
message UNPACK "node_modules [sha: ${sha}]"
git -c advice.detachedHead=false clone ${quiet} "${CACHE_DIR}" -b "${tag}" node_modules
clone_from_cache "${sha}"
}

cmd_rebuild() {
Expand Down Expand Up @@ -197,7 +165,6 @@ main() {
cd "${top_srcdir}"

local cmd="${1-}"
local quiet=''

if [ -z "${cmd}" ]; then
# don't list the "private" ones
Expand All @@ -209,8 +176,7 @@ main() {
fi

shift
[ "${V-}" != 0 ] || quiet='--quiet'
[ "${V-}" = 0 ] || set -x
[ -n "${quiet}" ] || set -x
"cmd_$cmd" "$@"
}

Expand Down
50 changes: 50 additions & 0 deletions tools/webpack-jumpstart
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/sh

# Download pre-built dist/ for the current git SHA from GitHub
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should say dist/. As you noticed today, there's dist/guide which isn't a webpack, and the corresponding workflow is also called build-dist, not build-webpack. I wouldn't mind to rename the script accordingly as well (dist-jumpstart, or download-dist), but I feel for this PR I've already used up my quota of name bikeshedding; I can certainly live with the current one 😁


# These are produced by .github/workflows/build-dist.yml for every PR and push.
# This is a lot faster than having to npm install and run webpack.

# Returns 0 when successful, 1 in case of an error, or 2 in case the cache
# entry couldn't be found (but might be available after waiting a bit longer).

# Settings consumed by tools/git-utils.sh: which GitHub repository holds the
# cache, and which directory of the working tree it provides.
GITHUB_REPO='cockpit-dist'
SUBDIR='dist'

# Default to V=0 unless the caller set V; exported so that child processes
# (tools/node-modules below) inherit the same value.
export V="${V-0}"

set -eu
# Change to the parent of this script's directory, so that the relative
# tools/ paths below resolve.
cd "$(realpath -m "$0"/../..)"
. tools/git-utils.sh

# git-utils.sh sets $quiet to a non-empty value only for V=0; in every other
# case trace the commands we run.
[ -n "${quiet}" ] || set -x

# presumably (re)creates package-lock.json, which is compared against the
# cached copy further down -- confirm in tools/node-modules
tools/node-modules make_package_lock_json

# The checks below exit with status 1: these are errors that waiting for the
# cache to be populated will not resolve.
if [ -e dist ]; then
echo "jumpstart: dist/ already exists, skipping" >&2
exit 1
fi

if [ "${NODE_ENV-}" = "development" ]; then
echo 'jumpstart: only works with production builds (NODE_ENV != development)' >&2
exit 1
fi

# Changes below test/, packit.yaml and .github/ are excluded from this check.
if ! git diff --quiet -- ':^test' ':^packit.yaml' ':^.github'; then
echo 'jumpstart: uncommitted local changes, skipping download' >&2
exit 1
fi

# Cache entries are looked up by a tag named after the current commit. A
# failed fetch is reported with status 2, so that callers can tell "not
# available (yet)" apart from the hard errors above.
tag="sha-$(git rev-parse HEAD)"
if ! fetch_to_cache tag "${tag}"; then
echo "There is no cache entry ${tag}" >&2
exit 2
fi

# Refuse a cache entry whose package-lock.json differs from ours.
if ! cmp_from_cache "${tag}" "package-lock.json" "package-lock.json"; then
echo "The cached package-lock.json doesn't match our own" >&2
exit 1
fi

unpack_from_cache "${tag}"