[MRG] add "sticky builds" functionality #949

Merged: 10 commits, Sep 25, 2019
14 changes: 14 additions & 0 deletions binderhub/app.py
@@ -196,6 +196,19 @@ def _valid_badge_base_url(self, proposal):
config=True,
)

sticky_builds = Bool(
False,
help="""
Attempt to assign builds for the same repository to the same node.

In order to speed up re-builds of a repository, all its builds will
be assigned to the same node in the cluster.

Note: This feature only works if you also enable docker-in-docker support.
""",
config=True,
)

use_registry = Bool(
True,
help="""
@@ -555,6 +568,7 @@ def initialize(self, *args, **kwargs):
"build_image": self.build_image,
'build_node_selector': self.build_node_selector,
'build_pool': self.build_pool,
"sticky_builds": self.sticky_builds,
'log_tail_lines': self.log_tail_lines,
'pod_quota': self.pod_quota,
'per_repo_quota': self.per_repo_quota,
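For deployments, the new trait is an ordinary traitlets option on the BinderHub application, so it can be switched on from the config file. A minimal sketch (the file name binderhub_config.py is only a placeholder, and per the help text above this only pays off together with docker-in-docker support):

# binderhub_config.py -- illustrative sketch, not part of this diff.
# Route repeat builds of a repository to the same node so they can
# reuse that node's Docker layer cache. Requires the docker-in-docker
# setup mentioned in the trait's help text.
c.BinderHub.sticky_builds = True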
94 changes: 73 additions & 21 deletions binderhub/build.py
@@ -12,6 +12,8 @@
from tornado.ioloop import IOLoop
from tornado.log import app_log

from .utils import rendezvous_rank


class Build:
"""Represents a build of a git repository into a docker image.
@@ -36,7 +38,7 @@ class Build:
"""
def __init__(self, q, api, name, namespace, repo_url, ref, git_credentials, build_image,
image_name, push_secret, memory_limit, docker_host, node_selector,
appendix='', log_tail_lines=100):
appendix='', log_tail_lines=100, sticky_builds=False):
self.q = q
self.api = api
self.repo_url = repo_url
@@ -56,6 +58,10 @@ def __init__(self, q, api, name, namespace, repo_url, ref, git_credentials, buil
self.stop_event = threading.Event()
self.git_credentials = git_credentials

self.sticky_builds = sticky_builds

self._component_label = "binderhub-build"

def get_cmd(self):
"""Get the cmd to run to build the image"""
cmd = [
@@ -144,8 +150,71 @@ def progress(self, kind, obj):
"""Put the current action item into the queue for execution."""
self.main_loop.add_callback(self.q.put, {'kind': kind, 'payload': obj})

def get_affinity(self):
"""Determine the affinity term for the build pod.

There are two affinity strategies; which one is used depends on how
the BinderHub is configured.

In the default setup, the affinity of each build pod is a pod "anti-affinity",
which causes build pods to prefer to be scheduled on separate nodes.

In a setup with docker-in-docker enabled, pods for a particular
repository prefer to schedule on the same node in order to reuse the
docker layer cache of previous builds.
"""
dind_pods = self.api.list_namespaced_pod(
self.namespace,
label_selector="component=dind,app=binder",
)

if self.sticky_builds and dind_pods:
node_names = [pod.spec.node_name for pod in dind_pods.items]
ranked_nodes = rendezvous_rank(node_names, self.repo_url)
best_node_name = ranked_nodes[0]

affinity = client.V1Affinity(
node_affinity=client.V1NodeAffinity(
preferred_during_scheduling_ignored_during_execution=[
client.V1PreferredSchedulingTerm(
weight=100,
preference=client.V1NodeSelectorTerm(
match_fields=[
client.V1NodeSelectorRequirement(
key="metadata.name",
operator="In",
values=[best_node_name],
)
]
),
)
]
)
)

else:
affinity = client.V1Affinity(
pod_anti_affinity=client.V1PodAntiAffinity(
preferred_during_scheduling_ignored_during_execution=[
client.V1WeightedPodAffinityTerm(
weight=100,
pod_affinity_term=client.V1PodAffinityTerm(
topology_key="kubernetes.io/hostname",
label_selector=client.V1LabelSelector(
match_labels=dict(
component=self._component_label
)
)
)
)
]
)
)

return affinity

def submit(self):
"""Submit a image spec to openshift's s2i and wait for completion """
"""Submit a build pod to create the image for the repository."""
volume_mounts = [
client.V1VolumeMount(mount_path="/var/run/docker.sock", name="docker-socket")
]
@@ -166,13 +235,12 @@ def submit(self):
if self.git_credentials:
env.append(client.V1EnvVar(name='GIT_CREDENTIAL_ENV', value=self.git_credentials))

component_label = "binderhub-build"
self.pod = client.V1Pod(
metadata=client.V1ObjectMeta(
name=self.name,
labels={
"name": self.name,
"component": component_label,
"component": self._component_label,
},
annotations={
"binder-repo": self.repo_url,
@@ -211,23 +279,7 @@ def submit(self):
node_selector=self.node_selector,
volumes=volumes,
restart_policy="Never",
affinity=client.V1Affinity(
pod_anti_affinity=client.V1PodAntiAffinity(
preferred_during_scheduling_ignored_during_execution=[
client.V1WeightedPodAffinityTerm(
weight=100,
pod_affinity_term=client.V1PodAffinityTerm(
topology_key="kubernetes.io/hostname",
label_selector=client.V1LabelSelector(
match_labels=dict(
component=component_label
)
)
)
)
]
)
)
affinity=self.get_affinity()
)
)

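build.py now imports rendezvous_rank from binderhub/utils.py, whose diff is not shown in this section. As a rough sketch of the idea, not necessarily the exact implementation, rendezvous hashing scores every candidate node against the key by hashing the combined "node-key" string and returns the nodes sorted by that score, so the top-ranked node for a given repo_url stays the same as long as that node remains in the pool:

# Illustrative sketch of rendezvous (highest-random-weight) hashing.
# The real binderhub.utils.rendezvous_rank may differ in details such
# as the hash function it uses.
import hashlib

def rendezvous_rank(buckets, key):
    """Return `buckets` ranked for `key`, best match first."""
    ranking = []
    for bucket in buckets:
        # The score depends on bucket *and* key together, so removing an
        # unrelated bucket never changes the relative order of the rest.
        score = hashlib.sha256(f"{bucket}-{key}".encode()).hexdigest()
        ranking.append((score, bucket))
    return [bucket for _, bucket in sorted(ranking, reverse=True)]

get_affinity() above then uses ranked_nodes[0] as the preferred node for the build pod's node affinity term.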
3 changes: 2 additions & 1 deletion binderhub/builder.py
@@ -340,7 +340,8 @@ async def get(self, provider_prefix, _unescaped_spec):
node_selector=self.settings['build_node_selector'],
appendix=appendix,
log_tail_lines=self.settings['log_tail_lines'],
git_credentials=provider.git_credentials
git_credentials=provider.git_credentials,
sticky_builds=self.settings['sticky_builds'],
)

with BUILDS_INPROGRESS.track_inprogress():
54 changes: 54 additions & 0 deletions binderhub/tests/test_build.py
@@ -2,12 +2,15 @@

import json
import sys
from collections import namedtuple
from unittest import mock
from urllib.parse import quote

import pytest
from tornado.httputil import url_concat

from kubernetes import client

from binderhub.build import Build
from .utils import async_requests

@@ -51,6 +54,57 @@ async def test_build(app, needs_build, needs_launch, always_build, slug, pytestc
assert r.url.startswith(final['url'])


def test_default_affinity():
# check that the default affinity is a pod anti-affinity
build = Build(
mock.MagicMock(), api=mock.MagicMock(), name='test_build',
namespace='build_namespace', repo_url=mock.MagicMock(),
ref=mock.MagicMock(), build_image=mock.MagicMock(),
image_name=mock.MagicMock(), push_secret=mock.MagicMock(),
memory_limit=mock.MagicMock(), git_credentials=None,
docker_host='http://mydockerregistry.local',
node_selector=mock.MagicMock())

affinity = build.get_affinity()

assert isinstance(affinity, client.V1Affinity)
assert affinity.node_affinity is None
assert affinity.pod_affinity is None
assert affinity.pod_anti_affinity is not None


def test_sticky_builds_affinity():
# Setup some mock objects for the response from the k8s API
Pod = namedtuple("Pod", "spec")
PodSpec = namedtuple("PodSpec", "node_name")
PodList = namedtuple("PodList", "items")

mock_k8s_api = mock.MagicMock()
mock_k8s_api.list_namespaced_pod.return_value = PodList(
[Pod(PodSpec("node-a")), Pod(PodSpec("node-b"))],
)

build = Build(
mock.MagicMock(), api=mock_k8s_api, name='test_build',
namespace='build_namespace', repo_url=mock.MagicMock(),
ref=mock.MagicMock(), build_image=mock.MagicMock(),
image_name=mock.MagicMock(), push_secret=mock.MagicMock(),
memory_limit=mock.MagicMock(), git_credentials=None,
docker_host='http://mydockerregistry.local',
node_selector=mock.MagicMock(),
sticky_builds=True)

affinity = build.get_affinity()

assert isinstance(affinity, client.V1Affinity)
assert affinity.node_affinity is not None
assert affinity.pod_affinity is None
assert affinity.pod_anti_affinity is None

# One of the two nodes we have in our mock should be the preferred node
assert affinity.node_affinity.preferred_during_scheduling_ignored_during_execution[0].preference.match_fields[0].values[0] in ("node-a", "node-b")


def test_git_credentials_passed_to_podspec_upon_submit():
git_credentials = {
'client_id': 'my_username',
59 changes: 59 additions & 0 deletions binderhub/tests/test_utils.py
@@ -0,0 +1,59 @@
from binderhub import utils


def test_rendezvous_rank():
# check that a key doesn't move if its assigned bucket remains but the
# other buckets are removed
key = "crazy frog is a crazy key"
first_round = utils.rendezvous_rank(["b1", "b2", "b3"], key)
second_round = utils.rendezvous_rank([first_round[0], first_round[1]], key)

assert first_round[0] == second_round[0], key


def test_rendezvous_independence():
# check that the relative ranking of 80 buckets doesn't depend on the
# presence of 20 extra buckets
key = "k1"
eighty_buckets = utils.rendezvous_rank(["b%i" % i for i in range(80)], key)
hundred_buckets = utils.rendezvous_rank(["b%i" % i for i in range(100)], key)

for i in range(80, 100):
hundred_buckets.remove("b%i" % i)

assert eighty_buckets == hundred_buckets


def test_rendezvous_redistribution():
# check that approximately a third of keys move to the new bucket
# when one is added
Member Author:

An idea for how to test that keys move when a new bucket becomes available and that the pattern of movement is right. I think this is how it should be but not sure. WDYT?

Member:

Excellent!

Since we actually hash "node-key", this test doesn't need to be run multiple times. But if we hashed the node separately from the key, as consistent hashing was described to do (in contrast to our rendezvous hashing), then by fluke we could end up with two node hashes spaced so luckily that the new node catches 1/3 of the keys anyway.

But hmmm, could you position nodes like pointers on a clock so that they have a fair share initially and still have a fair share afterwards?

That's a clean test to have as well, I think: check that we get a 1/2 distribution initially and then a 1/3 distribution after. Combined with the previous test about perfect stability, we capture all kinds of logic. You are already doing this to some degree, but it is captured mostly by the abs(from_b1 - from_b2) < 10 statement.

n_keys = 3000

# count how many keys were moved, which bucket a key started from and
# which bucket a key was moved from (to the new bucket)
n_moved = 0
from_bucket = {"b1": 0, "b2": 0}
start_in = {"b1": 0, "b2": 0}

for i in range(n_keys):
key = f"key-{i}"
two_buckets = utils.rendezvous_rank(["b1", "b2"], key)
start_in[two_buckets[0]] += 1
three_buckets = utils.rendezvous_rank(["b1", "b2", "b3"], key)

if two_buckets[0] != three_buckets[0]:
n_moved += 1
from_bucket[two_buckets[0]] += 1

# should always move to the newly added bucket
assert three_buckets[0] == "b3"

# because of statistical fluctuations we have to leave some room when
# making this comparison
assert 0.31 < n_moved / n_keys < 0.35
# keys should move from the two original buckets with approximately
# equal probability. We pick 30 because it is "about right"
assert abs(from_bucket["b1"] - from_bucket["b2"]) < 30
# the initial distribution of keys should be roughly the same
# We pick 30 because it is "about right"
assert abs(start_in["b1"] - start_in["b2"]) < 30
Member:

The scale of "about right" is different between the from_bucket and start_in differences: on average, the random walk distance when you flip +1 or -1 over and over is sqrt(N).

For the from_bucket case that is sqrt(~1000) ≈ 32, and for the start_in case it is sqrt(3000) ≈ 55.
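To spell out the sqrt(N) scale: if each relevant key contributes an independent step X_i = ±1 to the difference being checked, then

\operatorname{Var}\Bigl(\sum_{i=1}^{n} X_i\Bigr) = \sum_{i=1}^{n} \operatorname{Var}(X_i) = n
\quad\Longrightarrow\quad
\sigma\Bigl(\sum_{i=1}^{n} X_i\Bigr) = \sqrt{n},

so with roughly 1000 moved keys the natural scale of from_bucket["b1"] - from_bucket["b2"] is sqrt(1000) ≈ 32, and with 3000 keys the scale of start_in["b1"] - start_in["b2"] is sqrt(3000) ≈ 55, as noted above.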
