Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core]: Fix ConnectionError on Autoscaler CR lookups in K8s clusters with custom DNS for Kubernetes API. #48541

Merged
merged 9 commits into from
Nov 8, 2024
38 changes: 29 additions & 9 deletions python/ray/autoscaler/_private/kuberay/node_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@

RAY_HEAD_POD_NAME = os.getenv("RAY_HEAD_POD_NAME")


def _format_k8s_host(host: str, port: str) -> str:
    """Join an API-server host and port into a ``host:port`` authority.

    Args:
        host: Hostname, IPv4 address, or IPv6 address of the API server,
            optionally prefixed with a URL scheme (e.g. ``https://``).
        port: Port of the API server, as a string.

    Returns:
        ``host:port`` with any scheme prefix preserved. A bare IPv6 literal
        is wrapped in square brackets so its colons are not confused with
        the port separator.
    """
    scheme, separator, bare_host = host.rpartition("://")
    # A colon in the scheme-less host means an IPv6 literal; URLs require
    # such literals to be bracketed ("[fd00::1]:443", not "fd00::1:443").
    if ":" in bare_host and not bare_host.startswith("["):
        bare_host = f"[{bare_host}]"
    return f"{scheme}{separator}{bare_host}:{port}"


# https://kubernetes.io/docs/tasks/run-application/access-api-from-pod
# While running in a Pod, your container can create an HTTPS URL for the
# Kubernetes API server by fetching the KUBERNETES_SERVICE_HOST and
# KUBERNETES_SERVICE_PORT_HTTPS environment variables.
KUBERNETES_SERVICE_HOST = os.getenv(
    "KUBERNETES_SERVICE_HOST", "https://kubernetes.default"
)
KUBERNETES_SERVICE_PORT = os.getenv("KUBERNETES_SERVICE_PORT_HTTPS", "443")
# KUBERNETES_SERVICE_HOST may be a bare IPv4/IPv6 address when set by
# Kubernetes, so the host and port are joined through the helper above.
KUBERNETES_HOST = _format_k8s_host(KUBERNETES_SERVICE_HOST, KUBERNETES_SERVICE_PORT)
# Key for GKE label that identifies which multi-host replica a pod belongs to
REPLICA_INDEX_KEY = "replicaIndex"

Expand Down Expand Up @@ -179,29 +188,40 @@ def load_k8s_secrets() -> Tuple[Dict[str, str], str]:


def url_from_resource(
    namespace: str,
    path: str,
    kuberay_crd_version: str = KUBERAY_CRD_VER,
    kubernetes_host: str = KUBERNETES_HOST,
) -> str:
    """Convert resource path to REST URL for Kubernetes API server.

    Args:
        namespace: The K8s namespace of the resource
        path: The part of the resource path that starts with the resource type.
            Supported resource types are "pods" and "rayclusters".
        kuberay_crd_version: The API version of the KubeRay CRD.
            Looks like "v1alpha1", "v1".
        kubernetes_host: The host (and port) of the Kubernetes API server.
            If not provided, the module-level KUBERNETES_HOST is used, which
            is built from $KUBERNETES_SERVICE_HOST and
            $KUBERNETES_SERVICE_PORT_HTTPS and falls back to
            "https://kubernetes.default:443". When set by Kubernetes,
            $KUBERNETES_SERVICE_HOST could be an IP address with no scheme.
            That's why the https scheme is added here when it is missing.

    Returns:
        The full HTTPS URL of the resource on the Kubernetes API server.

    Raises:
        ValueError: If ``kubernetes_host`` uses the plain "http://" scheme.
        NotImplementedError: If ``path`` does not start with a supported
            resource type.
    """
    # Refuse to talk to the API server over plain HTTP.
    if kubernetes_host.startswith("http://"):
        raise ValueError("Kubernetes host must be accessed over HTTPS.")
    if not kubernetes_host.startswith("https://"):
        kubernetes_host = "https://" + kubernetes_host

    if path.startswith("pods"):
        # Pods live in the core API group.
        api_group = "/api/v1"
    elif path.startswith("rayclusters"):
        # RayCluster is a custom resource in the ray.io API group.
        api_group = "/apis/ray.io/" + kuberay_crd_version
    else:
        raise NotImplementedError(f"Tried to access unknown entity at {path}")

    return kubernetes_host + api_group + "/namespaces/" + namespace + "/" + path


def _worker_group_index(raycluster: Dict[str, Any], group_name: str) -> int:
Expand Down