Skip to content

Commit

Permalink
assigning user id using ARM at provision time
Browse files Browse the repository at this point in the history
  • Loading branch information
DelphianCalamity committed Jul 8, 2024
1 parent 0307e67 commit 15fd0a0
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 263 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
// The name of the experiment.
// This is a required value and should be unique across incompatible experiments
// (e.g., those with differing tunables, scripts, versions, etc.), since it also
// controls how trial data is stored and reloaded to resume and repopulate the
// optimizer state.
"experiment_id": "RedisTest10",

// Any global parameter can be used as a dollar variable in the global config:
"deploymentName": "$experiment_id",
"vmName": "$experiment_id-vm",

// Azure subscription, tenant, and user-assigned managed identity settings.
// NOTE(review): these look like concrete subscription/tenant/client IDs rather
// than placeholders - confirm they are meant to be kept in source control.
"subscription": "84334f8c-9e72-424d-8fc2-fd6da32e9ad6",
"managedIdentityClientId": "cb2dbf06-0b16-4058-8e33-7561327c8bcf",
"managedIdentityName": "workfm-managed-identity",
"tenantId": "72f988bf-86f1-41af-91ab-2d7cd011db47",

// Resource group and Azure region used for the deployment.
"resourceGroup": "t-kellyko-WorkFM",
"location": "eastus2",

// Virtual network and subnet names.
"virtualNetworkName": "kelkost-dv-node-vnet",
"subnetName": "default",

// Storage account and file share names.
"storageAccountName": "eastus2benchbasestorage",
"storageFileShareName": "eastus2benchbasestoragefileshare",

// VM size and Ubuntu image version.
"vmSize": "Standard_B2s",
"ubuntuOSVersion": "18.04-LTS",

// Maps each key below to the list of tunable parameter groups selected for it.
// The comment above each entry lists the group names available for that key.
"tunable_params_map": {

// VM provisioning parameter groups (see `azure-vm-tunables.jsonc`):
// ["azure-vm"] (not used at the moment)
"provision": [],

// Boot-time Linux parameter groups (see `linux-boot-tunables.jsonc`):
// ["linux-kernel-boot"]
"linux-boot": ["linux-kernel-boot"],

// Runtime Linux parameter groups (see `linux-runtime-tunables.jsonc`):
// ["linux-swap", "linux-hugepages-2048kB", "linux-scheduler"]
"linux-runtime": ["linux-scheduler"],

// Redis config parameter groups (see `redis-tunables.jsonc`):
// ["redis"]
"redis": []
},

// Optimization target(s): metric name mapped to a direction.
// Presumably this minimizes the "score" metric - confirm against the optimizer config.
"optimization_targets": {"score": "min"}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@
"description": "The name of the resource group to place the deployment in (typically provided in the global config in order to omit from source control).",
"type": "string"
},
"managedIdentityName": {
"description": "The name of the managed identity used for storage authentication (typically provided in the global config in order to omit from source control).",
"type": "string"
},
"deploymentTemplatePath": {
"description": "Path to an ARM template file, or null if it should be skipped.",
"type": ["string", "null"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
"description": "OS Autotune Linux VM"
}
},
"managedIdentityName": {
"type": "string",
"metadata": {
"description": "User managed identity name"
}
},
"customData": {
"type": "string",
"defaultValue": "",
Expand Down Expand Up @@ -187,6 +193,12 @@
"apiVersion": "2021-11-01",
"name": "[parameters('vmName')]",
"location": "[parameters('location')]",
"identity": {
"type": "userAssigned",
"userAssignedIdentities": {
"[resourceID('Microsoft.ManagedIdentity/userAssignedIdentities/',parameters('managedIdentityName'))]": {}
}
},
"properties": {
"hardwareProfile": {
"vmSize": "[parameters('vmSize')]"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@
// can be overridden by the parameters pushed from the caller Environment.
"subscription": "PLACEHOLDER; AZURE SUBSCRIPTION ID",
"resourceGroup": "PLACEHOLDER; e.g., os-autotune",
"managedIdentityName": "PLACEHOLDER; e.g., mlos-managed-identity",

"deploymentTemplatePath": "services/remote/azure/arm-templates/azuredeploy-ubuntu-vm.jsonc",

// Make sure to list all ARM template parameters that can be overridden by the caller.
"deploymentTemplateParameters": {

"managedIdentityName": "PLACEHOLDER; e.g., mlos-managed-identity",
"storageAccountName": "PLACEHOLDER; e.g., osatsharedstorage",
"storageFileShareName": "PLACEHOLDER; e.g., os-autotune-file-share",
"location": "PLACEHOLDER; e.g., westus2",
Expand Down
6 changes: 1 addition & 5 deletions mlos_bench/mlos_bench/environments/remote/host_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,7 @@ def setup(self, tunables: TunableGroups, global_config: Optional[dict] = None) -
if status.is_pending():
(status, _) = self._host_service.wait_host_deployment(params, is_setup=True)

(status_id_assign, params) = self._host_service.assign_managed_identity(self._params)
if status_id_assign.is_pending():
(status_id_assign, _) = self._host_service.wait_host_managed_identity_assignment(params)

self._is_ready = status.is_succeeded() and status_id_assign.is_succeeded()
self._is_ready = status.is_succeeded()
return self._is_ready

def teardown(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,6 @@ class AzureDeploymentService(Service, metaclass=abc.ABCMeta):
"?api-version=2022-05-01"
)

# URL template of a single VM resource on the Azure management endpoint.
# Formatted with `subscription`, `resource_group`, and `vm_name`.
_URL_ASSIGN_IDENTITY = (
    "https://management.azure.com"
    "/subscriptions/{subscription}"
    "/resourceGroups/{resource_group}"
    "/providers/Microsoft.Compute"
    "/virtualMachines/{vm_name}"
    "?api-version=2022-03-01"
)

def __init__(self,
config: Optional[Dict[str, Any]] = None,
global_config: Optional[Dict[str, Any]] = None,
Expand All @@ -82,7 +73,6 @@ def __init__(self,
check_required_params(self.config, [
"subscription",
"resourceGroup",
"managedIdentityName"
])

# These parameters can come from command line as strings, so conversion is needed.
Expand Down Expand Up @@ -402,101 +392,6 @@ def _check_deployment(self, params: dict) -> Tuple[Status, dict]: # pylint: di
_LOG.error("Response: %s :: %s", response, response.text)
return (Status.FAILED, {})

def _wait_host_managed_identity_assignment(self, params: dict) -> Tuple[Status, dict]:
    """
    Wait for the managed identity assignment on the VM to be resolved.

    Hands `_check_managed_identity_assinment` to `_wait_while` together with
    `Status.PENDING` and returns whatever `_wait_while` produces.

    Parameters
    ----------
    params : dict
        Flat dictionary of (key, value) pairs of tunable parameters.

    Returns
    -------
    result : (Status, dict)
        A pair of Status and result.
        Status is one of {PENDING, SUCCEEDED, FAILED, TIMED_OUT}
        Result is info on the operation runtime if SUCCEEDED, otherwise {}.
    """
    params = self._set_default_params(params)
    vm_name = self._deploy_params["vmName"]
    _LOG.info("Wait for %s to %s", vm_name, "be assigned managed identity")
    poll_identity = self._check_managed_identity_assinment
    return self._wait_while(poll_identity, Status.PENDING, params)

def _check_managed_identity_assinment(self, params: dict) -> Tuple[Status, dict]:  # pylint: disable=too-many-return-statements
    """
    Check if the user-assigned managed identity is attached to the VM.

    Issues a GET on the VM resource and inspects its `provisioningState`
    and `identity.userAssignedIdentities` fields.

    Parameters
    ----------
    params : dict
        Flat dictionary of (key, value) pairs of tunable parameters.
        Merged with the service config; `subscription`, `resourceGroup`
        and `managedIdentityName` must be present after the merge.

    Returns
    -------
    result : (Status, dict={})
        A pair of Status and result. The result is always {}.
        Status is one of {SUCCEEDED, PENDING, FAILED}:
        SUCCEEDED if the VM is provisioned and lists the identity,
        PENDING if the VM is still being updated, was not found (404),
        or the request timed out, and FAILED otherwise.
    """
    params = self._set_default_params(params)
    config = merge_parameters(
        dest=self.config.copy(),
        source=params,
        required_keys=[
            "subscription",
            "resourceGroup",
            "managedIdentityName",
        ]
    )

    vm_name = self._deploy_params["vmName"]
    subscription_id = config["subscription"]
    resource_group = config["resourceGroup"]
    managed_identity_name = config["managedIdentityName"]

    _LOG.info("Check identity assignment: %s", vm_name)

    url = self._URL_ASSIGN_IDENTITY.format(
        subscription=subscription_id,
        resource_group=resource_group,
        vm_name=vm_name,
    )

    session = self._get_session(params)
    try:
        response = session.get(url, timeout=self._request_timeout)
    except requests.exceptions.ReadTimeout:
        _LOG.warning("Request timed out after %.2f s: %s", self._request_timeout, url)
        # A timed-out poll carries no information about the assignment.
        # Report PENDING (the status the waiting caller passes to `_wait_while`)
        # instead of RUNNING, which is outside this method's documented statuses.
        return (Status.PENDING, {})
    except requests.exceptions.RequestException as ex:
        _LOG.exception("Error in request checking identity assignment", exc_info=ex)
        return (Status.FAILED, {})

    _LOG.debug("Response: %s", response)

    if response.status_code == 404:
        # Presumably the VM resource is not visible yet; keep polling.
        return (Status.PENDING, {})

    if response.status_code != 200:
        _LOG.error("Response: %s :: %s", response, response.text)
        return (Status.FAILED, {})

    output = response.json()
    state = output.get("properties", {}).get("provisioningState", "")
    _LOG.debug("VM %s provisioning state: %s", vm_name, state)

    # Azure resource IDs are case-insensitive and the service may echo them
    # back with different casing, so compare lower-cased IDs.
    identity_id = (
        f"/subscriptions/{subscription_id}/resourceGroups/{resource_group}"
        f"/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{managed_identity_name}"
    ).lower()
    # The `identity` block can be missing, null, or lack user-assigned entries.
    assigned = (output.get("identity") or {}).get("userAssignedIdentities") or {}
    assigned_ids = {key.lower() for key in assigned}

    if state == "Succeeded" and identity_id in assigned_ids:
        return (Status.SUCCEEDED, {})
    if state in {"Accepted", "Creating", "Deleting", "Running", "Updating"}:
        return (Status.PENDING, {})

    # NOTE(review): a VM that is "Succeeded" but does not list the identity
    # is reported as FAILED (unchanged behavior) - confirm this is intended.
    _LOG.error("Response: %s :: %s", response, json.dumps(output, indent=2))
    return (Status.FAILED, {})

def _provision_resource(self, params: dict) -> Tuple[Status, dict]:
"""
Attempts to (re)deploy a resource.
Expand Down Expand Up @@ -570,74 +465,3 @@ def _provision_resource(self, params: dict) -> Tuple[Status, dict]:
_LOG.error("Response: %s :: %s", response, response.text)
# _LOG.error("Bad Request:\n%s", response.request.body)
return (Status.FAILED, {})

def _assign_managed_identity(self, params: dict) -> Tuple[Status, dict]:
    """
    Attempts to assign a user-assigned managed identity to the VM.

    Sends a PATCH request for the VM resource whose body contains a
    `UserAssigned` identity block referencing `managedIdentityName`.

    Parameters
    ----------
    params : dict
        Flat dictionary of (key, value) pairs of tunable parameters.
        Tunables are variable parameters that, together with the
        Environment configuration, are sufficient to provision the resource.

    Returns
    -------
    result : (Status, dict={})
        A pair of Status and result.
        On HTTP 200 the result is the merged service config and `params`;
        on HTTP 201 it is the input `params` plus the parameters extracted
        from the response JSON; it is {} if the status is FAILED.
        Status is one of {PENDING, FAILED}
    """
    params = self._set_default_params(params)
    # Validate the keys used below up front, like the identity check does,
    # instead of failing later with a bare KeyError.
    config = merge_parameters(
        dest=self.config.copy(),
        source=params,
        required_keys=[
            "subscription",
            "resourceGroup",
            "managedIdentityName",
        ]
    )

    vm_name = self._deploy_params["vmName"]
    subscription_id = config["subscription"]
    resource_group = config["resourceGroup"]
    managed_identity_name = config["managedIdentityName"]

    _LOG.info("Assign managed identity: %s :: %s", vm_name, params)

    url = self._URL_ASSIGN_IDENTITY.format(
        subscription=subscription_id,
        resource_group=resource_group,
        vm_name=vm_name,
    )

    identity_id = (
        f"/subscriptions/{subscription_id}/resourceGroups/{resource_group}"
        f"/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{managed_identity_name}"
    )
    json_req = {
        "identity": {
            "type": "UserAssigned",
            "userAssignedIdentities": {
                identity_id: {}
            }
        }
    }

    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug("Request: PATCH %s\n%s", url, json.dumps(json_req, indent=2))

    try:
        response = requests.patch(url, json=json_req,
                                  headers=self._get_headers(), timeout=self._request_timeout)
    except requests.exceptions.RequestException as ex:
        # Report network errors (including timeouts) as a failed assignment
        # rather than letting them propagate to the caller.
        _LOG.exception("Error in request assigning managed identity", exc_info=ex)
        return (Status.FAILED, {})

    if _LOG.isEnabledFor(logging.DEBUG):
        try:
            body = json.dumps(response.json(), indent=2) if response.content else ""
        except ValueError:
            # Debug logging must not crash on a non-JSON response body.
            body = response.text
        _LOG.debug("Response: %s\n%s", response, body)
    else:
        _LOG.info("Response: %s", response)

    if response.status_code == 200:
        return (Status.PENDING, config)

    if response.status_code == 201:
        output = self._extract_arm_parameters(response.json())
        if _LOG.isEnabledFor(logging.DEBUG):
            _LOG.debug("Extracted parameters:\n%s", json.dumps(output, indent=2))
        params.update(output)
        params.setdefault("asyncResultsUrl", url)
        params.setdefault("deploymentName", config["deploymentName"])
        return (Status.PENDING, params)

    _LOG.error("Response: %s :: %s", response, response.text)
    return (Status.FAILED, {})
46 changes: 2 additions & 44 deletions mlos_bench/mlos_bench/services/remote/azure/azure_vm_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,6 @@ def __init__(self,
config, global_config, parent,
self.merge_methods(methods, [
# SupportsHostProvisioning
self.assign_managed_identity,
self.wait_host_managed_identity_assignment,
self.provision_host,
self.deprovision_host,
self.deallocate_host,
Expand Down Expand Up @@ -168,9 +166,8 @@ def _set_default_params(self, params: dict) -> dict: # pylint: disable=no-sel
# Try and provide a semi sane default for the deploymentName if not provided
# since this is a common way to set the deploymentName and can save some
# config work for the caller.
deploy_params = super().deploy_params
if "vmName" in deploy_params and "deploymentName" not in params:
params["deploymentName"] = f"{deploy_params['vmName']}-deployment"
if "vmName" in params and "deploymentName" not in params:
params["deploymentName"] = f"{params['vmName']}-deployment"
_LOG.info("deploymentName missing from params. Defaulting to '%s'.", params["deploymentName"])
return params

Expand All @@ -195,25 +192,6 @@ def wait_host_deployment(self, params: dict, *, is_setup: bool) -> Tuple[Status,
"""
return self._wait_deployment(params, is_setup=is_setup)

def wait_host_managed_identity_assignment(self, params: dict) -> Tuple[Status, dict]:
    """
    Wait until a pending identity assignment on an Azure VM is resolved.

    Delegates to `_wait_host_managed_identity_assignment`.

    Parameters
    ----------
    params : dict
        Flat dictionary of (key, value) pairs of tunable parameters.

    Returns
    -------
    result : (Status, dict)
        A pair of Status and result.
        Status is one of {PENDING, SUCCEEDED, FAILED, TIMED_OUT}
        Result is info on the operation runtime if SUCCEEDED, otherwise {}.
    """
    outcome = self._wait_host_managed_identity_assignment(params)
    return outcome

def wait_host_operation(self, params: dict) -> Tuple[Status, dict]:
"""
Waits for a pending operation on an Azure VM to resolve to SUCCEEDED or FAILED.
Expand Down Expand Up @@ -296,26 +274,6 @@ def deprovision_host(self, params: dict) -> Tuple[Status, dict]:
vm_name=config["vmName"],
))

def assign_managed_identity(self, params: dict) -> Tuple[Status, dict]:
    """
    Assign the managed identity to the Azure VM.

    Delegates to `_assign_managed_identity`.

    Parameters
    ----------
    params : dict
        Flat dictionary of (key, value) pairs of tunable parameters.
        HostEnv tunables are variable parameters that, together with the
        HostEnv configuration, are sufficient to provision a VM.

    Returns
    -------
    result : (Status, dict={})
        A pair of Status and result. The result is the input `params` plus the
        parameters extracted from the response JSON, or {} if the status is FAILED.
        Status is one of {PENDING, SUCCEEDED, FAILED}
    """
    outcome = self._assign_managed_identity(params)
    return outcome

def deallocate_host(self, params: dict) -> Tuple[Status, dict]:
"""
Deallocates the VM on Azure by shutting it down then releasing the compute resources.
Expand Down
Loading

0 comments on commit 15fd0a0

Please sign in to comment.