Skip to content

Commit

Permalink
patch for managed identity storage authentication
Browse files Browse the repository at this point in the history
  • Loading branch information
DelphianCalamity committed Jul 7, 2024
1 parent 62d7efb commit 073cb5b
Show file tree
Hide file tree
Showing 14 changed files with 335 additions and 30 deletions.
6 changes: 3 additions & 3 deletions mlos_bench/mlos_bench/config/cli/azure-redis-bench.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
"environment": "environments/root/root-azure-redis.jsonc",
"storage": "storage/sqlite.jsonc",

"globals": [
"global_config_azure.jsonc"
],
// "globals": [
// "global_config_azure.jsonc"
// ],

"teardown": false,

Expand Down
21 changes: 12 additions & 9 deletions mlos_bench/mlos_bench/config/environments/apps/redis/redis.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,22 @@
"trial_id"
],
"setup": [
"$mountPoint/$experiment_id/$trial_id/scripts/setup-workload.sh",
"$mountPoint/$experiment_id/$trial_id/scripts/setup-app.sh"
"sudo /usr/local/bin/azure_file_share_service --function 'download' --local_path $mountPoint/$experiment_id/$trial_id/scripts/ --remote_path $experiment_id/$trial_id/scripts",
"sudo /usr/local/bin/azure_file_share_service --function 'download' --local_path $mountPoint/$experiment_id/$trial_id/input/redis.cfg --remote_path $experiment_id/$trial_id/input/redis.cfg",
"sudo bash $mountPoint/$experiment_id/$trial_id/scripts/setup-workload.sh",
"sudo bash $mountPoint/$experiment_id/$trial_id/scripts/setup-app.sh"
],
"run": [
"mkdir -p /tmp/mlos_bench/output/",
"$mountPoint/$experiment_id/$trial_id/scripts/run-workload.sh",
"mkdir -p $mountPoint/$experiment_id/$trial_id/output/",
"cp -r /tmp/mlos_bench/output/* $mountPoint/$experiment_id/$trial_id/output/"
"sudo mkdir -p /tmp/mlos_bench/output/",
"sudo bash $mountPoint/$experiment_id/$trial_id/scripts/run-workload.sh",
"sudo /usr/local/bin/azure_file_share_service --function 'upload' --local_path /tmp/mlos_bench/output/ --remote_path $experiment_id/$trial_id/output"
// "mkdir -p $mountPoint/$experiment_id/$trial_id/output/",
// "cp -r /tmp/mlos_bench/output/* $mountPoint/$experiment_id/$trial_id/output/"
],
"teardown": [
"$mountPoint/$experiment_id/$trial_id/scripts/cleanup-workload.sh",
"$mountPoint/$experiment_id/$trial_id/scripts/cleanup-app.sh",
"rm -r /tmp/mlos_bench/"
"sudo bash $mountPoint/$experiment_id/$trial_id/scripts/cleanup-workload.sh",
"sudo bash $mountPoint/$experiment_id/$trial_id/scripts/cleanup-app.sh"
// "rm -r /tmp/mlos_bench/"
]
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,34 +52,51 @@
"config": {
"required_args": [
"vmName",
"tenantId",
"managedIdentityClientId",
"storageAccountName",
"storageFileShareName",
"storageAccountKey",
"mountPoint",
"experiment_id",
"trial_id"
],
"wait_boot": true,
"shell_env_params": [
"tenantId",
"managedIdentityClientId",
"storageAccountName",
"storageFileShareName",
"storageAccountKey",
"mountPoint",
"experiment_id",
"trial_id"
],
"setup": [
"sudo mkdir -p $mountPoint",
"sudo mount -t cifs //$storageAccountName.file.core.windows.net/$storageFileShareName $mountPoint -o username=$storageAccountName,password=\"$storageAccountKey\",serverino,nosharesock,actimeo=30",
"sudo chmod +x $mountPoint",
"sudo apt install software-properties-common -y",
"sudo add-apt-repository ppa:deadsnakes/ppa -y",
"sudo apt-get update -y",
"sudo apt-get install python3.8 -y",
"sudo apt install python3-pip -y",
"sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1",
"sudo python -m pip install --upgrade pip setuptools wheel",
"sudo python -m pip install azure-file-share-service",
"echo storageAccountName=$storageAccountName | sudo tee -a /etc/environment",
"echo storageFileShareName=$storageFileShareName | sudo tee -a /etc/environment",
"echo managedIdentityClientId=$managedIdentityClientId | sudo tee -a /etc/environment",
"echo tenantId=$tenantId | sudo tee -a /etc/environment",
"source /etc/environment"

// "sudo mount -t cifs //$storageAccountName.file.core.windows.net/$storageFileShareName $mountPoint -o username=$storageAccountName,password=\"$storageAccountKey\",serverino,nosharesock,actimeo=30",
// TODO: Apply GRUB parameters from config on shared storage at:
// "$mountPoint/$experiment_id/$trial_id/input/grub.cfg"
// "sudo update-grub",
// "sudo shutdown -r now" // Reboot to apply the parameters
"cat $mountPoint/$experiment_id/$trial_id/input/grub.cfg"
// "cat $mountPoint/$experiment_id/$trial_id/input/grub.cfg"
],
"teardown": [
// "sudo umount $mountPoint"
"echo sudo umount $mountPoint"
"echo sudo rm -rf $mountPoint"
]
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@
"vmName",
"storageAccountName",
"storageFileShareName",
"storageAccountKey",
"mountPoint",
"experiment_id",
"trial_id"
Expand All @@ -65,15 +64,15 @@
"shell_env_params": [
"storageAccountName",
"storageFileShareName",
"storageAccountKey",
"mountPoint",
"experiment_id",
"trial_id"
],
"setup": [
"sudo mkdir -p $mountPoint",
"sudo mount -t cifs //$storageAccountName.file.core.windows.net/$storageFileShareName $mountPoint -o username=$storageAccountName,password=\"$storageAccountKey\",serverino,nosharesock,actimeo=30",
"sudo $mountPoint/$experiment_id/$trial_id/input/config-kernel.sh"
"sudo /usr/local/bin/azure_file_share_service --function 'download' --local_path $mountPoint/$experiment_id/$trial_id/input/ --remote_path $experiment_id/$trial_id/input",
"sudo bash $mountPoint/$experiment_id/$trial_id/input/config-kernel.sh"
// "sudo mount -t cifs //$storageAccountName.file.core.windows.net/$storageFileShareName $mountPoint -o username=$storageAccountName,password=\"$storageAccountKey\",serverino,nosharesock,actimeo=30",
]
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
"deploymentName": "$experiment_id",
"vmName": "os-autotune-linux-vm",

"subscription": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"managedIdentityClientId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"managedIdentityName": "mlos-managed-identity",
"tenantId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",

"resourceGroup": "os-autotune",
"location": "westus2",

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
"description": "The name of the resource group to place the deployment in (typically provided in the global config in order to omit from source control).",
"type": "string"
},
"managedIdentityName": {
"description": "The name of the managed identity used for storage authentication (typically provided in the global config in order to omit from source control).",
"type": "string"
},
"deploymentTemplatePath": {
"description": "Path to an ARM template file, or null if it should be skipped.",
"type": ["string", "null"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@
"description": "Azure storage file share name.",
"type": "string"
},
"storageAccountKey": {
"description": "Azure storage account key (typically provided in the global config in order to omit from source control).",
"managedIdentityClientId": {
"description": "Azure Managed Identity Client Id (typically provided in the global config in order to omit from source control).",
"type": "string"
},
"tenantId": {
"description": "Azure Tenant Id (typically provided in the global config in order to omit from source control).",
"type": "string"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"config": {
"storageAccountName": "PLACEHOLDER; e.g.: osatsharedstorage",
"storageFileShareName": "PLACEHOLDER; e.g.: os-autotune-file-share",
"storageAccountKey": "PLACEHOLDER; comes from global config"
"managedIdentityClientId": "PLACEHOLDER; comes from global config",
"tenantId": "PLACEHOLDER; comes from global config"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
// can be overridden by the parameters pushed from the caller Environment.
"subscription": "PLACEHOLDER; AZURE SUBSCRIPTION ID",
"resourceGroup": "PLACEHOLDER; e.g., os-autotune",
"managedIdentityName": "PLACEHOLDER; e.g., mlos-managed-identity",

"deploymentTemplatePath": "services/remote/azure/arm-templates/azuredeploy-ubuntu-vm.jsonc",

Expand Down
6 changes: 5 additions & 1 deletion mlos_bench/mlos_bench/environments/remote/host_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ def setup(self, tunables: TunableGroups, global_config: Optional[dict] = None) -
if status.is_pending():
(status, _) = self._host_service.wait_host_deployment(params, is_setup=True)

self._is_ready = status.is_succeeded()
(status_id_assign, params) = self._host_service.assign_managed_identity(self._params)
if status_id_assign.is_pending():
(status_id_assign, _) = self._host_service.wait_host_managed_identity_assignment(params)

self._is_ready = status.is_succeeded() and status_id_assign.is_succeeded()
return self._is_ready

def teardown(self) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ class AzureDeploymentService(Service, metaclass=abc.ABCMeta):
"?api-version=2022-05-01"
)

# ARM endpoint template for a single VM resource (Microsoft.Compute/virtualMachines).
# It is formatted with `subscription`, `resource_group` and `vm_name`, and is used
# both to PATCH the identity onto the VM and to GET the VM when polling the result.
_URL_ASSIGN_IDENTITY = (
    "https://management.azure.com"
    "/subscriptions/{subscription}"
    "/resourceGroups/{resource_group}"
    "/providers/Microsoft.Compute"
    "/virtualMachines/{vm_name}"
    "?api-version=2022-03-01"
)

def __init__(self,
config: Optional[Dict[str, Any]] = None,
global_config: Optional[Dict[str, Any]] = None,
Expand All @@ -73,6 +82,7 @@ def __init__(self,
check_required_params(self.config, [
"subscription",
"resourceGroup",
"managedIdentityName"
])

# These parameters can come from command line as strings, so conversion is needed.
Expand Down Expand Up @@ -392,6 +402,101 @@ def _check_deployment(self, params: dict) -> Tuple[Status, dict]: # pylint: di
_LOG.error("Response: %s :: %s", response, response.text)
return (Status.FAILED, {})

def _wait_host_managed_identity_assignment(self, params: dict) -> Tuple[Status, dict]:
    """
    Block until the managed identity assignment on the VM is resolved.

    Repeatedly runs `_check_managed_identity_assinment` through `_wait_while`,
    using `Status.PENDING` as the status to keep looping on.

    Parameters
    ----------
    params : dict
        Flat dictionary of (key, value) pairs of tunable parameters.

    Returns
    -------
    result : (Status, dict)
        A pair of Status and result, exactly as returned by `_wait_while`.
        Status is expected to be one of {PENDING, SUCCEEDED, FAILED, TIMED_OUT}.
    """
    full_params = self._set_default_params(params)
    vm_name = self._deploy_params["vmName"]
    _LOG.info("Wait for %s to %s", vm_name, "be assigned managed identity")
    poll_func = self._check_managed_identity_assinment
    return self._wait_while(poll_func, Status.PENDING, full_params)

def _check_managed_identity_assinment(self, params: dict) -> Tuple[Status, dict]:  # pylint: disable=too-many-return-statements
    """
    Check whether the user-assigned managed identity is attached to the VM.

    Issues a GET on the VM resource and inspects both its provisioning state
    and the `identity.userAssignedIdentities` map of the response.

    Parameters
    ----------
    params : dict
        Flat dictionary of (key, value) pairs of tunable parameters.
        Merged over the service config to resolve `subscription`,
        `resourceGroup` and `managedIdentityName`, and passed to
        `_get_session()`.

    Returns
    -------
    result : (Status, dict={})
        A pair of Status and result. The result is always {}.
        Status is one of {SUCCEEDED, PENDING, FAILED}:
        SUCCEEDED if the VM is provisioned and lists the identity,
        PENDING if the VM is still being updated, is not found yet (404),
        or the request timed out, FAILED otherwise.
    """
    params = self._set_default_params(params)
    config = merge_parameters(
        dest=self.config.copy(),
        source=params,
        required_keys=[
            "subscription",
            "resourceGroup",
            "managedIdentityName",
        ]
    )

    vm_name = self._deploy_params["vmName"]
    subscription_id = config["subscription"]
    resource_group = config["resourceGroup"]
    identity_name = config["managedIdentityName"]

    _LOG.info("Check identity assignment: %s", vm_name)

    url = self._URL_ASSIGN_IDENTITY.format(
        subscription=subscription_id,
        resource_group=resource_group,
        vm_name=vm_name,
    )

    session = self._get_session(params)
    try:
        response = session.get(url, timeout=self._request_timeout)
    except requests.exceptions.ReadTimeout:
        _LOG.warning("Request timed out after %.2f s: %s", self._request_timeout, url)
        # The waiter polls this method with Status.PENDING as its loop status,
        # so a transient timeout must report PENDING to be retried rather than
        # ending the wait with a non-success status.
        return (Status.PENDING, {})
    except requests.exceptions.RequestException as ex:
        _LOG.exception("Error in request checking managed identity assignment", exc_info=ex)
        return (Status.FAILED, {})

    _LOG.debug("Response: %s", response)

    if response.status_code == 404:
        # VM resource is not visible yet; keep polling.
        return (Status.PENDING, {})

    if response.status_code != 200:
        _LOG.error("Response: %s :: %s", response, response.text)
        return (Status.FAILED, {})

    output = response.json()
    state = output.get("properties", {}).get("provisioningState", "")
    _LOG.debug("Provisioning state of %s: %s", vm_name, state)

    identity_id = (
        f"/subscriptions/{subscription_id}/resourceGroups/{resource_group}"
        f"/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{identity_name}"
    )
    # The `identity` section or its `userAssignedIdentities` map may be absent
    # (e.g., no identity or only a system-assigned one), so default to empty.
    assigned = (output.get("identity") or {}).get("userAssignedIdentities") or {}
    # ARM resource IDs are case-insensitive and the service may echo them back
    # with different casing, so compare case-insensitively.
    wanted = identity_id.lower()
    is_assigned = any(str(key).lower() == wanted for key in assigned)

    if state == "Succeeded" and is_assigned:
        return (Status.SUCCEEDED, {})
    if state in {"Accepted", "Creating", "Deleting", "Running", "Updating"}:
        return (Status.PENDING, {})

    _LOG.error("Response: %s :: %s", response, json.dumps(output, indent=2))
    return (Status.FAILED, {})

def _provision_resource(self, params: dict) -> Tuple[Status, dict]:
"""
Attempts to (re)deploy a resource.
Expand Down Expand Up @@ -465,3 +570,74 @@ def _provision_resource(self, params: dict) -> Tuple[Status, dict]:
_LOG.error("Response: %s :: %s", response, response.text)
# _LOG.error("Bad Request:\n%s", response.request.body)
return (Status.FAILED, {})

def _assign_managed_identity(self, params: dict) -> Tuple[Status, dict]:
    """
    Attempts to assign a user-assigned managed identity to the VM.

    Sends a PATCH on the VM resource whose body sets `identity.type` to
    "UserAssigned" and lists the resource ID of the configured identity.

    Parameters
    ----------
    params : dict
        Flat dictionary of (key, value) pairs of tunable parameters.
        Merged over the service config to resolve `subscription`,
        `resourceGroup` and `managedIdentityName`.

    Returns
    -------
    result : (Status, dict)
        A pair of Status and result.
        Status is one of {PENDING, FAILED}.
        On HTTP 200 the result is the merged config; on HTTP 201 it is the
        input `params` updated with the parameters extracted from the
        response JSON; it is {} if the status is FAILED (including when the
        request itself could not be completed).
    """
    params = self._set_default_params(params)
    # Same required keys as the polling method, so a missing setting is
    # reported by `merge_parameters` instead of a bare KeyError below.
    config = merge_parameters(
        dest=self.config.copy(),
        source=params,
        required_keys=[
            "subscription",
            "resourceGroup",
            "managedIdentityName",
        ]
    )

    vm_name = self._deploy_params["vmName"]
    subscription_id = config["subscription"]
    resource_group = config["resourceGroup"]
    identity_name = config["managedIdentityName"]

    _LOG.info("Assign managed identity: %s :: %s", vm_name, params)

    url = self._URL_ASSIGN_IDENTITY.format(
        subscription=subscription_id,
        resource_group=resource_group,
        vm_name=vm_name,
    )

    identity_id = (
        f"/subscriptions/{subscription_id}/resourceGroups/{resource_group}"
        f"/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{identity_name}"
    )
    json_req = {
        "identity": {
            "type": "UserAssigned",
            "userAssignedIdentities": {
                identity_id: {}
            }
        }
    }

    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug("Request: PATCH %s\n%s", url, json.dumps(json_req, indent=2))

    try:
        response = requests.patch(url, json=json_req,
                                  headers=self._get_headers(), timeout=self._request_timeout)
    except requests.exceptions.RequestException as ex:
        # Report transport-level failures as FAILED, like the polling method
        # does, instead of letting them propagate out of environment setup.
        _LOG.exception("Error in request assigning managed identity", exc_info=ex)
        return (Status.FAILED, {})

    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug("Response: %s\n%s", response,
                   json.dumps(response.json(), indent=2)
                   if response.content else "")
    else:
        _LOG.info("Response: %s", response)

    if response.status_code == 200:
        # Request accepted; the caller polls the VM to confirm the assignment.
        return (Status.PENDING, config)

    if response.status_code == 201:
        output = self._extract_arm_parameters(response.json())
        if _LOG.isEnabledFor(logging.DEBUG):
            _LOG.debug("Extracted parameters:\n%s", json.dumps(output, indent=2))
        params.update(output)
        params.setdefault("asyncResultsUrl", url)
        params.setdefault("deploymentName", config["deploymentName"])
        return (Status.PENDING, params)

    _LOG.error("Response: %s :: %s", response, response.text)
    return (Status.FAILED, {})
Loading

0 comments on commit 073cb5b

Please sign in to comment.