Skip to content

Commit

Permalink
[ctrmgr]: Container image clean up bug fix (sonic-net#15772)
Browse files Browse the repository at this point in the history
Why I did it
When do clean up container images, current code has two bugs need to be fixed. And some variables' name maybe cause confused, change the variables' name.

Work item tracking
Microsoft ADO (number only): 24502294

How I did it
We do clean up after tag latest successfully. But currently tag latest function only return 0 and 1, 0 means succeed and 1 means failed, when we get 1, we will retry, when we get 0, we will do clean up. Actually the code 0 includes another case we don't need to do clean up. The case is that when we are doing tag latest, the container image we want to tag maybe not running, so we can not tag latest and don't need to cleanup, we need to separate this case from 0, return -1 now.

When local mode(v1) -> kube mode(v2) happens, one problem is how to handle the local image, there are two cases. one case is that there was one kube v1 container dry-run(cause we don't relace the local if kube version = local version), we will remove the kube v1 image and tag the local version with ACR prefix and remove local v1 local tag. Another case is that there was no kube v1 container dry-run, we remove the local v1 image directly, cause the local v1 image should not be the last desire version.

About the docker_id variable, it may cause confused, it's actually docker image id, so rename the variable. About the two dicts and the list, rename them to be more readable.

How to verify it
Check tag latest and image clean up result.
  • Loading branch information
lixiaoyuner committed Jul 14, 2023
1 parent 6d491fa commit abe212e
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 27 deletions.
8 changes: 6 additions & 2 deletions src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ def on_state_update(self, key, op, data):

def do_tag_latest(self, feat, docker_id, image_ver):
ret = kube_commands.tag_latest(feat, docker_id, image_ver)
if ret != 0:
if ret == 1:
# Tag latest failed. Retry after an interval
self.start_time = datetime.datetime.now()
self.start_time += datetime.timedelta(
Expand All @@ -590,7 +590,7 @@ def do_tag_latest(self, feat, docker_id, image_ver):

log_debug("Tag latest as local failed retry after {} seconds @{}".
format(remote_ctr_config[TAG_RETRY], self.start_time))
else:
elif ret == 0:
last_version = self.st_data[feat][ST_FEAT_CTR_STABLE_VER]
if last_version == image_ver:
last_version = self.st_data[feat][ST_FEAT_CTR_LAST_VER]
Expand All @@ -600,6 +600,10 @@ def do_tag_latest(self, feat, docker_id, image_ver):
self.st_data[ST_FEAT_CTR_LAST_VER] = last_version
self.st_data[ST_FEAT_CTR_STABLE_VER] = image_ver
self.do_clean_image(feat, image_ver, last_version)
elif ret == -1:
# This means the container we want to tag latest is not running
# so we don't need to do clean up
pass

def do_clean_image(self, feat, current_version, last_version):
ret = kube_commands.clean_image(feat, current_version, last_version)
Expand Down
53 changes: 30 additions & 23 deletions src/sonic-ctrmgrd/ctrmgr/kube_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def tag_latest(feat, docker_id, image_ver):
else:
log_error(err)
elif ret == -1:
ret = 0
log_debug(out)
else:
log_error(err)
return ret
Expand All @@ -487,45 +487,52 @@ def _do_clean(feat, current_version, last_version):
err = ""
out = ""
ret = 0
DOCKER_ID = "docker_id"
IMAGE_ID = "image_id"
REPO = "repo"
_, image_info, err = _run_command("docker images |grep {} |grep -v latest |awk '{{print $1,$2,$3}}'".format(feat))
if image_info:
version_dict = {}
version_dict_default = {}
remote_image_version_dict = {}
local_image_version_dict = {}
for info in image_info.split("\n"):
rep, version, docker_id = info.split()
rep, version, image_id = info.split()
if len(rep.split("/")) == 1:
version_dict_default[version] = {DOCKER_ID: docker_id, REPO: rep}
local_image_version_dict[version] = {IMAGE_ID: image_id, REPO: rep}
else:
version_dict[version] = {DOCKER_ID: docker_id, REPO: rep}
remote_image_version_dict[version] = {IMAGE_ID: image_id, REPO: rep}

if current_version in version_dict:
image_prefix = version_dict[current_version][REPO]
del version_dict[current_version]
if current_version in remote_image_version_dict:
image_prefix = remote_image_version_dict[current_version][REPO]
del remote_image_version_dict[current_version]
else:
out = "Current version {} doesn't exist.".format(current_version)
ret = 0
return ret, out, err
# should be only one item in version_dict_default
for k, v in version_dict_default.items():
local_version, local_repo, local_docker_id = k, v[REPO], v[DOCKER_ID]
tag_res, _, err = _run_command("docker tag {} {}:{} && docker rmi {}:{}".format(
local_docker_id, image_prefix, local_version, local_repo, local_version))
# should be only one item in local_image_version_dict
for k, v in local_image_version_dict.items():
local_version, local_repo, local_image_id = k, v[REPO], v[IMAGE_ID]
# if there is a kube image with same version, need to remove the kube version
# and tag the local version to kube version for fallback preparation
# and remove the local version
if local_version in remote_image_version_dict:
tag_res, _, err = _run_command("docker rmi {}:{} && docker tag {} {}:{} && docker rmi {}:{}".format(
image_prefix, local_version, local_image_id, image_prefix, local_version, local_repo, local_version))
# if there is no kube image with same version, just remove the local version
else:
tag_res, _, err = _run_command("docker rmi {}:{}".format(local_repo, local_version))
if tag_res == 0:
msg = "Tag {} local version images successfully".format(feat)
log_debug(msg)
else:
ret = 1
err = "Failed to tag {} local version images. Err: {}".format(feat, err)
return ret, out, err
if last_version in version_dict:
del version_dict[last_version]
versions = [item[DOCKER_ID] for item in version_dict.values()]
if versions:
clean_res, _, err = _run_command("docker rmi {} --force".format(" ".join(versions)))

if last_version in remote_image_version_dict:
del remote_image_version_dict[last_version]

image_id_remove_list = [item[IMAGE_ID] for item in remote_image_version_dict.values()]
if image_id_remove_list:
clean_res, _, err = _run_command("docker rmi {} --force".format(" ".join(image_id_remove_list)))
else:
clean_res = 0
if clean_res == 0:
Expand All @@ -534,7 +541,7 @@ def _do_clean(feat, current_version, last_version):
err = "Failed to clean {} old version images. Err: {}".format(feat, err)
ret = 1
else:
err = "Failed to docker images |grep {} |awk '{{print $3}}'".format(feat)
err = "Failed to docker images |grep {} |awk '{{print $3}}'. Error: {}".format(feat, err)
ret = 1

return ret, out, err
Expand Down
23 changes: 21 additions & 2 deletions src/sonic-ctrmgrd/tests/kube_commands_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@
},
2: {
common_test.DESCR: "Tag a unstable container",
common_test.RETVAL: 0,
common_test.RETVAL: -1,
common_test.ARGS: ["snmp", "123456", "v1"],
common_test.PROC_CMD: [
"docker ps |grep 123456"
Expand Down Expand Up @@ -382,7 +382,7 @@
common_test.ARGS: ["snmp", "20201231.84", ""],
common_test.PROC_CMD: [
"docker images |grep snmp |grep -v latest |awk '{print $1,$2,$3}'",
"docker tag 507f8d28bf6e sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker rmi docker-sonic-telemetry:20201231.74"
"docker rmi docker-sonic-telemetry:20201231.74"
],
common_test.PROC_OUT: [
"docker-sonic-telemetry 20201231.74 507f8d28bf6e\n\
Expand All @@ -394,6 +394,25 @@
0
]
},
5: {
common_test.DESCR: "Clean image successfuly(local to dry-kube to kube)",
common_test.RETVAL: 0,
common_test.ARGS: ["snmp", "20201231.84", "20201231.74"],
common_test.PROC_CMD: [
"docker images |grep snmp |grep -v latest |awk '{print $1,$2,$3}'",
"docker rmi sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker tag 507f8d28bf6e sonick8scue.azurecr.io/docker-sonic-telemetry:20201231.74 && docker rmi docker-sonic-telemetry:20201231.74"
],
common_test.PROC_OUT: [
"docker-sonic-telemetry 20201231.74 507f8d28bf6e\n\
sonick8scue.azurecr.io/docker-sonic-telemetry 20201231.74 507f8d28bf6f\n\
sonick8scue.azurecr.io/docker-sonic-telemetry 20201231.84 507f8d28bf6g",
""
],
common_test.PROC_CODE: [
0,
0
]
},
}

class TestKubeCommands(object):
Expand Down

0 comments on commit abe212e

Please sign in to comment.