diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 8cb9f8d..6a63e99 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3-beta +current_version = 0.4-beta parse = (?P\d+)\.(?P\d+)(-(?P.*))? message = Bump version: {current_version} -> {new_version} serialize = diff --git a/README.md b/README.md index fa42295..4922805 100644 --- a/README.md +++ b/README.md @@ -58,21 +58,20 @@ $ export PATH=$PATH:$(python3 -m site --user-base)/bin ## Usage ## ``` -$ gce-rescue --help - - USAGE: gce-rescue [flags] -flags: - -./gce-rescue.py: - --[no]debug: Print to the log file in debug level. - (default: 'false') - --[no]force: Don't ask for confirmation. - (default: 'false') - --name: Instance name. - --project: The project-id that has the instance. - --zone: Zone where the instance is created. - -Try --helpfull to get a list of all flags. +gce-rescue --help +usage: gce-rescue [-h] [-p PROJECT] -z ZONE -n NAME [-d] [-f] [--skip-snapshot] + +GCE Rescue v0.4-beta - Set/Reset GCE instances to boot in rescue mode. + +optional arguments: + -h, --help show this help message and exit + -p PROJECT, --project PROJECT + The project-id that has the instance. + -z ZONE, --zone ZONE Zone where the instance is created. + -n NAME, --name NAME Instance name. + -d, --debug Print to the log file in debug leve + -f, --force Don't ask for confirmation. + --skip-snapshot Skip backing up the disk using a snapshot. ``` - ### --zone ### @@ -87,8 +86,12 @@ Try --helpfull to get a list of all flags. - If provided, the log output will be set to DEBUG level. (OPTIONAL) - The log file will be created on ./ containing the VM name and timestamp on the name, that can be used to help to troubleshoot failed executions as well as to manually recover the instance's original configuration, if necessary. + - > The log files contain important information about the initial state of the VM instance that may be required to manually restore it. 
def set_configs(user_args):
  """Copy the parsed command-line options into the module-level config.

  Args:
    user_args: object exposing `debug` and `skip_snapshot` attributes
      (presumably the parsed argparse namespace - confirm against caller).
  """
  # (config key, attribute name) pairs, applied in the same order as before.
  option_map = (
    ('debug', 'debug'),
    ('skip-snapshot', 'skip_snapshot'),
  )
  for config_key, attr_name in option_map:
    config[config_key] = getattr(user_args, attr_name)
a/gce_rescue/gce.py b/gce_rescue/gce.py index bf43b56..aac89d2 100644 --- a/gce_rescue/gce.py +++ b/gce_rescue/gce.py @@ -13,14 +13,16 @@ # limitations under the License. """ Initilization Instance() with VM information. """ +import sys from googleapiclient.discovery import Resource +from googleapiclient.errors import HttpError from dataclasses import dataclass, field from typing import Dict, List, Union from time import time from gce_rescue.tasks.backup import backup_metadata_items -from gce_rescue.tasks.disks import list_disk +from gce_rescue.tasks.disks import list_disk, list_snapshot from gce_rescue.tasks.pre_validations import Validations from gce_rescue.config import get_config @@ -42,7 +44,6 @@ def get_instance_info( **project_data, instance = name).execute() - def guess_guest(data: Dict) -> str: """Determined which Guest OS Family is being used and select a different OS for recovery disk. @@ -70,7 +71,7 @@ def validate_instance_mode(data: Dict) -> Dict: 'rescue-mode': False, 'ts': generate_ts() } - if 'metadata' in data and 'items' in data['metadata']: + if 'metadata' in data and 'items' in data['metadata']: metadata = data['metadata'] for item in metadata['items']: if item['key'] == 'rescue-mode': @@ -113,12 +114,16 @@ def __post_init__(self): test_mode=self.test_mode, **self.project_data ) - self.compute = check.compute - self.project = check.adc_project - self.data = get_instance_info( + try: + self.compute = check.compute + self.project = check.adc_project + self.data = get_instance_info( compute=self.compute, name=self.name, project_data=self.project_data) + except HttpError as e: + print(e.reason) + sys.exit(1) self._rescue_mode_status = validate_instance_mode(self.data) self.ts = self._rescue_mode_status['ts'] @@ -216,3 +221,10 @@ def backup_items(self, v: List[str]) -> None: @property def disks(self) -> List[str]: return self._disks + + @property + def snapshot(self) -> str: + if not self.rescue_mode_status['rescue-mode']: + return 
def tip_restore_disk(vm: Instance, snapshot=False) -> str:
  """Build the message shown once the instance has been restored.

  Args:
    vm: instance object; its name is always read, and disks['disk_name']
      and ts are read only when the snapshot tip is included.
    snapshot: any truthy value appends the snapshot restore tip; a falsy
      value (the default) leaves it out.

  Returns:
    The confirmation text, followed by the snapshot tip when requested.
  """
  tip = ''
  if snapshot:
    # The snapshot name is rebuilt from the disk name and timestamp; the
    # value of the snapshot argument is only used as a flag.
    snapshot_name = f'{vm.disks["disk_name"]}-{vm.ts}'
    tip = ''.join((
      ' Use the snapshot below ',
      'if you need to restore the modification made while the instance was ',
      f'in rescue mode.\n Snapshot name: {snapshot_name}\n',
      ' More information: ',
      'https://cloud.google.com/compute/docs/disks/restore-snapshot\n',
    ))
  return f'└── The instance {vm.name} was restored!{tip}'
get_config('skip-snapshot'): + _logger.info(f'Skipping snapshot backup.') + else: + take_snapshot(vm) + async_backup_thread = True total_tasks = len(tasks) tracker = Tracker(total_tasks) @@ -132,4 +140,8 @@ def call_tasks(vm: Instance, action: str) -> None: execute(**args) tracker.advance(step = 1) - tracker.finish() \ No newline at end of file + if async_backup_thread: + _logger.info(f'Waiting for async backup to finish') + take_snapshot(vm, join_snapshot=True) + _logger.info('done.') + tracker.finish() diff --git a/gce_rescue/tasks/backup.py b/gce_rescue/tasks/backup.py index e08ee23..351c471 100644 --- a/gce_rescue/tasks/backup.py +++ b/gce_rescue/tasks/backup.py @@ -30,7 +30,7 @@ def backup_metadata_items(data: Dict) -> List: return data['metadata']['items'] return [] -def _create_snapshot(vm) -> Dict: +def create_snapshot(vm) -> Dict: """ Create a snaphost of the instance boot disk, adding self._ts to the disk name. https://cloud.google.com/compute/docs/reference/rest/v1/disks/createSnapshot @@ -39,11 +39,14 @@ def _create_snapshot(vm) -> Dict: """ disk_name = vm.disks['disk_name'] + # Patch issues/23 + region = vm.zone[:-2] snapshot_name = f'{disk_name}-{vm.ts}' snapshot_body = { - 'name': snapshot_name + 'name': snapshot_name, + 'storageLocations': [ region ] } - _logger.info(f'Creating snapshot {snapshot_name}... ') + _logger.info(f'Creating snapshot {snapshot_body}... 
') operation = vm.compute.disks().createSnapshot( **vm.project_data, disk = disk_name, @@ -51,8 +54,3 @@ def _create_snapshot(vm) -> Dict: result = wait_for_operation(vm, oper=operation) return result -def backup(vm) -> None: - """ - List of methods to backup data and information from the orignal instance - """ - _create_snapshot(vm) diff --git a/gce_rescue/tasks/backup_test.py b/gce_rescue/tasks/backup_test.py index 8f886d8..19151f8 100644 --- a/gce_rescue/tasks/backup_test.py +++ b/gce_rescue/tasks/backup_test.py @@ -39,7 +39,7 @@ def test_backup_metadata_items(self): def test_backup(self): """Test backup task.""" - backup.backup(self.vm) + backup.create_snapshot(self.vm) if __name__ == '__main__': diff --git a/gce_rescue/tasks/disks.py b/gce_rescue/tasks/disks.py index bc19a84..657fc9c 100644 --- a/gce_rescue/tasks/disks.py +++ b/gce_rescue/tasks/disks.py @@ -16,14 +16,17 @@ from typing import Dict import logging +from threading import Thread import googleapiclient.errors from gce_rescue.tasks.keeper import wait_for_operation -from gce_rescue.tasks.backup import backup +from gce_rescue.tasks.backup import create_snapshot from gce_rescue.utils import ThreadHandler as Handler +from googleapiclient.errors import HttpError _logger = logging.getLogger(__name__) +snapshot_thread = None def _create_rescue_disk(vm, source_disk: str) -> Dict: """ Create new temporary rescue disk based on source_disk. 
def list_snapshot(vm) -> str:
  """Return the name of the boot disk backup snapshot if it can be fetched.

  The expected name is '<disk_name>-<ts>', built from vm.disks['disk_name']
  and vm.ts, and is looked up in vm.project via vm.compute.snapshots().get().

  Args:
    vm: object exposing disks, ts, project and compute as read below.

  Returns:
    The snapshot name when the lookup succeeds, or '' when the API call
    raises HttpError.
  """
  snapshot_name = f"{vm.disks['disk_name']}-{vm.ts}"
  try:
    # Only the success of the call matters; the response body is not used.
    vm.compute.snapshots().get(
      snapshot=snapshot_name,
      project=vm.project
    ).execute()
  except HttpError as err:
    status = getattr(getattr(err, 'resp', None), 'status', None)
    if status == 404:
      _logger.info('Snapshot was not found for VM in active rescue mode')
    else:
      # Keep the best-effort contract (still return ''), but do not report
      # permission or server errors as a missing snapshot.
      _logger.warning(f'Could not look up snapshot {snapshot_name}: {err}')
    return ''
  return snapshot_name
vm.compute.instances().setMetadata( **vm.project_data, instance = vm.name,