Skip to content

Commit 7d90cff

Browse files
authored
Reduced set: Safe-deployment enhancements (#223)
* Reduced set - safe-deployment enhancements * Informational message requested added. * Increased test coverage * Test coverage and retry exhaustion throw * Minor comments
1 parent b50ef62 commit 7d90cff

19 files changed

+320
-81
lines changed

src/core/src/bootstrap/Constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ class PatchOperationTopLevelErrorCode(EnumBackport):
270270
ERROR = 1
271271

272272
class PatchOperationErrorCodes(EnumBackport):
273+
INFORMATIONAL = "INFORMATIONAL"
273274
DEFAULT_ERROR = "ERROR" # default error code
274275
OPERATION_FAILED = "OPERATION_FAILED"
275276
PACKAGE_MANAGER_FAILURE = "PACKAGE_MANAGER_FAILURE"
@@ -310,6 +311,7 @@ class TelemetryTaskName(EnumBackport):
310311
TELEMETRY_NOT_COMPATIBLE_ERROR_MSG = "Unsupported older Azure Linux Agent version. To resolve: http://aka.ms/UpdateLinuxAgent"
311312
TELEMETRY_COMPATIBLE_MSG = "Minimum Azure Linux Agent version prerequisite met"
312313
PYTHON_NOT_COMPATIBLE_ERROR_MSG = "Unsupported older Python version. Minimum Python version required is 2.7. [DetectedPythonVersion={0}]"
314+
INFO_STRICT_SDP_SUCCESS = "Success: Safely patched your VM in a AzGPS-coordinated global rollout. https://aka.ms/AzGPS/StrictSDP [Target={0}]"
313315
UTC_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
314316

315317
# EnvLayer Constants

src/core/src/core_logic/ExecutionConfig.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def __init__(self, env_layer, composite_logger, execution_parameters):
5959
self.excluded_package_name_mask_list = self.__get_execution_configuration_value_safely(self.config_settings, Constants.ConfigSettings.PATCHES_TO_EXCLUDE, [])
6060
self.maintenance_run_id = self.__get_execution_configuration_value_safely(self.config_settings, Constants.ConfigSettings.MAINTENANCE_RUN_ID)
6161
self.health_store_id = self.__get_execution_configuration_value_safely(self.config_settings, Constants.ConfigSettings.HEALTH_STORE_ID)
62+
self.max_patch_publish_date = self.__get_max_patch_publish_date(self.health_store_id)
6263
if self.operation == Constants.INSTALLATION:
6364
self.reboot_setting = self.config_settings[Constants.ConfigSettings.REBOOT_SETTING] # expected to throw if not present
6465
else:
@@ -99,6 +100,18 @@ def __transform_execution_config_for_auto_assessment(self):
99100
self.patch_mode = None
100101
self.composite_logger.log_debug("Setting execution configuration values for auto assessment. [GeneratedActivityId={0}][StartTime={1}]".format(self.activity_id, str(self.start_time)))
101102

103+
def __get_max_patch_publish_date(self, health_store_id):
    # type: (str) -> str
    """ Obtains implicit date ceiling for published date - converts pub_off_sku_2024.04.01 to 20240401T000000Z

    The health store id is split on "_". A ceiling is produced only when the id has exactly four segments and the
    last segment is a digits-only YYYY.MM.DD value. Any other input (None, empty string, unexpected shape)
    yields an empty string.

    Returns:
        The ceiling formatted as "<YYYYMMDD>T000000Z", or an empty string when none can be derived.
    """
    max_patch_publish_date = str()

    if health_store_id:
        segments = health_store_id.split("_")
        if len(segments) == 4:
            date_parts = segments[3].split(".")
            # Require a digits-only 4.2.2 layout so a malformed id can never be turned into a bogus date ceiling
            has_date_layout = [len(part) for part in date_parts] == [4, 2, 2]
            if has_date_layout and all(part.isdigit() for part in date_parts):
                max_patch_publish_date = "{0}T000000Z".format("".join(date_parts))

    self.composite_logger.log_debug("[EC] Getting max patch publish date. [MaxPatchPublishDate={0}][HealthStoreId={1}]".format(str(max_patch_publish_date), str(health_store_id)))
    return max_patch_publish_date
114+
102115
@staticmethod
103116
def __get_value_from_argv(argv, key, default_value=Constants.DEFAULT_UNSPECIFIED_VALUE):
104117
""" Discovers the value associated with a specific parameter in input arguments. """

src/core/src/core_logic/PatchInstaller.py

Lines changed: 79 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,20 @@ def start_installation(self, simulate=False):
8181
self.composite_logger.log_debug("Attempting to reboot the machine prior to patch installation as there is a reboot pending...")
8282
reboot_manager.start_reboot_if_required_and_time_available(maintenance_window.get_remaining_time_in_minutes(None, False))
8383

84-
# Install Updates
85-
installed_update_count, update_run_successful, maintenance_window_exceeded = self.install_updates(maintenance_window, package_manager, simulate)
84+
if self.execution_config.max_patch_publish_date != str():
85+
self.package_manager.set_max_patch_publish_date(self.execution_config.max_patch_publish_date)
86+
87+
if self.package_manager.max_patch_publish_date != str():
88+
""" Strict SDP with the package manager that supports it """
89+
installed_update_count, update_run_successful, maintenance_window_exceeded = self.install_updates_azgps_coordinated(maintenance_window, package_manager, simulate)
90+
package_manager.set_package_manager_setting(Constants.PACKAGE_MGR_SETTING_REPEAT_PATCH_OPERATION, bool(not update_run_successful))
91+
if update_run_successful:
92+
self.composite_logger.log_debug(Constants.INFO_STRICT_SDP_SUCCESS.format(self.execution_config.max_patch_publish_date))
93+
self.status_handler.add_error_to_status(Constants.INFO_STRICT_SDP_SUCCESS.format(self.execution_config.max_patch_publish_date), error_code=Constants.PatchOperationErrorCodes.INFORMATIONAL)
94+
else:
95+
""" Regular patch installation flow - non-AzGPS-coordinated and (AzGPS-coordinated without strict SDP)"""
96+
installed_update_count, update_run_successful, maintenance_window_exceeded = self.install_updates(maintenance_window, package_manager, simulate)
97+
8698
retry_count = 1
8799
# Repeat patch installation if flagged as required and time is available
88100
if not maintenance_window_exceeded and package_manager.get_package_manager_setting(Constants.PACKAGE_MGR_SETTING_REPEAT_PATCH_OPERATION, False):
@@ -148,6 +160,66 @@ def raise_if_min_python_version_not_met(self):
148160
self.status_handler.set_installation_substatus_json(status=Constants.STATUS_ERROR)
149161
raise Exception(error_msg)
150162

163+
def install_updates_azgps_coordinated(self, maintenance_window, package_manager, simulate=False):
    """ Special-casing installation as it meets the following criteria:
        - Maintenance window is always guaranteed to be nearly 4 hours (235 minutes). Customer-facing maintenance windows are much larger (system limitation).
        - Barring reboot, the core Azure customer-base moving to coordinated, unattended upgrades is currently on a 24x7 MW.
        - Built in service-level retries and management of outcomes. Reboot will only happen within the core maintenance window (and won't be delayed).
        - Corner-case transient failures are immaterial to the overall functioning of AzGPS coordinated upgrades (eventual consistency).
        - Only security updates (no other configuration) - simplistic execution flow; no advanced evaluation is desired or necessary.

        Flow: records package statuses for the security updates, then calls the package manager's coordinated
        security install up to Constants.MAX_INSTALLATION_RETRY_COUNT times, reconciling status after every attempt.
        Any exception (including retry exhaustion and an insufficient time buffer) is caught here: it is logged,
        added to status, the max patch publish date is reset on self.package_manager, and failure is returned.

        Parameters:
            maintenance_window: queried for the remaining time in minutes after each attempt.
            package_manager: provides the update lists and performs the coordinated install.
            simulate: accepted for signature parity with install_updates; not referenced in this method.

        Returns:
            (installed_update_count, patch_installation_successful, maintenance_window_exceeded).
            maintenance_window_exceeded is never set to True by this method.
    """
    installed_update_count = 0  # includes dependencies
    patch_installation_successful = True
    maintenance_window_exceeded = False

    try:
        all_packages, all_package_versions = package_manager.get_all_updates(True)
        packages, package_versions = package_manager.get_security_updates()
        # Baseline consumed by perform_status_reconciliation_conditionally
        self.last_still_needed_packages = list(all_packages)
        self.last_still_needed_package_versions = list(all_package_versions)

        not_included_packages, not_included_package_versions = self.get_not_included_updates(package_manager, packages)
        packages, package_versions, self.skipped_esm_packages, self.skipped_esm_package_versions, self.esm_packages_found_without_attach = package_manager.separate_out_esm_packages(packages, package_versions)

        self.status_handler.set_package_install_status(not_included_packages, not_included_package_versions, Constants.NOT_SELECTED)
        self.status_handler.set_package_install_status(packages, package_versions, Constants.PENDING)
        self.status_handler.set_package_install_status(self.skipped_esm_packages, self.skipped_esm_package_versions, Constants.FAILED)

        self.status_handler.set_package_install_status_classification(packages, package_versions, classification="Security")
        package_manager.set_security_esm_package_status(Constants.INSTALLATION, packages)

        for i in range(0, Constants.MAX_INSTALLATION_RETRY_COUNT):
            code, out = package_manager.install_security_updates_azgps_coordinated()
            installed_update_count += self.perform_status_reconciliation_conditionally(package_manager)

            # NOTE(review): this buffer check runs before the return code is inspected, so an attempt that
            # returned 0 with under 120 minutes left is still routed to the failure path - confirm this is intended.
            remaining_time = maintenance_window.get_remaining_time_in_minutes()
            if remaining_time < 120:
                raise Exception("Not enough safety-buffer to continue strict safe deployment.")

            if code != 0:   # will need to be modified for other package managers
                if i < Constants.MAX_INSTALLATION_RETRY_COUNT - 1:
                    # NOTE(review): the delay is i * 5 seconds, so the first retry (i == 0) happens without a pause
                    time.sleep(i * 5)
                    self.composite_logger.log_warning("[PI][AzGPS-Coordinated] Non-zero return. Retrying. [RetryCount={0}][TimeRemainingInMins={1}][Code={2}][Output={3}]".format(str(i), str(remaining_time), str(code), out))
                else:
                    raise Exception("AzGPS Strict SDP retries exhausted. [RetryCount={0}]".format(str(i)))
            else:
                patch_installation_successful = True
                break
    except Exception as error:
        error_msg = "AzGPS strict safe deployment to target date hit a failure. Defaulting to regular upgrades. [MaxPatchPublishDate={0}]".format(self.execution_config.max_patch_publish_date)
        self.composite_logger.log_error(error_msg + "[Error={0}]".format(repr(error)))
        self.status_handler.add_error_to_status(error_msg)
        self.package_manager.set_max_patch_publish_date()   # fall-back
        patch_installation_successful = False

    return installed_update_count, patch_installation_successful, maintenance_window_exceeded
222+
151223
def install_updates(self, maintenance_window, package_manager, simulate=False):
152224
"""wrapper function of installing updates"""
153225
self.composite_logger.log("\n\nGetting available updates...")
@@ -576,20 +648,10 @@ def mark_installation_completed(self):
576648
self.status_handler.set_installation_substatus_json(status=Constants.STATUS_WARNING)
577649

578650
# Update patch metadata in status for auto patching request, to be reported to healthStore
579-
# When available, HealthStoreId always takes precedence over the 'overriden' Maintenance Run Id that is being re-purposed for other reasons
580-
# In the future, maintenance run id will be completely deprecated for health store reporting.
581-
patch_version_raw = self.execution_config.health_store_id if self.execution_config.health_store_id is not None else self.execution_config.maintenance_run_id
582-
self.composite_logger.log_debug("Patch version raw value set. [Raw={0}][HealthStoreId={1}][MaintenanceRunId={2}]".format(str(patch_version_raw), str(self.execution_config.health_store_id), str(self.execution_config.maintenance_run_id)))
583-
584-
if patch_version_raw is not None:
585-
try:
586-
patch_version = datetime.datetime.strptime(patch_version_raw.split(" ")[0], "%m/%d/%Y").strftime('%Y.%m.%d')
587-
except ValueError as e:
588-
patch_version = str(patch_version_raw) # CRP is supposed to guarantee that healthStoreId is always in the correct format; (Legacy) Maintenance Run Id may not be; what happens prior to this is just defensive coding
589-
self.composite_logger.log_debug("Patch version _may_ be in an incorrect format. [CommonFormat=DateTimeUTC][Actual={0}][Error={1}]".format(str(self.execution_config.maintenance_run_id), repr(e)))
590-
651+
self.composite_logger.log_debug("[PI] Reviewing final healthstore record write. [HealthStoreId={0}][MaintenanceRunId={1}]".format(str(self.execution_config.health_store_id), str(self.execution_config.maintenance_run_id)))
652+
if self.execution_config.health_store_id is not None:
591653
self.status_handler.set_patch_metadata_for_healthstore_substatus_json(
592-
patch_version=patch_version if patch_version is not None and patch_version != "" else Constants.PATCH_VERSION_UNKNOWN,
654+
patch_version=self.execution_config.health_store_id,
593655
report_to_healthstore=True,
594656
wait_after_update=False)
595657

@@ -602,7 +664,7 @@ def perform_status_reconciliation_conditionally(self, package_manager, condition
602664
if not condition:
603665
return 0
604666

605-
self.composite_logger.log_debug("\nStarting status reconciliation...")
667+
self.composite_logger.log_verbose("\nStarting status reconciliation...")
606668
start_time = time.time()
607669
still_needed_packages, still_needed_package_versions = package_manager.get_all_updates(False) # do not use cache
608670
successful_packages = []
@@ -615,7 +677,7 @@ def perform_status_reconciliation_conditionally(self, package_manager, condition
615677
self.status_handler.set_package_install_status(successful_packages, successful_package_versions, Constants.INSTALLED)
616678
self.last_still_needed_packages = still_needed_packages
617679
self.last_still_needed_package_versions = still_needed_package_versions
618-
self.composite_logger.log_debug("Completed status reconciliation. Time taken: " + str(time.time() - start_time) + " seconds.")
680+
self.composite_logger.log_verbose("Completed status reconciliation. Time taken: " + str(time.time() - start_time) + " seconds.")
619681
return len(successful_packages)
620682
# endregion
621683

0 commit comments

Comments
 (0)