Skip to content

Commit e875112

Browse files
authored
Merge pull request #92 from Azure/bankiel-zypper-retries
Add retries for zypper package manager repo_refresh
2 parents 24e61ac + 8db03d4 commit e875112

15 files changed

123 additions and 26 deletions

src/core/src/bootstrap/Constants.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -156,10 +156,11 @@ class AutoAssessmentStates(EnumBackport):
156156

157157
UNKNOWN_PACKAGE_SIZE = "Unknown"
158158
PACKAGE_STATUS_REFRESH_RATE_IN_SECONDS = 10
159-
MAX_FILE_OPERATION_RETRY_COUNT = 5
159+
MAX_FILE_OPERATION_RETRY_COUNT = 10
160160
MAX_ASSESSMENT_RETRY_COUNT = 5
161161
MAX_INSTALLATION_RETRY_COUNT = 3
162162
MAX_IMDS_CONNECTION_RETRY_COUNT = 5
163+
MAX_ZYPPER_REPO_REFRESH_RETRY_COUNT = 5
163164

164165
class PackageClassification(EnumBackport):
165166
UNCLASSIFIED = 'Unclassified'

src/core/src/bootstrap/EnvLayer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -281,7 +281,7 @@ def open(self, file_path, mode):
281281
try:
282282
return open(real_path, mode)
283283
except Exception as error:
284-
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT:
284+
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT - 1:
285285
time.sleep(i + 1)
286286
else:
287287
raise Exception("Unable to open {0} (retries exhausted). Error: {1}.".format(str(real_path), repr(error)))
@@ -310,7 +310,7 @@ def read_with_retry(self, file_path_or_handle):
310310
self.__write_record(operation, code=0, output=value, delay=0)
311311
return value
312312
except Exception as error:
313-
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT:
313+
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT - 1:
314314
time.sleep(i + 1)
315315
else:
316316
raise Exception("Unable to read from {0} (retries exhausted). Error: {1}.".format(str(file_path_or_handle), repr(error)))
@@ -327,7 +327,7 @@ def write_with_retry(self, file_path_or_handle, data, mode='a+'):
327327
file_handle.write(str(data))
328328
break
329329
except Exception as error:
330-
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT:
330+
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT - 1:
331331
time.sleep(i + 1)
332332
else:
333333
raise Exception("Unable to write to {0} (retries exhausted). Error: {1}.".format(str(file_handle.name), repr(error)))
@@ -346,7 +346,7 @@ def write_with_retry_using_temp_file(file_path, data, mode='w'):
346346
shutil.move(tempname, file_path)
347347
break
348348
except Exception as error:
349-
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT:
349+
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT - 1:
350350
time.sleep(i + 1)
351351
else:
352352
raise Exception("Unable to write to {0} (retries exhausted). Error: {1}.".format(str(file_path), repr(error)))

src/core/src/core_logic/PatchAssessor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ def start_assessment(self):
6262
self.status_handler.set_assessment_substatus_json(status=Constants.STATUS_SUCCESS)
6363
break
6464
except Exception as error:
65-
if i < Constants.MAX_ASSESSMENT_RETRY_COUNT:
65+
if i < Constants.MAX_ASSESSMENT_RETRY_COUNT - 1:
6666
error_msg = 'Retryable error retrieving available patches: ' + repr(error)
6767
self.composite_logger.log_warning(error_msg)
6868
self.status_handler.add_error_to_status(error_msg, Constants.PatchOperationErrorCodes.DEFAULT_ERROR)

src/core/src/core_logic/RebootManager.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,10 @@ def start_reboot_if_required_and_time_available(self, current_time_available):
9191
""" Starts a reboot if required. Happens only at the end of the run if required. """
9292
self.composite_logger.log("\nReboot Management")
9393
reboot_pending = False if not self.status_handler else self.status_handler.is_reboot_pending
94+
reboot_pending = self.package_manager.force_reboot or reboot_pending
95+
96+
if self.package_manager.force_reboot:
97+
self.composite_logger.log("Reboot is occurring to mitigate an issue with the package manager.")
9498

9599
# return if never
96100
if self.reboot_setting == Constants.REBOOT_NEVER:

src/core/src/package_managers/PackageManager.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ def __init__(self, env_layer, execution_config, composite_logger, telemetry_writ
3333
self.single_package_upgrade_cmd = ''
3434
self.single_package_upgrade_simulation_cmd = 'simulate-install'
3535
self.package_manager_settings = {}
36+
self.force_reboot = False
3637

3738
# Enabling caching for high performance retrieval (only for code explicitly requesting it)
3839
self.all_updates_cached = []

src/core/src/package_managers/ZypperPackageManager.py

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616

1717
"""ZypperPackageManager for SUSE"""
1818
import re
19+
import time
1920
from core.src.package_managers.PackageManager import PackageManager
2021
from core.src.bootstrap.Constants import Constants
2122

@@ -54,7 +55,31 @@ def __init__(self, env_layer, execution_config, composite_logger, telemetry_writ
5455
def refresh_repo(self):
5556
self.composite_logger.log("Refreshing local repo...")
5657
# self.invoke_package_manager(self.repo_clean) # purges local metadata for rebuild - addresses a possible customer environment error
57-
self.invoke_package_manager(self.repo_refresh)
58+
for i in range(0, Constants.MAX_ZYPPER_REPO_REFRESH_RETRY_COUNT):
59+
try:
60+
self.invoke_package_manager(self.repo_refresh)
61+
return
62+
except Exception as error:
63+
if i < Constants.MAX_ZYPPER_REPO_REFRESH_RETRY_COUNT - 1:
64+
self.composite_logger.log_warning("Exception on package manager refresh repo. [Exception={0}] [RetryCount={1}]".format(repr(error), str(i)))
65+
time.sleep(pow(2, i) + 1)
66+
else:
67+
if Constants.ERROR_ADDED_TO_STATUS in repr(error):
68+
error.args = error.args[:1] # remove Constants.ERROR_ADDED_TO_STATUS flag to add new message to status
69+
70+
error_msg = "Unable to refresh repo (retries exhausted). [{0}] [RetryCount={1}]".format(repr(error), str(i))
71+
72+
# Reboot if not already done
73+
if self.status_handler.get_installation_reboot_status() == Constants.RebootStatus.COMPLETED:
74+
error_msg = "Unable to refresh repo (retries exhausted after reboot). [{0}] [RetryCount={1}]".format(repr(error), str(i))
75+
else:
76+
self.composite_logger.log_warning("Setting force_reboot flag to True.")
77+
self.force_reboot = True
78+
79+
self.composite_logger.log_warning(error_msg)
80+
self.status_handler.add_error_to_status(error_msg, Constants.PatchOperationErrorCodes.PACKAGE_MANAGER_FAILURE)
81+
82+
raise Exception(error_msg)
5883

5984
# region Get Available Updates
6085
def invoke_package_manager(self, command):

src/core/src/service_interfaces/LifecycleManager.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ def read_extension_sequence(self):
5656
with self.env_layer.file_system.open(self.ext_state_file_path, mode="r") as file_handle:
5757
return json.load(file_handle)['extensionSequence']
5858
except Exception as error:
59-
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT:
59+
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT - 1:
6060
self.composite_logger.log_warning("Exception on extension sequence read. [Exception={0}] [RetryCount={1}]".format(repr(error), str(i)))
6161
time.sleep(i+1)
6262
else:
@@ -115,7 +115,7 @@ def read_core_sequence(self):
115115

116116
return core_sequence
117117
except Exception as error:
118-
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT:
118+
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT - 1:
119119
self.composite_logger.log_warning("Exception on core sequence read. [Exception={0}] [RetryCount={1}]".format(repr(error), str(i)))
120120
time.sleep(i + 1)
121121
else:
@@ -145,7 +145,7 @@ def update_core_sequence(self, completed=False):
145145
with self.env_layer.file_system.open(self.core_state_file_path, 'w+') as file_handle:
146146
file_handle.write(core_state_payload)
147147
except Exception as error:
148-
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT:
148+
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT - 1:
149149
self.composite_logger.log_warning("Exception on core sequence update. [Exception={0}] [RetryCount={1}]".format(repr(error), str(i)))
150150
time.sleep(i + 1)
151151
else:

src/core/src/service_interfaces/StatusHandler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -501,7 +501,7 @@ def __load_status_file_components(self, initial_load=False):
501501
with self.env_layer.file_system.open(self.status_file_path, 'r') as file_handle:
502502
status_file_data_raw = json.load(file_handle)[0] # structure is array of 1
503503
except Exception as error:
504-
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT:
504+
if i < Constants.MAX_FILE_OPERATION_RETRY_COUNT - 1:
505505
time.sleep(i + 1)
506506
else:
507507
self.composite_logger.log_error("Unable to read status file (retries exhausted). Error: {0}.".format(repr(error)))

src/core/tests/Test_CoreMain.py

Lines changed: 72 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ def test_assessment_operation_fail(self):
278278
self.assertEquals(len(substatus_file_data), 2)
279279
self.assertTrue(substatus_file_data[0]["name"] == Constants.PATCH_ASSESSMENT_SUMMARY)
280280
self.assertTrue(substatus_file_data[0]["status"].lower() == Constants.STATUS_ERROR.lower())
281-
self.assertEqual(len(json.loads(substatus_file_data[0]["formattedMessage"]["message"])["errors"]["details"]), 2)
281+
self.assertEqual(len(json.loads(substatus_file_data[0]["formattedMessage"]["message"])["errors"]["details"]), 5)
282282
self.assertTrue(substatus_file_data[1]["name"] == Constants.CONFIGURE_PATCHING_SUMMARY)
283283
self.assertTrue(substatus_file_data[1]["status"].lower() == Constants.STATUS_SUCCESS.lower())
284284
runtime.stop()
@@ -753,6 +753,77 @@ def test_auto_assessment_success_with_installation_in_prev_operation_on_same_seq
753753

754754
runtime.stop()
755755

756+
def test_assessment_operation_fail_after_package_manager_reboot(self):
757+
argument_composer = ArgumentComposer()
758+
argument_composer.operation = Constants.ASSESSMENT
759+
runtime = RuntimeCompositor(argument_composer.get_composed_arguments(), True, Constants.ZYPPER)
760+
runtime.set_legacy_test_type('ExceptionPath')
761+
CoreMain(argument_composer.get_composed_arguments())
762+
763+
# check telemetry events
764+
self.__check_telemetry_events(runtime)
765+
766+
# mock rebooting
767+
runtime.status_handler.set_installation_reboot_status(Constants.RebootStatus.REQUIRED)
768+
runtime.status_handler.set_installation_reboot_status(Constants.RebootStatus.STARTED)
769+
runtime.status_handler.is_reboot_pending = False
770+
runtime.package_manager.force_reboot = False
771+
runtime.status_handler.set_installation_reboot_status(Constants.RebootStatus.COMPLETED)
772+
773+
# run coremain again
774+
CoreMain(argument_composer.get_composed_arguments())
775+
776+
# check telemetry events
777+
self.__check_telemetry_events(runtime)
778+
779+
# check status file
780+
with runtime.env_layer.file_system.open(runtime.execution_config.status_file_path, 'r') as file_handle:
781+
substatus_file_data = json.load(file_handle)[0]["status"]["substatus"]
782+
self.assertEquals(len(substatus_file_data), 3)
783+
self.assertTrue(substatus_file_data[0]["name"] == Constants.PATCH_ASSESSMENT_SUMMARY)
784+
self.assertTrue(substatus_file_data[0]["status"].lower() == Constants.STATUS_ERROR.lower())
785+
self.assertTrue(substatus_file_data[1]["name"] == Constants.PATCH_INSTALLATION_SUMMARY)
786+
self.assertTrue(substatus_file_data[1]["status"].lower() == Constants.STATUS_TRANSITIONING.lower())
787+
self.assertTrue(substatus_file_data[2]["name"] == Constants.CONFIGURE_PATCHING_SUMMARY)
788+
self.assertTrue(substatus_file_data[2]["status"].lower() == Constants.STATUS_SUCCESS.lower())
789+
runtime.stop()
790+
791+
def test_assessment_operation_success_after_package_manager_reboot(self):
792+
argument_composer = ArgumentComposer()
793+
argument_composer.operation = Constants.ASSESSMENT
794+
runtime = RuntimeCompositor(argument_composer.get_composed_arguments(), True, Constants.ZYPPER)
795+
runtime.set_legacy_test_type('ExceptionPath')
796+
CoreMain(argument_composer.get_composed_arguments())
797+
798+
# check telemetry events
799+
self.__check_telemetry_events(runtime)
800+
801+
# mock rebooting
802+
runtime.status_handler.set_installation_reboot_status(Constants.RebootStatus.REQUIRED)
803+
runtime.status_handler.set_installation_reboot_status(Constants.RebootStatus.STARTED)
804+
runtime.status_handler.is_reboot_pending = False
805+
runtime.package_manager.force_reboot = False
806+
runtime.status_handler.set_installation_reboot_status(Constants.RebootStatus.COMPLETED)
807+
808+
# run coremain again, but with success path this time
809+
runtime.set_legacy_test_type('SuccessInstallPath')
810+
CoreMain(argument_composer.get_composed_arguments())
811+
812+
# check telemetry events
813+
self.__check_telemetry_events(runtime)
814+
815+
# check status file
816+
with runtime.env_layer.file_system.open(runtime.execution_config.status_file_path, 'r') as file_handle:
817+
substatus_file_data = json.load(file_handle)[0]["status"]["substatus"]
818+
self.assertEquals(len(substatus_file_data), 3)
819+
self.assertTrue(substatus_file_data[0]["name"] == Constants.PATCH_ASSESSMENT_SUMMARY)
820+
self.assertTrue(substatus_file_data[0]["status"].lower() == Constants.STATUS_SUCCESS.lower())
821+
self.assertTrue(substatus_file_data[1]["name"] == Constants.PATCH_INSTALLATION_SUMMARY)
822+
self.assertTrue(substatus_file_data[1]["status"].lower() == Constants.STATUS_TRANSITIONING.lower())
823+
self.assertTrue(substatus_file_data[2]["name"] == Constants.CONFIGURE_PATCHING_SUMMARY)
824+
self.assertTrue(substatus_file_data[2]["status"].lower() == Constants.STATUS_SUCCESS.lower())
825+
runtime.stop()
826+
756827
def __check_telemetry_events(self, runtime):
757828
all_events = os.listdir(runtime.telemetry_writer.events_folder_path)
758829
self.assertTrue(len(all_events) > 0)

src/core/tests/Test_LifecycleManagerArc.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,7 @@ def test_read_extension_sequence_fail(self):
5454
# file open throws exception
5555
self.lifecycle_manager.ext_state_file_path = old_ext_state_file_path
5656
self.runtime.env_layer.file_system.open = self.mock_file_open_throw_exception
57-
ext_state_json = self.lifecycle_manager.read_extension_sequence()
58-
self.assertEquals(ext_state_json, None)
59-
57+
self.assertRaises(Exception, self.lifecycle_manager.read_extension_sequence)
6058

6159
def test_read_extension_sequence_success(self):
6260
ext_state_json = self.lifecycle_manager.read_extension_sequence()
@@ -66,8 +64,7 @@ def test_read_extension_sequence_success(self):
6664
def test_read_core_sequence_fail(self):
6765
# file open throws exception
6866
self.runtime.env_layer.file_system.open = self.mock_file_open_throw_exception
69-
core_sequence_json = self.lifecycle_manager.read_core_sequence()
70-
self.assertEquals(core_sequence_json, None)
67+
self.assertRaises(Exception, self.lifecycle_manager.read_core_sequence)
7168

7269
def test_read_core_sequence_success(self):
7370
old_core_state_file_path = self.lifecycle_manager.core_state_file_path

0 commit comments

Comments (0)