Skip to content

Commit e88e3d2

Browse files
authored
Merge pull request #90 from Azure/arc_lifecycle
Azure extension changes to Arc Ext
2 parents 9be1036 + 02d173a commit e88e3d2

File tree

2 files changed

+69
-51
lines changed

2 files changed

+69
-51
lines changed

src/core/src/service_interfaces/LifecycleManagerArc.py

Lines changed: 66 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -41,68 +41,83 @@ def __init__(self, env_layer, execution_config, composite_logger, telemetry_writ
4141

4242
# region - State checkers
4343
def execution_start_check(self):
44-
self.composite_logger.log_debug("Execution start check initiating...")
45-
extension_sequence = self.read_extension_sequence()
46-
core_sequence = self.read_core_sequence()
47-
arc_core_sequence = self.read_arc_core_sequence()
48-
49-
if arc_core_sequence['completed'] == "False":
50-
self.composite_logger.log_warning("Arc extension with sequence number {0} is currently running. Exiting autoassessment".format(str(arc_core_sequence['number'])))
51-
self.update_core_sequence(completed=True) # forced-to-complete scenario | extension wrapper will be watching for this event
52-
self.env_layer.exit(0)
44+
self.composite_logger.log_debug("\nExecution start check initiating...")
5345

5446
if self.execution_config.exec_auto_assess_only:
55-
# newer sequence number has been observed, do not run
56-
if int(self.execution_config.sequence_number) < int(extension_sequence['number']) \
57-
or int(self.execution_config.sequence_number) < int(core_sequence['number']):
58-
self.composite_logger.log_warning("Auto-assessment not started as newer sequence number detected. [Attempted={0}][DetectedExt={1}][DetectedCore={2}]".format(str(self.execution_config.sequence_number), str(extension_sequence['number']), str(core_sequence['number'])))
59-
self.env_layer.exit(0)
60-
61-
# anomalous extension state encountered, do not run - this needs to be investigated if ever encountered
62-
if int(self.execution_config.sequence_number) > int(extension_sequence['number']) \
63-
or int(self.execution_config.sequence_number) > int(core_sequence['number']):
64-
self.composite_logger.log_error("Auto-assessment not started as an extension state anomaly was detected. [Attempted={0}][DetectedExt={1}][DetectedCore={2}]".format(str(self.execution_config.sequence_number), str(extension_sequence['number']),str(core_sequence['number'])))
65-
self.env_layer.exit(0)
66-
67-
# attempted sequence number is same as recorded core sequence - expected
68-
if int(self.execution_config.sequence_number) == int(core_sequence['number']):
47+
timer_start_time = self.env_layer.datetime.datetime_utcnow()
48+
while True:
49+
extension_sequence = self.read_extension_sequence()
50+
core_sequence = self.read_core_sequence()
51+
arc_core_sequence = self.read_arc_core_sequence()
52+
53+
# Timer evaluation
54+
current_time = self.env_layer.datetime.datetime_utcnow()
55+
elapsed_time_in_minutes = self.env_layer.datetime.total_minutes_from_time_delta(current_time - timer_start_time)
56+
57+
# Check for sequence number mismatches
58+
if int(self.execution_config.sequence_number) != int(core_sequence['number']):
59+
if int(self.execution_config.sequence_number) < int(extension_sequence['number']) or int(self.execution_config.sequence_number) < int(core_sequence['number']):
60+
self.composite_logger.log_warning("Auto-assessment NOT STARTED as newer sequence number detected. [Attempted={0}][DetectedExt={1}][DetectedCore={2}]".format(str(self.execution_config.sequence_number), str(extension_sequence['number']), str(core_sequence['number'])))
61+
elif int(self.execution_config.sequence_number) > int(extension_sequence['number']) or int(self.execution_config.sequence_number) > int(core_sequence['number']):
62+
self.composite_logger.log_error("Auto-assessment NOT STARTED as an extension state anomaly was detected. [Attempted={0}][DetectedExt={1}][DetectedCore={2}]".format(str(self.execution_config.sequence_number), str(extension_sequence['number']), str(core_sequence['number'])))
63+
self.composite_logger.file_logger.close()
64+
self.env_layer.exit(0)
65+
66+
# DEFINITELY NOT SAFE TO START. ARC Assessment/Patch Operation is running. It is not required to start Auto-Assessment
67+
if arc_core_sequence['completed'].lower() == 'false':
68+
self.composite_logger.log_error("Auto-assessment NOT STARTED as arc extension is running. [Attempted={0}][ARCSequenceNo={1}]".format(str(self.execution_config.sequence_number), str(arc_core_sequence['number'])))
69+
self.composite_logger.file_logger.close()
70+
self.env_layer.exit(0)
71+
72+
# DEFINITELY SAFE TO START. Correct sequence number marked as completed
6973
if core_sequence['completed'].lower() == 'true':
70-
self.composite_logger.log_debug("Auto-assessment is safe to start. Existing sequence number marked as completed.")
71-
self.update_core_sequence(completed=False) # signalling core restart with auto-assessment as its safe to do so
72-
else:
73-
self.composite_logger.log_debug("Auto-assessment may not be safe to start yet as core sequence is not marked completed.")
74-
if len(self.identify_running_processes(core_sequence['processIds'])) != 0:
75-
# NOT SAFE TO START
76-
# Possible reasons: full core operation is in progress (okay), some previous auto-assessment is still running (bad scheduling, adhoc run, or process stalled)
77-
self.composite_logger.log_warning("Auto-assessment is NOT safe to start yet. Existing core process(es) running. Exiting. [LastHeartbeat={0}][Operation={1}]".format(str(core_sequence['lastHeartbeat']), str(core_sequence['action'])))
74+
self.composite_logger.log("Auto-assessment is SAFE to start. Existing sequence number marked as COMPLETED.\n")
75+
self.read_only_mode = False
76+
break
77+
78+
# Check for active running processes if not completed
79+
if len(self.identify_running_processes(core_sequence['processIds'])) != 0:
80+
if os.getpid() in core_sequence['processIds']:
81+
self.composite_logger.log("Auto-assessment is SAFE to start. Core sequence ownership is already established.\n")
82+
self.read_only_mode = False
83+
break
84+
85+
# DEFINITELY _NOT_ SAFE TO START. Possible reasons: full core operation is in progress (okay), some previous auto-assessment is still running (bad scheduling, adhoc run, or process stalled)
86+
if elapsed_time_in_minutes > Constants.MAX_AUTO_ASSESSMENT_WAIT_FOR_MAIN_CORE_EXEC_IN_MINUTES: # will wait up to the max allowed
87+
self.composite_logger.log_warning("Auto-assessment is NOT safe to start yet.TIMED-OUT waiting to Core to complete. EXITING. [LastHeartbeat={0}][Operation={1}]".format(str(core_sequence['lastHeartbeat']), str(core_sequence['action'])))
88+
self.composite_logger.file_logger.close()
7889
self.env_layer.exit(0)
7990
else:
80-
# MAY BE SAFE TO START
81-
self.composite_logger.log_warning("Auto-assessment is LIKELY safe to start, BUT core sequence anomalies were detected. Evaluating further. [LastHeartbeat={0}][Operation={1}]".format(str(core_sequence['lastHeartbeat']), str(core_sequence['action'])))
82-
# wait to see if Core comes back from a restart
83-
timer_start_time = self.env_layer.datetime.datetime_utcnow()
84-
while True:
85-
core_sequence = self.read_core_sequence()
86-
87-
# Main Core process suddenly started running (expected after reboot) - don't run
88-
if len(self.identify_running_processes(core_sequence['processIds'])) != 0:
89-
self.composite_logger.log_warning("Auto-assessment is NOT safe to start as core process(es) started running. Exiting. [LastHeartbeat={0}][Operation={1}]".format(str(core_sequence['lastHeartbeat']), str(core_sequence['action'])))
90-
self.env_layer.exit(0)
91-
92-
# If timed out without the main Core process starting, assume it's safe to proceed
93-
current_time = self.env_layer.datetime.datetime_utcnow()
94-
elapsed_time_in_minutes = self.env_layer.datetime.total_minutes_from_time_delta(current_time - timer_start_time)
95-
if elapsed_time_in_minutes > Constants.REBOOT_BUFFER_IN_MINUTES:
96-
self.composite_logger.log_debug("Auto-assessment is now considered safe to start since Core did not start after a reboot buffer wait period. [LastHeartbeat={0}][Operation={1}]".format(str(core_sequence['lastHeartbeat']), str(core_sequence['action'])))
97-
break
98-
99-
self.update_core_sequence(completed=False) # signalling core restart with auto-assessment as its safe to do so
91+
self.composite_logger.file_logger.flush()
92+
self.composite_logger.log_warning("Auto-assessment is NOT safe to start yet. Waiting to retry (up to set timeout). [LastHeartbeat={0}][Operation={1}][ElapsedTimeInMinutes={2}][TotalWaitRequiredInMinutes={3}]".format(str(core_sequence['lastHeartbeat']), str(core_sequence['action']), str(elapsed_time_in_minutes), str(Constants.REBOOT_BUFFER_IN_MINUTES)))
93+
self.composite_logger.file_logger.flush()
94+
time.sleep(30)
95+
continue
96+
97+
# MAYBE SAFE TO START. Safely timeout if wait for any core restart events (from a potential reboot) has exceeded the maximum reboot buffer
98+
if elapsed_time_in_minutes > Constants.REBOOT_BUFFER_IN_MINUTES:
99+
self.composite_logger.log_debug("Auto-assessment is now considered SAFE to start as Core timed-out in reporting completion mark. [LastHeartbeat={0}][Operation={1}]".format(str(core_sequence['lastHeartbeat']), str(core_sequence['action'])))
100+
self.read_only_mode = False
101+
break
102+
103+
# Briefly pause execution to re-check all states (including reboot buffer) again
104+
self.composite_logger.file_logger.flush()
105+
self.composite_logger.log_debug("Auto-assessment is waiting for Core state completion mark (up to set timeout). [LastHeartbeat={0}][Operation={1}][ElapsedTimeInMinutes={2}][TotalWaitRequiredInMinutes={3}]".format(str(core_sequence['lastHeartbeat']), str(core_sequence['action']), str(elapsed_time_in_minutes), str(Constants.REBOOT_BUFFER_IN_MINUTES)))
106+
self.composite_logger.file_logger.flush()
107+
time.sleep(30)
108+
109+
# Signalling take-over of core state by auto-assessment after safety checks for any competing process
110+
self.update_core_sequence(completed=False)
100111
else:
101112
# Logic for all non-Auto-assessment operations
113+
extension_sequence = self.read_extension_sequence()
114+
core_sequence = self.read_core_sequence()
115+
102116
if int(extension_sequence['number']) == int(self.execution_config.sequence_number):
103117
if core_sequence['completed'] is True:
104118
# Block attempts to execute what last completed (fully) again
105119
self.composite_logger.log_warning("LifecycleManager recorded false enable for completed sequence {0}.".format(str(extension_sequence['number'])))
120+
self.composite_logger.file_logger.close()
106121
self.env_layer.exit(0)
107122
else:
108123
# Incomplete current execution

src/core/tests/Test_LifecycleManagerArc.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@ def test_update_core_sequence_success(self): # failing test - needs to be cor
107107
dummy_folder = os.path.join(self.runtime.execution_config.config_folder, "CoreExt_lifecycle_manager_test")
108108
os.mkdir(dummy_folder)
109109
self.lifecycle_manager.core_state_file_path = dummy_folder
110+
self.lifecycle_manager.read_only_mode = False
110111
self.lifecycle_manager.update_core_sequence()
111112
self.assertTrue(os.path.exists(self.lifecycle_manager.core_state_file_path) and os.path.isfile(self.lifecycle_manager.core_state_file_path))
112113
core_sequence_json = self.lifecycle_manager.read_core_sequence()
@@ -135,6 +136,7 @@ def test_read_arc_core_sequence_fail(self): # failing test - needs to be cor
135136
old_core_state_file_path = self.lifecycle_manager.core_state_file_path
136137
# File not found at location
137138
self.lifecycle_manager.arc_core_state_file_path = "dummy"
139+
self.lifecycle_manager.read_only_mode = False
138140
ext_state_json = self.lifecycle_manager.read_arc_core_sequence()
139141
self.assertEquals(ext_state_json['completed'], 'True')
140142

@@ -146,6 +148,7 @@ def test_read_arc_core_sequence_fail(self): # failing test - needs to be cor
146148

147149
def test_read_arc_core_sequence_success(self): # failing test - needs to be corrected with Arc code changes
148150
self.lifecycle_manager.arc_core_state_file_path = self.lifecycle_manager.core_state_file_path
151+
self.lifecycle_manager.read_only_mode = False
149152
self.lifecycle_manager.update_core_sequence(completed=True)
150153
# Completed True Case
151154
arc_core_state = self.lifecycle_manager.read_arc_core_sequence()

0 commit comments

Comments
 (0)