Skip to content

Commit a715b00

Browse files
authored
Telemetry not supported incident mitigation (#114)
Mitigation + additional logging for telemetry not supported incident.
1 parent 371959f commit a715b00

File tree

10 files changed

+70
-27
lines changed

10 files changed

+70
-27
lines changed

src/core/src/core_logic/ConfigurePatchingProcessor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def __raise_if_agent_incompatible(self):
139139
self.composite_logger.log("Skipping agent compatibility check for Arc cloud type when operation is not manual")
140140
return
141141
if not self.telemetry_writer.is_agent_compatible():
142-
error_msg = Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG
142+
error_msg = "{0} [{1}]".format(Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG, self.telemetry_writer.get_telemetry_diagnostics())
143143
self.composite_logger.log_error(error_msg)
144144
raise Exception(error_msg)
145145

src/core/src/core_logic/PatchAssessor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def raise_if_agent_incompatible(self):
8484
self.composite_logger.log("Skipping agent compatibility check for Arc cloud type when operation is not manual")
8585
return
8686
if not self.telemetry_writer.is_agent_compatible():
87-
error_msg = Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG
87+
error_msg = "{0} [{1}]".format(Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG, self.telemetry_writer.get_telemetry_diagnostics())
8888
self.composite_logger.log_error(error_msg)
8989
raise Exception(error_msg)
9090

src/core/src/core_logic/PatchInstaller.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def raise_if_agent_incompatible(self):
103103
self.composite_logger.log("Skipping agent compatibility check for Arc cloud type when operation is not manual")
104104
return
105105
if not self.telemetry_writer.is_agent_compatible():
106-
error_msg = Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG
106+
error_msg = "{0} [{1}]".format(Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG, self.telemetry_writer.get_telemetry_diagnostics())
107107
self.composite_logger.log_error(error_msg)
108108
raise Exception(error_msg)
109109

src/core/src/service_interfaces/TelemetryWriter.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@ def __init__(self, env_layer, composite_logger, events_folder_path):
3737
self.__telemetry_event_counter = 1 # will be added at the end of each event sent to telemetry to assist in tracing and identifying event/message loss in telemetry
3838
self.start_time_for_event_count_throttle_check = datetime.datetime.utcnow()
3939
self.event_count = 1
40+
self.agent_env_var_code = 0 # code to give details on what went wrong when getting env var
4041

41-
if self.__get_agent_supports_telemetry_from_env_var() and self.__get_events_folder_path_exists(events_folder_path):
42+
if self.__get_events_folder_path_exists(events_folder_path):
4243
self.events_folder_path = events_folder_path
4344
self.__is_agent_compatible = True
4445

@@ -127,15 +128,18 @@ def __get_agent_supports_telemetry_from_env_var(self):
127128
""" Returns True if the env var AZURE_GUEST_AGENT_EXTENSION_SUPPORTED_FEATURES has a key of
128129
ExtensionTelemetryPipeline in the list. Value of the env var looks like this:
129130
'[{ "Key": "ExtensionTelemetryPipeline", "Value": "1.0"}]' """
131+
self.agent_env_var_code = 0
130132
features_keyvalue_list_str = os.getenv(Constants.AZURE_GUEST_AGENT_EXTENSION_SUPPORTED_FEATURES_ENV_VAR)
131133
if features_keyvalue_list_str is None:
132134
self.composite_logger.log_error('Failed to get guest agent supported features from env var. [Var={0}]'.format(Constants.AZURE_GUEST_AGENT_EXTENSION_SUPPORTED_FEATURES_ENV_VAR))
135+
self.agent_env_var_code = 1
133136
return False
134137

135138
features_keyvalue_list = json.loads(features_keyvalue_list_str)
136139
telemetry_supported_key_exists = any(kv_pair for kv_pair in features_keyvalue_list if kv_pair['Key'] == Constants.TELEMETRY_EXTENSION_PIPELINE_SUPPORTED_KEY)
137140
if telemetry_supported_key_exists is False:
138141
self.composite_logger.log_error('Guest agent does not support telemetry. [Error=Key not found: {0}]'.format(Constants.TELEMETRY_EXTENSION_PIPELINE_SUPPORTED_KEY))
142+
self.agent_env_var_code = 2
139143

140144
return telemetry_supported_key_exists
141145

@@ -189,6 +193,28 @@ def __extract_agent_version_from_string(self, pattern, string):
189193

190194
return version_search.group()
191195

196+
def get_telemetry_diagnostics(self):
197+
""" Returns information about the guest agent telemetry for debugging purposes.
198+
Information message abbreviations:
199+
AV: Agent Version (Guest Agent)
200+
AGSV: Agent Goal State Version (Guest Agent)
201+
EFE: Events Folder Exists (on disk)
202+
EV: Env Var reports telemetry support (1 if the supported-features env var lists the telemetry key, 0 otherwise)
203+
EVC: Env Var Code (0: no issue detected, 1: env var not set, 2: telemetry key not found in env var)
204+
See __get_agent_supports_telemetry_from_env_var for more information
205+
"""
206+
agent_version = self.get_agent_version()
207+
agent_goalstate_version = self.get_goal_state_agent_version()
208+
events_folder_exists = self.__get_events_folder_path_exists(self.events_folder_path)
209+
telemetry_env_var_supported = self.__get_agent_supports_telemetry_from_env_var()
210+
return "AV:{0}, AGSV:{1}, EFE:{2}, EV:{3}, EVC:{4}".format(
211+
str(agent_version) if agent_version is not None else "-1",
212+
str(agent_goalstate_version) if agent_goalstate_version is not None else "-1",
213+
"1" if events_folder_exists is True else "0",
214+
"1" if telemetry_env_var_supported is True else "0",
215+
str(self.agent_env_var_code)
216+
)
217+
192218
def __log_agent_information(self):
193219
""" Logs WALinuxAgent version information. """
194220
agent_version = self.get_agent_version()

src/extension/src/ActionHandler.py

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -112,30 +112,44 @@ def tear_down(self):
112112
self.file_logger = None
113113

114114
def setup_telemetry(self):
115-
""" Init telemetry if agent is compatible (env var check) AND events_folder is specified.
115+
""" Init telemetry if agent is compatible (events_folder is specified).
116116
Otherwise, error since guest agent does not support telemetry. """
117117
events_folder = self.ext_env_handler.events_folder
118-
if events_folder is not None and self.telemetry_writer.is_agent_compatible():
118+
if events_folder is not None:
119119
# Guest agent fully supports telemetry
120-
''' NOTE: unlike core, this code will run even if events_folder does not exist,
121-
since telemetry_writer.is_agent_compatible() only checks the env var.
122-
This ensures that the events_folder exists once core runs. '''
120+
self.__log_telemetry_info(telemetry_supported=True)
121+
123122
if not os.path.exists(events_folder):
124123
os.mkdir(events_folder)
125-
self.logger.log("Events folder path found in HandlerEnvironment but does not exist on disk. Creating now. [Path={0}]".format(str(events_folder)))
126-
127-
self.logger.log(Constants.TELEMETRY_AT_AGENT_COMPATIBLE_MSG)
124+
self.logger.log("Events folder path found in HandlerEnvironment but does not exist on disk. Creating now. [Path={0}][AgentVersion={1}]".format(
125+
str(events_folder), str(self.telemetry_writer.get_agent_version())))
126+
128127
self.telemetry_writer.events_folder_path = events_folder
129128
# As this is a common function used by all handler actions, setting operation_id such that it will be the same timestamp for all handler actions, which can be used for identifying all events for an operation.
130129
# NOTE: Enable handler action will set operation_id to activity_id from config settings. And the same will be used in Core.
131130
self.telemetry_writer.set_operation_id(self.operation_id_substitute_for_all_actions_in_telemetry)
132131
else:
133-
# Guest agent does not support telemetry (incompatible OR events_folder not specified)
134-
err_msg = Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG
135-
if self.telemetry_writer.is_agent_compatible():
136-
# Agent is compatible but events folder was not given, so log additional agent version info
137-
err_msg += " [AgentVer: {0} GoalStateVer: {1}]".format(self.telemetry_writer.get_agent_version(), self.telemetry_writer.get_goal_state_agent_version())
138-
self.logger.log_error(err_msg)
132+
self.__log_telemetry_info(telemetry_supported=False)
133+
134+
def __log_telemetry_info(self, telemetry_supported):
135+
""" Logs detailed information about telemetry and logs an error if telemetry is not supported. """
136+
events_folder = self.ext_env_handler.events_folder
137+
events_folder_str = str(events_folder) if events_folder is not None else ""
138+
events_folder_exists = os.path.exists(events_folder) if events_folder is not None else False
139+
env_var_supports_telemetry = self.telemetry_writer.is_agent_compatible()
140+
telemetry_info = "[EventsFolder=\'{0}\'][EventsFolderExists={1}][EnvVar={2}]".format(
141+
events_folder_str, str(events_folder_exists), env_var_supports_telemetry)
142+
143+
if env_var_supports_telemetry is True:
144+
telemetry_info += "[AgentVer={0}][GoalStateVer={1}]".format(self.telemetry_writer.get_agent_version(), self.telemetry_writer.get_goal_state_agent_version())
145+
else:
146+
telemetry_info += "[AgentVer=Unknown][GoalStateVer=Unknown]"
147+
148+
if telemetry_supported is True:
149+
self.logger.log("{0} {1}".format(Constants.TELEMETRY_AT_AGENT_COMPATIBLE_MSG, telemetry_info))
150+
else:
151+
error_msg = "{0} {1}".format(Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG, telemetry_info)
152+
self.logger.log_error(error_msg)
139153

140154
def install(self):
141155
try:

src/extension/src/TelemetryWriter.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@
2828
class TelemetryWriter(object):
2929
"""Class for writing telemetry data to events"""
3030

31-
def __init__(self, logger):
31+
def __init__(self, logger, env_layer):
3232
self.logger = logger
33+
self.env_layer = env_layer
3334
self.events_folder_path = None
3435
self.__operation_id = ""
3536
self.__agent_is_compatible = self.__get_agent_supports_telemetry_from_env_var()
@@ -82,7 +83,7 @@ def __events_folder_exists(self):
8283
def write_event(self, message, event_level=Constants.TelemetryEventLevel.Informational, task_name=Constants.TELEMETRY_TASK_NAME):
8384
""" Creates and writes event to event file after validating none of the telemetry size restrictions are breached """
8485
try:
85-
if not self.__events_folder_exists() or not self.__agent_is_compatible or not Constants.TELEMETRY_ENABLED_AT_EXTENSION:
86+
if not self.__events_folder_exists() or not Constants.TELEMETRY_ENABLED_AT_EXTENSION:
8687
return
8788

8889
self.__delete_older_events()

src/extension/src/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@
3737
def main(argv):
3838
stdout_file_mirror = None
3939
file_logger = None
40+
env_layer = EnvLayer()
4041
logger = Logger()
41-
telemetry_writer = TelemetryWriter(logger)
42+
telemetry_writer = TelemetryWriter(logger, env_layer)
4243
logger.telemetry_writer = telemetry_writer # Need to set telemetry_writer within logger to enable sending all logs to telemetry
4344
try:
4445
# initializing action handler
@@ -48,7 +49,6 @@ def main(argv):
4849
runtime_context_handler = RuntimeContextHandler(logger)
4950
json_file_handler = JsonFileHandler(logger)
5051
ext_env_handler = ExtEnvHandler(json_file_handler)
51-
env_layer = EnvLayer()
5252
env_health_manager = EnvHealthManager(env_layer)
5353
if ext_env_handler.handler_environment_json is not None and ext_env_handler.config_folder is not None:
5454
config_folder = ext_env_handler.config_folder

src/extension/tests/Test_ActionHandler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def test_update_command_fail(self):
252252
# Remove the directory after the test
253253
shutil.rmtree(test_dir)
254254

255-
def test_telemetry_not_available(self):
255+
def test_telemetry_available_env_var_not_exists(self):
256256
# agent env var is not set, but telemetry is still treated as supported because the events folder is specified
257257
backup_os_getenv = os.getenv
258258
backup_telemetry_writer = self.runtime.telemetry_writer
@@ -262,14 +262,14 @@ def mock_os_getenv(name, value=None):
262262

263263
# Re-init TelemetryWriter since the env var for compatibility is only checked on init
264264
os.getenv = mock_os_getenv
265-
self.runtime.telemetry_writer = TelemetryWriter(self.runtime.logger)
265+
self.runtime.telemetry_writer = TelemetryWriter(self.runtime.logger, self.runtime.env_layer)
266266
self.action_handler.telemetry_writer = self.runtime.telemetry_writer
267267

268268
self.assertTrue(self.action_handler.uninstall() == Constants.ExitCode.Okay)
269269

270270
file_read = open(self.runtime.logger.file_logger.log_file_path, "r")
271271
self.assertTrue(file_read is not None)
272-
self.assertTrue(Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG in file_read.read())
272+
self.assertTrue(Constants.TELEMETRY_AT_AGENT_COMPATIBLE_MSG in file_read.read())
273273
file_read.close()
274274

275275
with self.assertRaises(SystemExit) as sys_exit:

src/extension/tests/Test_TelemetryWriter.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import time
66
import unittest
77
from extension.src.Constants import Constants
8+
from extension.src.EnvLayer import EnvLayer
89
from extension.src.TelemetryWriter import TelemetryWriter
910
from extension.src.local_loggers.Logger import Logger
1011
from extension.tests.helpers.VirtualTerminal import VirtualTerminal
@@ -15,7 +16,8 @@ class TestTelemetryWriter(unittest.TestCase):
1516
def setUp(self):
1617
VirtualTerminal().print_lowlight("\n----------------- setup test runner -----------------")
1718
self.logger = Logger()
18-
self.telemetry_writer = TelemetryWriter(self.logger)
19+
self.env_layer = EnvLayer()
20+
self.telemetry_writer = TelemetryWriter(self.logger, self.env_layer)
1921
self.telemetry_writer.events_folder_path = tempfile.mkdtemp()
2022

2123
def tearDown(self):

src/extension/tests/helpers/RuntimeComposer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ def __init__(self):
1515
self.backup_os_getenv = os.getenv
1616
os.getenv = self.getenv_telemetry_enabled
1717
self.logger = Logger()
18-
self.telemetry_writer = TelemetryWriter(self.logger)
1918
self.utility = Utility(self.logger)
2019
self.json_file_handler = JsonFileHandler(self.logger)
2120
self.env_layer = EnvLayer()
2221
self.env_health_manager = EnvHealthManager(self.env_layer)
22+
self.telemetry_writer = TelemetryWriter(self.logger, self.env_layer)
2323
time.sleep = self.mock_sleep
2424
self.env_layer.is_tty_required = self.mock_is_tty_required
2525
self.env_health_manager.check_sudo_status = self.mock_check_sudo_status

0 commit comments

Comments
 (0)