
Commit 2712e51

refactor: Improve error handling for exception (#1012)
* refactor: Improve error handling for exception
  Signed-off-by: Vincent Boutour <[email protected]>

* fixup! refactor: Improve error handling for exception
  Signed-off-by: Vincent Boutour <[email protected]>

* fixup! refactor: Improve error handling for exception
  Signed-off-by: Vincent Boutour <[email protected]>

---------

Signed-off-by: Vincent Boutour <[email protected]>
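The change is the same throughout: bare except clauses and logger.exception(...) calls become handlers that bind the exception with "as e" and interpolate it into a logger.error(...) message, keeping exc_info=True where the traceback is still wanted. A minimal sketch of the before/after pattern (illustrative only; do_work and the logger setup are placeholders, not code from this repo):

    import logging

    logger = logging.getLogger(__name__)

    def do_work():
        raise ValueError("boom")

    # Before: the summary line carries no detail about what failed
    try:
        do_work()
    except ValueError:
        logger.exception("Unable to do work")

    # After: the exception is bound and interpolated into the message,
    # so the first log line is self-describing; exc_info=True still
    # attaches the full traceback
    try:
        do_work()
    except ValueError as e:
        logger.error(f"Unable to do work: {e}", exc_info=True)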
1 parent 2356c4e commit 2712e51

14 files changed: +96 −83 lines

aws/logs_monitoring/caching/base_tags_cache.py

Lines changed: 10 additions & 10 deletions

@@ -60,9 +60,9 @@ def write_cache_to_s3(self, data):
                 DD_S3_BUCKET_NAME, self.get_cache_name_with_prefix()
             )
             s3_object.put(Body=(bytes(json.dumps(data).encode("UTF-8"))))
-        except ClientError:
+        except ClientError as e:
             send_forwarder_internal_metrics("s3_cache_write_failure")
-            self.logger.debug("Unable to write new cache to S3", exc_info=True)
+            self.logger.debug(f"Unable to write new cache to S3: {e}", exc_info=True)
 
     def acquire_s3_cache_lock(self):
         """Acquire cache lock"""

@@ -76,16 +76,16 @@ def acquire_s3_cache_lock(self):
             last_modified_unix_time = get_last_modified_time(file_content)
             if last_modified_unix_time + DD_S3_CACHE_LOCK_TTL_SECONDS >= time():
                 return False
-        except Exception:
-            self.logger.debug("Unable to get cache lock file")
+        except Exception as e:
+            self.logger.debug(f"Unable to get cache lock file: {e}")
 
         # lock file doesn't exist, create file to acquire lock
         try:
             cache_lock_object.put(Body=(bytes("lock".encode("UTF-8"))))
             send_forwarder_internal_metrics("s3_cache_lock_acquired")
             self.logger.debug("S3 cache lock acquired")
-        except ClientError:
-            self.logger.debug("Unable to write S3 cache lock file", exc_info=True)
+        except ClientError as e:
+            self.logger.debug(f"Unable to write S3 cache lock file: {e}", exc_info=True)
             return False
 
         return True

@@ -99,9 +99,9 @@ def release_s3_cache_lock(self):
             cache_lock_object.delete()
             send_forwarder_internal_metrics("s3_cache_lock_released")
             self.logger.debug("S3 cache lock released")
-        except ClientError:
+        except ClientError as e:
             send_forwarder_internal_metrics("s3_cache_lock_release_failure")
-            self.logger.debug("Unable to release S3 cache lock", exc_info=True)
+            self.logger.debug(f"Unable to release S3 cache lock: {e}", exc_info=True)
 
     def get_cache_from_s3(self):
         """Retrieves tags cache from s3 and returns the body along with

@@ -113,9 +113,9 @@ def get_cache_from_s3(self):
             file_content = cache_object.get()
             tags_cache = json.loads(file_content["Body"].read().decode("utf-8"))
             last_modified_unix_time = get_last_modified_time(file_content)
-        except:
+        except Exception as e:
             send_forwarder_internal_metrics("s3_cache_fetch_failure")
-            self.logger.debug("Unable to fetch cache from S3", exc_info=True)
+            self.logger.debug(f"Unable to fetch cache from S3: {e}", exc_info=True)
             return {}, -1
 
         return tags_cache, last_modified_unix_time
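For context, the lock touched by these hunks is a plain S3 object used as a best-effort mutex with a TTL: acquiring checks the lock object's age against DD_S3_CACHE_LOCK_TTL_SECONDS and writes the object if it is absent or stale; releasing deletes it. A condensed sketch of that pattern, assuming boto3 and placeholder bucket/key names (this is not the class above, just its shape):

    from time import time

    import boto3
    from botocore.exceptions import ClientError

    LOCK_TTL_SECONDS = 60  # stand-in for DD_S3_CACHE_LOCK_TTL_SECONDS

    def acquire_s3_lock(bucket, key):
        lock = boto3.resource("s3").Object(bucket, key)
        try:
            last_modified = lock.get()["LastModified"].timestamp()
            if last_modified + LOCK_TTL_SECONDS >= time():
                return False  # a live lock exists; back off
        except ClientError:
            pass  # lock object missing or unreadable: treat it as stale
        try:
            lock.put(Body=b"lock")  # (re)create the lock object
        except ClientError:
            return False
        return True

    def release_s3_lock(bucket, key):
        boto3.resource("s3").Object(bucket, key).delete()

Note this is advisory locking only: two writers racing between the get and the put can both "acquire" the lock, which is acceptable for a cache refresh.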

aws/logs_monitoring/caching/cloudwatch_log_group_cache.py

Lines changed: 8 additions & 8 deletions

@@ -103,10 +103,10 @@ def _get_log_group_tags_from_cache(self, cache_file_name):
             )
             tags_cache = json.loads(response.get("Body").read().decode("utf-8"))
             last_modified_unix_time = int(response.get("LastModified").timestamp())
-        except Exception:
+        except Exception as e:
             send_forwarder_internal_metrics("loggroup_cache_fetch_failure")
-            self.logger.exception(
-                "Failed to get log group tags from cache", exc_info=True
+            self.logger.error(
+                f"Failed to get log group tags from cache: {e}", exc_info=True
             )
             return None, -1
 
@@ -120,10 +120,10 @@ def _update_log_group_tags_cache(self, log_group, tags):
                 Key=cache_file_name,
                 Body=(bytes(json.dumps(tags).encode("UTF-8"))),
             )
-        except Exception:
+        except Exception as e:
             send_forwarder_internal_metrics("loggroup_cache_write_failure")
-            self.logger.exception(
-                "Failed to update log group tags cache", exc_info=True
+            self.logger.error(
+                f"Failed to update log group tags cache: {e}", exc_info=True
             )
 
     def _is_expired(self, last_modified):

@@ -150,8 +150,8 @@ def _get_log_group_tags(self, log_group_arn):
             response = self.cloudwatch_logs_client.list_tags_for_resource(
                 resourceArn=log_group_arn
             )
-        except Exception:
-            self.logger.exception("Failed to get log group tags", exc_info=True)
+        except Exception as e:
+            self.logger.error(f"Failed to get log group tags: {e}", exc_info=True)
         formatted_tags = None
         if response is not None:
             formatted_tags = [

aws/logs_monitoring/caching/lambda_cache.py

Lines changed: 3 additions & 3 deletions

@@ -42,9 +42,9 @@ def build_tags_cache(self):
             tags_fetch_success = True
 
         except ClientError as e:
-            self.logger.exception(
-                "Encountered a ClientError when trying to fetch tags. You may need to give "
-                "this Lambda's role the 'tag:GetResources' permission"
+            self.logger.error(
+                f"Failed to fetch Lambda tags: {e}. "
+                "Add 'tag:GetResources' permission to the Forwarder's IAM role."
             )
             additional_tags = [
                 f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}"
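The additional_tags context line kept here works because botocore's ClientError carries the raw API response. A small hedged sketch of pulling the status code out of one (the resourcegroupstaggingapi client matches the tag fetching these caches do, but the call is illustrative):

    import boto3
    from botocore.exceptions import ClientError

    client = boto3.client("resourcegroupstaggingapi")
    try:
        client.get_resources()
    except ClientError as e:
        status = e.response["ResponseMetadata"]["HTTPStatusCode"]
        code = e.response["Error"]["Code"]  # e.g. "AccessDeniedException"
        additional_tags = [f"http_status_code:{status}"]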

aws/logs_monitoring/caching/s3_tags_cache.py

Lines changed: 2 additions & 2 deletions

@@ -40,9 +40,9 @@ def build_tags_cache(self):
                 tags_by_arn_cache.update(page_tags_by_arn)
             tags_fetch_success = True
         except ClientError as e:
-            self.logger.exception(
+            self.logger.error(
                 "Encountered a ClientError when trying to fetch tags. You may need to give "
-                "this Lambda's role the 'tag:GetResources' permission"
+                f"this Lambda's role the 'tag:GetResources' permission: {e}"
             )
             additional_tags = [
                 f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}"

aws/logs_monitoring/caching/step_functions_cache.py

Lines changed: 3 additions & 3 deletions

@@ -49,9 +49,9 @@ def build_tags_cache(self):
             tags_fetch_success = True
 
         except ClientError as e:
-            self.logger.exception(
+            self.logger.error(
                 "Encountered a ClientError when trying to fetch tags. You may need to give "
-                "this Lambda's role the 'tag:GetResources' permission"
+                f"this Lambda's role the 'tag:GetResources' permission: {e}"
             )
             additional_tags = [
                 f"http_status_code:{e.response['ResponseMetadata']['HTTPStatusCode']}"

@@ -131,7 +131,7 @@ def _get_state_machine_tags(self, state_machine_arn: str):
                 ResourceARNList=[state_machine_arn]
             )
         except Exception as e:
-            self.logger.exception(f"Failed to get Step Functions tags due to {e}")
+            self.logger.error(f"Failed to get Step Functions tags due to {e}")
 
         if response and len(response.get("ResourceTagMappingList", {})) > 0:
             resource_dict = response.get("ResourceTagMappingList")[0]

aws/logs_monitoring/enhanced_lambda_metrics.py

Lines changed: 9 additions & 8 deletions

@@ -2,11 +2,11 @@
 # under the Apache License Version 2.0.
 # This product includes software developed at Datadog (https://www.datadoghq.com/).
 # Copyright 2021 Datadog, Inc.
+import datetime
 import json
-import os
 import logging
+import os
 import re
-import datetime
 from time import time
 
 ENHANCED_METRICS_NAMESPACE_PREFIX = "aws.lambda.enhanced"

@@ -168,10 +168,11 @@ def parse_and_submit_enhanced_metrics(logs, cache_layer):
             )
             for enhanced_metric in enhanced_metrics:
                 enhanced_metric.submit_to_dd()
-        except Exception:
-            logger.exception(
-                "Encountered an error while trying to parse and submit enhanced metrics for log %s",
+        except Exception as e:
+            logger.error(
+                "Encountered an error while trying to parse and submit enhanced metrics for log %s: %s",
                 log,
+                str(e),
             )
 

@@ -347,9 +348,9 @@ def parse_metrics_from_report_log(report_log_line):
 
     Args:
         report_log_line (str): The REPORT log generated by Lambda
-        EX: "REPORT RequestId: 814ba7cb-071e-4181-9a09-fa41db5bccad Duration: 1711.87 ms \
-            Billed Duration: 1800 ms Memory Size: 128 MB Max Memory Used: 98 MB \
-            XRAY TraceId: 1-5d83c0ad-b8eb33a0b1de97d804fac890 SegmentId: 31255c3b19bd3637 Sampled: true"
+            EX: "REPORT RequestId: 814ba7cb-071e-4181-9a09-fa41db5bccad Duration: 1711.87 ms \
+                Billed Duration: 1800 ms Memory Size: 128 MB Max Memory Used: 98 MB \
+                XRAY TraceId: 1-5d83c0ad-b8eb33a0b1de97d804fac890 SegmentId: 31255c3b19bd3637 Sampled: true"
 
     Returns:
         metrics - DatadogMetricPoint[]
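The docstring re-indented above shows the shape of a Lambda REPORT line. As a rough illustration only (not the actual parser in enhanced_lambda_metrics.py), such a line can be reduced to metric values with a single regex:

    import re

    report = (
        "REPORT RequestId: 814ba7cb-071e-4181-9a09-fa41db5bccad Duration: 1711.87 ms "
        "Billed Duration: 1800 ms Memory Size: 128 MB Max Memory Used: 98 MB"
    )

    pattern = re.compile(
        r"(Duration|Billed Duration|Memory Size|Max Memory Used):\s*([\d.]+)\s*(ms|MB)"
    )
    # Collect each metric name and its numeric value; units are ms or MB
    metrics = {name: float(value) for name, value, unit in pattern.findall(report)}
    # {'Duration': 1711.87, 'Billed Duration': 1800.0,
    #  'Memory Size': 128.0, 'Max Memory Used': 98.0}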

aws/logs_monitoring/forwarder.py

Lines changed: 9 additions & 9 deletions

@@ -99,7 +99,7 @@ def _forward_logs(self, logs, key=None):
                 log["message"] = scrubber.scrub(log["message"])
                 evaluated_log = log["message"]
             except Exception as e:
-                logger.exception(
+                logger.error(
                     f"Exception while scrubbing log message {log['message']}: {e}"
                 )
 
@@ -116,8 +116,8 @@ def _forward_logs(self, logs, key=None):
         for batch in batcher.batch(logs_to_forward):
             try:
                 client.send(batch)
-            except Exception:
-                logger.exception(f"Exception while forwarding log batch {batch}")
+            except Exception as e:
+                logger.error(f"Exception while forwarding log batch {batch}: {e}")
                 failed_logs.extend(batch)
             else:
                 if logger.isEnabledFor(logging.DEBUG):

@@ -142,9 +142,9 @@ def _forward_metrics(self, metrics, key=None):
         for metric in metrics:
             try:
                 send_log_metric(metric)
-            except Exception:
-                logger.exception(
-                    f"Exception while forwarding metric {json.dumps(metric)}"
+            except Exception as e:
+                logger.error(
+                    f"Exception while forwarding metric {json.dumps(metric)}: {e}"
                 )
                 failed_metrics.append(metric)
             else:

@@ -168,9 +168,9 @@ def _forward_traces(self, traces, key=None):
         try:
             serialized_trace_paylods = json.dumps(traces)
             self.trace_connection.send_traces(serialized_trace_paylods)
-        except Exception:
-            logger.exception(
-                f"Exception while forwarding traces {serialized_trace_paylods}"
+        except Exception as e:
+            logger.error(
+                f"Exception while forwarding traces {serialized_trace_paylods}: {e}"
             )
             if DD_STORE_FAILED_EVENTS and not key:
                 self.storage.store_data(RetryPrefix.TRACES, traces)

aws/logs_monitoring/lambda_function.py

Lines changed: 26 additions & 17 deletions

@@ -4,41 +4,46 @@
 # Copyright 2021 Datadog, Inc.
 
 import json
-import os
-import boto3
 import logging
-import requests
+import os
 from hashlib import sha1
 
-from datadog_lambda.wrapper import datadog_lambda_wrapper
+import boto3
+import requests
 from datadog import api
-from enhanced_lambda_metrics import parse_and_submit_enhanced_metrics
-from steps.parsing import parse
-from steps.enrichment import enrich
-from steps.transformation import transform
-from steps.splitting import split
+from datadog_lambda.wrapper import datadog_lambda_wrapper
+
 from caching.cache_layer import CacheLayer
+from enhanced_lambda_metrics import parse_and_submit_enhanced_metrics
 from forwarder import Forwarder
 from settings import (
+    DD_ADDITIONAL_TARGET_LAMBDAS,
     DD_API_KEY,
-    DD_SKIP_SSL_VALIDATION,
     DD_API_URL,
     DD_FORWARDER_VERSION,
-    DD_ADDITIONAL_TARGET_LAMBDAS,
     DD_RETRY_KEYWORD,
+    DD_SITE,
+    DD_SKIP_SSL_VALIDATION,
 )
+from steps.enrichment import enrich
+from steps.parsing import parse
+from steps.splitting import split
+from steps.transformation import transform
 
 logger = logging.getLogger()
 logger.setLevel(logging.getLevelName(os.environ.get("DD_LOG_LEVEL", "INFO").upper()))
 
 # DD_API_KEY must be set
 if DD_API_KEY == "<YOUR_DATADOG_API_KEY>" or DD_API_KEY == "":
-    raise Exception("Missing Datadog API key")
+    raise Exception(
+        "Missing Datadog API key. Set DD_API_KEY environment variable. "
+        "See: https://docs.datadoghq.com/serverless/forwarder/"
+    )
 # Check if the API key is the correct number of characters
 if len(DD_API_KEY) != 32:
     raise Exception(
-        "The API key is not the expected length. "
-        "Please confirm that your API key is correct"
+        f"Invalid Datadog API key format. Expected 32 characters, received {len(DD_API_KEY)}. "
+        f"Verify your API key at https://app.{DD_SITE}/organization-settings/api-keys"
    )
 # Validate the API key
 logger.debug("Validating the Datadog API key")

@@ -57,7 +62,11 @@
         timeout=10,
     )
     if not validation_res.ok:
-        raise Exception("The API key is not valid.")
+        raise Exception(
+            f"Datadog API key validation failed (HTTP {validation_res.status_code}). "
+            f"Verify your API key is correct and DD_SITE matches your Datadog account region (current: {DD_SITE}). "
+            "See: https://docs.datadoghq.com/getting_started/site/"
+        )
 
 # Force the layer to use the exact same API key and host as the forwarder
 api._api_key = DD_API_KEY

@@ -106,7 +115,7 @@ def init_cache_layer(function_prefix):
         if cache_layer is None:
             cache_layer = CacheLayer(function_prefix)
     except Exception as e:
-        logger.exception(f"Failed to create cache layer due to {e}")
+        logger.error(f"Failed to create cache layer due to {e}")
         raise

@@ -135,7 +144,7 @@ def invoke_additional_target_lambdas(event):
                 Payload=lambda_payload,
             )
         except Exception as e:
-            logger.exception(
+            logger.error(
                 f"Failed to invoke additional target lambda {lambda_arn} due to {e}"
             )
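The validation_res checked in the second hunk comes from a key-validation request made at import time. A hedged sketch of what that call can look like (the endpoint construction here is an assumption, not copied from the Forwarder):

    import requests

    DD_API_URL = "https://api.datadoghq.com"  # stand-in for the settings value
    DD_API_KEY = "0123456789abcdef0123456789abcdef"  # placeholder 32-char key

    validation_res = requests.get(
        f"{DD_API_URL}/api/v1/validate",
        headers={"DD-API-KEY": DD_API_KEY},
        timeout=10,
    )
    if not validation_res.ok:
        raise Exception(
            f"Datadog API key validation failed (HTTP {validation_res.status_code})."
        )

Failing fast at import keeps a misconfigured Forwarder from silently dropping logs on every invocation.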

aws/logs_monitoring/logs/datadog_http_client.py

Lines changed: 4 additions & 4 deletions

@@ -84,8 +84,8 @@ def _close(self):
         for future in as_completed(self._futures):
             try:
                 future.result()
-            except Exception:
-                logger.exception("Exception while forwarding logs")
+            except Exception as e:
+                logger.error(f"Exception while forwarding logs: {e}")
 
         self._session.close()

@@ -95,8 +95,8 @@ def send(self, logs):
         """
         try:
            data = self._scrubber.scrub("[{}]".format(",".join(logs)))
-        except ScrubbingException:
-            raise Exception("could not scrub the payload")
+        except ScrubbingException as e:
+            raise Exception(f"could not scrub the payload: {e}")
         if DD_USE_COMPRESSION:
             data = compress_logs(data, DD_COMPRESSION_LEVEL)
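In the _close hunk, as_completed yields each in-flight send as it finishes, and future.result() re-raises any exception captured in the worker thread, which is where the new handler sees it. A self-contained sketch of the same drain loop (send and the payloads are placeholders):

    import logging
    from concurrent.futures import ThreadPoolExecutor, as_completed

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    def send(batch):
        if batch == "bad":
            raise RuntimeError(f"cannot send {batch!r}")

    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(send, b) for b in ("a", "bad", "c")]
        for future in as_completed(futures):
            try:
                future.result()  # re-raises the worker's exception, if any
            except Exception as e:
                logger.error(f"Exception while forwarding logs: {e}")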

aws/logs_monitoring/logs/datadog_matcher.py

Lines changed: 5 additions & 2 deletions

@@ -44,5 +44,8 @@ def match(self, log):
 
             return True
 
-        except ScrubbingException:
-            raise Exception("could not filter the payload")
+        except ScrubbingException as e:
+            raise Exception(f"Failed to filter log: {e}")
+
+        except Exception as e:
+            raise Exception(f"Failed to filter log: {e}")
