From b6ba0c31a8f4c10b23b0823ed5d6f3fca3400bb1 Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Thu, 9 Nov 2023 15:57:39 +0000 Subject: [PATCH 1/2] Add log viewing tools --- dask_databricks/cli.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/dask_databricks/cli.py b/dask_databricks/cli.py index 0651ca5..e53828b 100644 --- a/dask_databricks/cli.py +++ b/dask_databricks/cli.py @@ -54,5 +54,27 @@ def run(): subprocess.Popen(["dask", "worker", f"tcp://{DB_DRIVER_IP}:8786"]) +@main.group() +def logs(): + """View cluster init logs.""" + + +@logs.command() +@click.argument("path") +def ls(path): + try: + from databricks.sdk.runtime import dbutils + except ImportError: + raise RuntimeError("Please install databricks-sdk.") + + log_files = [] + for cluster in dbutils.fs.ls(path): + for node in dbutils.fs.ls(cluster.path + "/init_scripts"): + for log in dbutils.fs.ls(node.path): + log_files.append(log.path) + for log in sorted(log_files, key=lambda x: x.split("/")[-1]): + print(log) + + if __name__ == "__main__": main() From ff962d205bbefbd8fb0517569eab623b28b7aa5d Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Fri, 17 Nov 2023 11:43:44 +0000 Subject: [PATCH 2/2] Add cluster log utilities --- README.md | 33 +++++++++++ dask_databricks/cli.py | 126 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 152 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f3dca96..4a82dff 100644 --- a/README.md +++ b/README.md @@ -47,3 +47,36 @@ https://dbc-dp-xxxx.cloud.databricks.com/driver-proxy/o/xxxx/xx-xxx-xxxx/8087/st ![](https://user-images.githubusercontent.com/1610850/281442274-450d41c6-2eb6-42a1-8de6-c4a1a1b84193.png) ![](https://user-images.githubusercontent.com/1610850/281441285-9b84d5f1-d58a-45dc-9354-7385e1599d1f.png) + +### Troubleshooting with cluster logs + +If you're experiencing problems starting your Dask Databricks cluster then viewing logs for your init scripts can help narrow down the problem. 

+
+When you create your cluster we recommend that you [configure your logs](https://docs.databricks.com/en/clusters/configure.html#cluster-log-delivery) to write to somewhere like `dbfs:/cluster_init_logs`.
+
+To make viewing these logs a little easier we've included a couple of CLI utilities in `dask-databricks` to help you navigate them.
+
+#### Listing clusters
+
+You can get a full list of available logs with the `dask databricks logs ls <path>` command where the path is the DBFS location you configured your logs to write to.
+
+```console
+$ dask databricks logs ls dbfs:/cluster_init_logs
+
+ Cluster Start time Node Count Node IPs
+ ──────────────────────────────────────────────────────────────────────────────────────
+ 1234-987654-a1b2c3d4 Nov 16 2023 10:36 2 10.0.0.1, 10.0.0.2
+```
+
+#### Viewing logs
+
+Once you have your cluster ID you can view the logs from the latest launch of that cluster with `dask databricks logs cat <path> <cluster>`.
+
+```console
+$ dask databricks logs cat dbfs:/cluster_init_logs 1234-987654-a1b2c3d4
+Cluster: 1234-987654-a1b2c3d4
+Start time: Nov 16 2023 10:36
+10.0.0.1: Start Python bootstrap
+10.0.0.1: PYSPARK_PYTHON is /databricks/python3/bin/python
+... 
+``` diff --git a/dask_databricks/cli.py b/dask_databricks/cli.py index e53828b..44a07fa 100644 --- a/dask_databricks/cli.py +++ b/dask_databricks/cli.py @@ -1,12 +1,30 @@ import logging import os +import random import socket import subprocess import sys import time +from datetime import datetime import click +from rich import box +from rich.color import ANSI_COLOR_NAMES +from rich.console import Console from rich.logging import RichHandler +from rich.table import Table + +console = Console() + +NODE_COLOURS = ["medium_spring_green", "light_steel_blue1", "wheat1", "medium_orchid"] + +# Generate list of random colours from rich +# import random +# from rich.color import Color +# +# for i in range(100): +# colour = Color.random() +# print(f'"{colour.name}",', end=" def get_logger(): @@ -59,21 +77,115 @@ def logs(): """View cluster init logs.""" -@logs.command() -@click.argument("path") -def ls(path): +def _get_logs_at_path(path): try: from databricks.sdk.runtime import dbutils except ImportError: raise RuntimeError("Please install databricks-sdk.") + clusters = {} - log_files = [] for cluster in dbutils.fs.ls(path): + cluster_id = cluster.path.split("/")[-1] + clusters[cluster_id] = {} for node in dbutils.fs.ls(cluster.path + "/init_scripts"): for log in dbutils.fs.ls(node.path): - log_files.append(log.path) - for log in sorted(log_files, key=lambda x: x.split("/")[-1]): - print(log) + filename = log.path.split("/")[-1] + channel = filename.split(".")[-2] + datetime = "_".join(filename.split("_")[:2]) + node_name = log.path.split("/")[-2].split("_", 1)[-1].replace("_", ".") + if datetime not in clusters[cluster_id]: + clusters[cluster_id][datetime] = {} + + if node_name not in clusters[cluster_id][datetime]: + clusters[cluster_id][datetime][node_name] = {} + + clusters[cluster_id][datetime][node_name][channel] = log.path + return clusters + + +def _get_node_color(i): + if i < len(NODE_COLOURS): + return NODE_COLOURS[i] + else: + return 
random.choice(list(ANSI_COLOR_NAMES)) + + +def _prettify_launch_time(launch_time): + return datetime.strptime(launch_time, "%Y%m%d_%H%M%S").strftime("%b %d %Y %H:%M") + + +@logs.command() +@click.argument("path") +@click.option("--show-filenames", help="Show filenames in the output", is_flag=True, default=False, show_default=True) +def ls(path, show_filenames): + # TODO add flag to list filenames + table = Table(box=box.SIMPLE_HEAD) + table.add_column("Cluster", style="cyan", no_wrap=True) + table.add_column("Start time", style="plum2") + table.add_column("Node Count") + table.add_column("Node IPs") + if show_filenames: + table.add_column("Filenames") + with console.status("[bright_black]Finding logs..."): + clusters = _get_logs_at_path(path) + for cluster in clusters: + first = True + for launch_time in sorted(clusters[cluster], reverse=True): + pretty_launch_time = _prettify_launch_time(launch_time) + cluster_name = cluster if first else "" + node_list = ", ".join( + f"[{_get_node_color(i)}]{name}[/{_get_node_color(i)}]" + for i, name in enumerate(clusters[cluster][launch_time]) + ) + data = [cluster_name, pretty_launch_time, str(len(clusters[cluster][launch_time])), node_list] + if show_filenames: + filenames = "" + for i, node in enumerate(clusters[cluster][launch_time]): + for channel in ["stdout", "stderr"]: + node_colour = _get_node_color(i) + filenames += f"[{node_colour}]{clusters[cluster][launch_time][node][channel]}[/{node_colour}]\n" + data.append(filenames) + table.add_row(*data) + first = False + + console.print(table) + + +@logs.command() +@click.argument("path") +@click.argument("cluster") +def cat(path, cluster): + # TODO add a flag for selecting which start time to view + # TODO add a flag to filter which nodes to view logs for + try: + from databricks.sdk.runtime import dbutils + except ImportError: + raise RuntimeError("Please install databricks-sdk.") + + with console.status("[bright_black]Finding logs..."): + clusters = _get_logs_at_path(path) 
+ + if cluster not in clusters: + console.print(f"Cluster {cluster} not found.", style="bold red", highlight=False) + console.print( + f"Hint: Try running dask [b i]databricks logs ls {path}[/b i] to list clusters.", + style="bright_black", + highlight=False, + ) + sys.exit(1) + + most_recent = sorted(clusters[cluster].keys())[-1] + + console.print(f"Cluster: {cluster}", style="bold cyan", highlight=False) + console.print(f"Start time: {_prettify_launch_time(most_recent)}", style="bold cyan", highlight=False) + + for i, node in enumerate(clusters[cluster][most_recent]): + for channel in ["stdout", "stderr"]: + for line in dbutils.fs.head(clusters[cluster][most_recent][node][channel], 65536).split("\n"): + node_colour = _get_node_color(i) + console.print( + f"[{node_colour}]{node}[/{node_colour}]: {line}", style="grey89" if channel == "stdout" else "plum4" + ) if __name__ == "__main__":