diff --git a/bluehawk/snippets/artifact.snippet.create_artifact.py b/bluehawk/snippets/artifact.snippet.create_artifact.py new file mode 100644 index 0000000000..51f833ae2e --- /dev/null +++ b/bluehawk/snippets/artifact.snippet.create_artifact.py @@ -0,0 +1,6 @@ +import wandb + +run = wandb.init(project="artifacts-example", job_type="add-dataset") +artifact = wandb.Artifact(name="example_artifact", type="dataset") +artifact.add_file(local_path="./dataset.h5", name="training_dataset") +artifact.save() diff --git a/bluehawk/snippets/artifact.snippet.download_artifact-1.py b/bluehawk/snippets/artifact.snippet.download_artifact-1.py new file mode 100644 index 0000000000..33018422ed --- /dev/null +++ b/bluehawk/snippets/artifact.snippet.download_artifact-1.py @@ -0,0 +1,3 @@ +artifact = run.use_artifact( + "example_artifact:latest" +) # returns the "example_artifact" artifact object and marks it as an input of this run diff --git a/bluehawk/snippets/artifact.snippet.download_artifact-2.py b/bluehawk/snippets/artifact.snippet.download_artifact-2.py new file mode 100644 index 0000000000..405c0f3452 --- /dev/null +++ b/bluehawk/snippets/artifact.snippet.download_artifact-2.py @@ -0,0 +1,3 @@ +datadir = ( + artifact.download() +) # downloads the full `example_artifact` artifact to the default directory. 
diff --git a/bluehawk/snippets/create-a-report.snippet.create-a-report.py b/bluehawk/snippets/create-a-report.snippet.create-a-report.py new file mode 100644 index 0000000000..e5689e2165 --- /dev/null +++ b/bluehawk/snippets/create-a-report.snippet.create-a-report.py @@ -0,0 +1,4 @@ + +report = wr.Report(project="report_standard") +report.save() + diff --git a/bluehawk/snippets/edit-a-report.snippet.add-code-block-python.py b/bluehawk/snippets/edit-a-report.snippet.add-code-block-python.py new file mode 100644 index 0000000000..7d6195d9e8 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.add-code-block-python.py @@ -0,0 +1,5 @@ +report = wr.Report(project="report-editing") + +report.blocks = [wr.CodeBlock(code=["Hello, World!"], language="python")] + +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.add-code-blocks.py b/bluehawk/snippets/edit-a-report.snippet.add-code-blocks.py new file mode 100644 index 0000000000..af755e6fa8 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.add-code-blocks.py @@ -0,0 +1,10 @@ +report = wr.Report(project = "") + +report.blocks = [ + wr.CodeBlock( + code=["this:", "- is", "- a", "cool:", "- yaml", "- file"], language="yaml" + ), + wr.CodeBlock(code=["Hello, World!"], language="python") +] + +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.add-html.py b/bluehawk/snippets/edit-a-report.snippet.add-html.py new file mode 100644 index 0000000000..9449f8ee83 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.add-html.py @@ -0,0 +1,9 @@ +report = wr.Report(project = "") + +report.blocks = [ + wr.H1(text="How Programmatic Reports work"), + wr.H2(text="Heading 2"), + wr.UnorderedList(items=["Bullet 1", "Bullet 2"]), +] + +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.add-markdown.py b/bluehawk/snippets/edit-a-report.snippet.add-markdown.py new file mode 100644 index 0000000000..836e2ca6a9 --- /dev/null +++ 
b/bluehawk/snippets/edit-a-report.snippet.add-markdown.py @@ -0,0 +1,6 @@ +report = wr.Report(project = "") + +report.blocks = [ + wr.MarkdownBlock(text="Markdown cell with *italics* and **bold** and $e=mc^2$") +] +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.add-plots.py b/bluehawk/snippets/edit-a-report.snippet.add-plots.py new file mode 100644 index 0000000000..711ab24423 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.add-plots.py @@ -0,0 +1,17 @@ +report = wr.Report( + project = "", + title="", + description="A descriptive description.", +) + +blocks = [ + wr.PanelGrid( + panels=[ + wr.LinePlot(x="time", y="velocity"), + wr.ScatterPlot(x="time", y="acceleration"), + ] + ) +] + +report.blocks = blocks +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.add-runset-no-panels.py b/bluehawk/snippets/edit-a-report.snippet.add-runset-no-panels.py new file mode 100644 index 0000000000..89fdb4992d --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.add-runset-no-panels.py @@ -0,0 +1,14 @@ +report = wr.Report( + project = "<project>", + title="An amazing title", + description="A descriptive description.", +) + +blocks = wr.PanelGrid( + runsets=[ + wr.Runset(project="<project>", entity="<entity>") + ] +) + +report.blocks = [blocks] +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.add-runsets-and-panels.py b/bluehawk/snippets/edit-a-report.snippet.add-runsets-and-panels.py new file mode 100644 index 0000000000..0fa842acbb --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.add-runsets-and-panels.py @@ -0,0 +1,56 @@ +report = wr.Report( + project = "<project>", + title="An amazing title", + description="A descriptive description.", +) + +blocks = wr.PanelGrid( + runsets=[ + wr.Runset(project="<project>", entity="<entity>") + ], + panels=[ + wr.LinePlot( + title="line title", + x="x", + y=["y"], + range_x=[0, 100], + range_y=[0, 100], + log_x=True, + log_y=True, + title_x="x axis title", 
title_y="y axis title", + ignore_outliers=True, + groupby="hyperparam1", + groupby_aggfunc="mean", + groupby_rangefunc="minmax", + smoothing_factor=0.5, + smoothing_type="gaussian", + smoothing_show_original=True, + max_runs_to_show=10, + plot_type="stacked-area", + font_size="large", + legend_position="west", + ), + wr.ScatterPlot( + title="scatter title", + x="y", + y="y", + # z='x', + range_x=[0, 0.0005], + range_y=[0, 0.0005], + # range_z=[0,1], + log_x=False, + log_y=False, + # log_z=True, + running_ymin=True, + running_ymean=True, + running_ymax=True, + font_size="small", + regression=True, + ), + ], + +) + +report.blocks = [blocks] +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.config-filters-0.py b/bluehawk/snippets/edit-a-report.snippet.config-filters-0.py new file mode 100644 index 0000000000..b0f5aeb43c --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.config-filters-0.py @@ -0,0 +1,9 @@ +config = { + "learning_rate": 0.01, + "batch_size": 32, +} + +with wandb.init(project="<project>", entity="<entity>", config=config) as run: + # Your training code here + pass + diff --git a/bluehawk/snippets/edit-a-report.snippet.config-filters-1.py b/bluehawk/snippets/edit-a-report.snippet.config-filters-1.py new file mode 100644 index 0000000000..819d220701 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.config-filters-1.py @@ -0,0 +1,5 @@ +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Config('learning_rate') > 0.01" +) diff --git a/bluehawk/snippets/edit-a-report.snippet.config-filters-2.py b/bluehawk/snippets/edit-a-report.snippet.config-filters-2.py new file mode 100644 index 0000000000..ce6acc9b8f --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.config-filters-2.py @@ -0,0 +1,5 @@ +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Config('learning_rate') > 0.01 and Config('batch_size') == 32" +) diff --git 
a/bluehawk/snippets/edit-a-report.snippet.config-filters-3.py b/bluehawk/snippets/edit-a-report.snippet.config-filters-3.py new file mode 100644 index 0000000000..c4a5054ad6 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.config-filters-3.py @@ -0,0 +1,12 @@ +report = wr.Report( + entity="<entity>", + project="<project>", +) + +report.blocks = [ + wr.PanelGrid( + runsets=[runset] + ) +] + +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.embed-rich-media.py b/bluehawk/snippets/edit-a-report.snippet.embed-rich-media.py new file mode 100644 index 0000000000..1bd6ae600e --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.embed-rich-media.py @@ -0,0 +1,6 @@ +report = wr.Report(project = "<project>") + +report.blocks = [ + wr.Video(url="https://www.youtube.com/embed/6riDJMI-Y8U") +] +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.group-runs-config.py b/bluehawk/snippets/edit-a-report.snippet.group-runs-config.py new file mode 100644 index 0000000000..c14ce77e24 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.group-runs-config.py @@ -0,0 +1,21 @@ +# Create a report that groups runs by a config value +report = wr.Report( + entity=entity, + project=project, + title="Grouped Runs Example", +) + +# Create a runset that groups runs by the "group" config value +runset = wr.Runset( + project=project, + entity=entity, + groupby=["config.group"] +) +# Add the runset to a panel grid in the report +report.blocks = [ + wr.PanelGrid( + runsets=[runset], + ) + ] +# Save the report +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.group-runs-metadata.py b/bluehawk/snippets/edit-a-report.snippet.group-runs-metadata.py new file mode 100644 index 0000000000..dccdc915ec --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.group-runs-metadata.py @@ -0,0 +1,22 @@ +# Create a report that groups runs by their metadata (e.g., run name) +report = wr.Report( + entity=entity, + project=project, + 
title="Grouped Runs by Metadata Example", +) + +# Create a runset that groups runs by their name (metadata) +runset = wr.Runset( + project=project, + entity=entity, + groupby=["Name"] # Group by run names +) + +# Add the runset to a panel grid in the report +report.blocks = [ + wr.PanelGrid( + runsets=[runset], + ) + ] +# Save the report +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.group-runs-summary-metrics.py b/bluehawk/snippets/edit-a-report.snippet.group-runs-summary-metrics.py new file mode 100644 index 0000000000..207feecb79 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.group-runs-summary-metrics.py @@ -0,0 +1,22 @@ +# Create a report that groups runs by a summary metric +report = wr.Report( + entity=entity, + project=project, + title="Grouped Runs by Summary Metrics Example", +) + +# Create a runset that groups runs by the "summary.acc" summary metric +runset = wr.Runset( + project=project, + entity=entity, + groupby=["summary.acc"] # Group by summary values +) + +# Add the runset to a panel grid in the report +report.blocks = [ + wr.PanelGrid( + runsets=[runset], + ) + ] +# Save the report +report.save() diff --git a/bluehawk/snippets/edit-a-report.snippet.metric-filters-0.py b/bluehawk/snippets/edit-a-report.snippet.metric-filters-0.py new file mode 100644 index 0000000000..8e55164918 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.metric-filters-0.py @@ -0,0 +1,5 @@ +for i in range(3): + # Start a separate run per iteration so that three runs named run1..run3 exist + with wandb.init(project="<project>", entity="<entity>", name=f"run{i+1}") as run: + # Your training code here + pass diff --git a/bluehawk/snippets/edit-a-report.snippet.metric-filters-1.py b/bluehawk/snippets/edit-a-report.snippet.metric-filters-1.py new file mode 100644 index 0000000000..71522c40c2 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.metric-filters-1.py @@ -0,0 +1,5 @@ +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Metric('displayName') in ['run1', 'run2', 
'run3']" +) diff --git a/bluehawk/snippets/edit-a-report.snippet.metric-filters-2.py b/bluehawk/snippets/edit-a-report.snippet.metric-filters-2.py new file mode 100644 index 0000000000..09926bb00f --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.metric-filters-2.py @@ -0,0 +1,5 @@ +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Metric('state') in ['finished']" +) diff --git a/bluehawk/snippets/edit-a-report.snippet.metric-filters-3.py b/bluehawk/snippets/edit-a-report.snippet.metric-filters-3.py new file mode 100644 index 0000000000..36d12168c3 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.metric-filters-3.py @@ -0,0 +1,5 @@ +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Metric('state') not in ['crashed']" +) diff --git a/bluehawk/snippets/edit-a-report.snippet.summary-metric-filters.py b/bluehawk/snippets/edit-a-report.snippet.summary-metric-filters.py new file mode 100644 index 0000000000..52a7fc3a13 --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.summary-metric-filters.py @@ -0,0 +1,11 @@ +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="SummaryMetric('accuracy') > 0.9" +) + +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Metric('state') in ['finished'] and SummaryMetric('train/train_loss') < 0.5" +) diff --git a/bluehawk/snippets/edit-a-report.snippet.tag-filters.py b/bluehawk/snippets/edit-a-report.snippet.tag-filters.py new file mode 100644 index 0000000000..92d9a0718d --- /dev/null +++ b/bluehawk/snippets/edit-a-report.snippet.tag-filters.py @@ -0,0 +1,5 @@ +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Tags('training') == 'training'" +) diff --git a/bluehawk/snippets/group_runs.snippet.group_runs.py b/bluehawk/snippets/group_runs.snippet.group_runs.py new file mode 100644 index 0000000000..f1754a6241 --- /dev/null +++ b/bluehawk/snippets/group_runs.snippet.group_runs.py @@ -0,0 +1,17 
@@ + +import wandb +import wandb_workspaces.reports.v2 as wr + +entity = "<entity>" +project = "<project>" + +for group in ["control", "experiment_a", "experiment_b"]: + for i in range(3): + with wandb.init(entity=entity, project=project, group=group, config={"group": group, "run": i}, name=f"{group}_run_{i}") as run: + # Simulate some training + for step in range(100): + run.log({ + "acc": 0.5 + (step / 100) * 0.3 + (i * 0.05), + "loss": 1.0 - (step / 100) * 0.5 + }) + diff --git a/bluehawk/snippets/import_wandb.snippet.import_wandb.py b/bluehawk/snippets/import_wandb.snippet.import_wandb.py new file mode 100644 index 0000000000..4bb10bc9f7 --- /dev/null +++ b/bluehawk/snippets/import_wandb.snippet.import_wandb.py @@ -0,0 +1 @@ +import wandb diff --git a/bluehawk/snippets/import_wandb.snippet.import_wandb_and_workspaces.py b/bluehawk/snippets/import_wandb.snippet.import_wandb_and_workspaces.py new file mode 100644 index 0000000000..bd77d892f7 --- /dev/null +++ b/bluehawk/snippets/import_wandb.snippet.import_wandb_and_workspaces.py @@ -0,0 +1,2 @@ +import wandb +import wandb_workspaces.reports.v2 as wr diff --git a/bluehawk/snippets/import_wandb.snippet.import_wandb_workspaces.py b/bluehawk/snippets/import_wandb.snippet.import_wandb_workspaces.py new file mode 100644 index 0000000000..3315b1704f --- /dev/null +++ b/bluehawk/snippets/import_wandb.snippet.import_wandb_workspaces.py @@ -0,0 +1 @@ +import wandb_workspaces.reports.v2 as wr diff --git a/bluehawk/snippets/models_quickart.snippet.publish_model.py b/bluehawk/snippets/models_quickart.snippet.publish_model.py new file mode 100644 index 0000000000..266f9f4d37 --- /dev/null +++ b/bluehawk/snippets/models_quickart.snippet.publish_model.py @@ -0,0 +1,17 @@ + +# Artifact name specifies the specific artifact version within our team's project +artifact_name = f'{TEAM_ENTITY}/{PROJECT}/{model_artifact_name}:v0' +print("Artifact name: ", artifact_name) + +REGISTRY_NAME = "Model" # Name of the registry in W&B 
+COLLECTION_NAME = "DemoModels" # Name of the collection in the registry + +# Create a target path for our artifact in the registry +target_path = f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}" +print("Target path: ", target_path) + +run = wandb.init(entity=TEAM_ENTITY, project=PROJECT) +model_artifact = run.use_artifact(artifact_or_name=artifact_name, type="model") +run.link_artifact(artifact=model_artifact, target_path=target_path) +run.finish() + diff --git a/bluehawk/snippets/models_quickart.snippet.query_registry.py b/bluehawk/snippets/models_quickart.snippet.query_registry.py new file mode 100644 index 0000000000..fd8406d41c --- /dev/null +++ b/bluehawk/snippets/models_quickart.snippet.query_registry.py @@ -0,0 +1,29 @@ + +# Initialize wandb API +api = wandb.Api() + +# Find all artifact versions that contains the string `model` and +# has either the tag `text-classification` or an `latest` alias +registry_filters = { + "name": {"$regex": "model"} +} + +# Use logical $or operator to filter artifact versions +version_filters = { + "$or": [ + {"tag": "text-classification"}, + {"alias": "latest"} + ] +} + +# Returns an iterable of all artifact versions that match the filters +artifacts = api.registries(filter=registry_filters).collections().versions(filter=version_filters) + +# Print out the name, collection, aliases, tags, and created_at date of each artifact found +for art in artifacts: + print(f"artifact name: {art.name}") + print(f"collection artifact belongs to: { art.collection.name}") + print(f"artifact aliases: {art.aliases}") + print(f"tags attached to artifact: {art.tags}") + print(f"artifact created at: {art.created_at}\n") + diff --git a/bluehawk/snippets/models_quickart.snippet.retrieve_model.py b/bluehawk/snippets/models_quickart.snippet.retrieve_model.py new file mode 100644 index 0000000000..99ec4e799f --- /dev/null +++ b/bluehawk/snippets/models_quickart.snippet.retrieve_model.py @@ -0,0 +1,12 @@ + +REGISTRY_NAME = "Model" # Name of the 
registry in W&B +COLLECTION_NAME = "DemoModels" # Name of the collection in the registry +VERSION = 0 # Version of the artifact to retrieve + +model_artifact_name = f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}:v{VERSION}" +print(f"Model artifact name: {model_artifact_name}") + +run = wandb.init(entity=TEAM_ENTITY, project=PROJECT) +registry_model = run.use_artifact(artifact_or_name=model_artifact_name) +local_model_path = registry_model.download() + diff --git a/bluehawk/snippets/models_quickart.snippet.share_report.py b/bluehawk/snippets/models_quickart.snippet.share_report.py new file mode 100644 index 0000000000..a297ac5bcb --- /dev/null +++ b/bluehawk/snippets/models_quickart.snippet.share_report.py @@ -0,0 +1,30 @@ + +experiment_summary = """This is a summary of the experiment conducted to train a simple model using W&B.""" +dataset_info = """The dataset used for training consists of synthetic data generated by a simple model.""" +model_info = """The model is a simple linear regression model that predicts output based on input data with some noise.""" + +report = wr.Report( + project=PROJECT, + entity=TEAM_ENTITY, + title="My Awesome Model Training Report", + description=experiment_summary, + blocks= [ + wr.TableOfContents(), + wr.H2("Experiment Summary"), + wr.MarkdownBlock(text=experiment_summary), + wr.H2("Dataset Information"), + wr.MarkdownBlock(text=dataset_info), + wr.H2("Model Information"), + wr.MarkdownBlock(text = model_info), + wr.PanelGrid( + panels=[ + wr.LinePlot(title="Train Loss", x="Step", y=["loss"], title_x="Step", title_y="Loss") + ], + ), + ] + +) + +# Save the report to W&B +report.save() + diff --git a/bluehawk/snippets/models_quickart.snippet.train_model.py b/bluehawk/snippets/models_quickart.snippet.train_model.py new file mode 100644 index 0000000000..fc471c0072 --- /dev/null +++ b/bluehawk/snippets/models_quickart.snippet.train_model.py @@ -0,0 +1,48 @@ + +def model(training_data: int) -> int: + """Model simulation for 
demonstration purposes.""" + return training_data * 2 + random.randint(-1, 1) + +# Simulate weights and noise +weights = random.random() # Initialize random weights +noise = random.random() / 5 # Small random noise to simulate noise + +# Hyperparameters and configuration +config = { + "epochs": 10, # Number of epochs to train + "learning_rate": 0.01, # Learning rate for the optimizer +} + +# Use context manager to initialize and close W&B runs +with wandb.init(project=PROJECT, entity=TEAM_ENTITY, config=config) as run: + # Simulate training loop + for epoch in range(config["epochs"]): + xb = weights + noise # Simulated input training data + yb = weights + noise * 2 # Simulated target output (double the input noise) + + y_pred = model(xb) # Model prediction + loss = (yb - y_pred) ** 2 # Squared error loss for this single sample + + print(f"epoch={epoch}, loss={loss}") + # Log epoch and loss to W&B + run.log({ + "epoch": epoch, + "loss": loss, + }) + + # Name for the model artifact + model_artifact_name = "model-demo" + + # Local path to save the simulated model file + PATH = "model.txt" + + # Save model locally + with open(PATH, "w") as f: + f.write(str(weights)) # Saving model weights to a file + + # Create an artifact object + # Add locally saved model to artifact object + artifact = wandb.Artifact(name=model_artifact_name, type="model", description="My trained model") + artifact.add_file(local_path=PATH) + artifact.save() + diff --git a/bluehawk/snippets/registry.snippet.registry_index.py b/bluehawk/snippets/registry.snippet.registry_index.py new file mode 100644 index 0000000000..6d0641f1eb --- /dev/null +++ b/bluehawk/snippets/registry.snippet.registry_index.py @@ -0,0 +1,29 @@ + +import wandb +import random + +# Initialize a W&B Run to track the artifact +run = wandb.init(project="registry_quickstart") + +# Create a simulated model file so that you can log it +with open("my_model.txt", "w") as f: + f.write("Model: " + str(random.random())) + +# Log the artifact to 
W&B +logged_artifact = run.log_artifact( + artifact_or_path="./my_model.txt", + name="gemma-finetuned", + type="model" # Specifies artifact type +) + +# Specify the name of the collection and registry +# you want to publish the artifact to +COLLECTION_NAME = "first-collection" +REGISTRY_NAME = "model" + +# Link the artifact to the registry +run.link_artifact( + artifact=logged_artifact, + target_path=f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}" +) + diff --git a/bluehawk/snippets/table.snippet.log_table.py b/bluehawk/snippets/table.snippet.log_table.py new file mode 100644 index 0000000000..c115b418ae --- /dev/null +++ b/bluehawk/snippets/table.snippet.log_table.py @@ -0,0 +1,6 @@ +import wandb + +run = wandb.init(project="table-test") +my_table = wandb.Table(columns=["a", "b"], data=[["a1", "b1"], ["a2", "b2"]]) +run.log({"Table Name": my_table}) + diff --git a/bluehawk/snippets/wandb_install.snippet.pip_install_wandb.sh b/bluehawk/snippets/wandb_install.snippet.pip_install_wandb.sh new file mode 100644 index 0000000000..923d3c1028 --- /dev/null +++ b/bluehawk/snippets/wandb_install.snippet.pip_install_wandb.sh @@ -0,0 +1 @@ +pip install wandb diff --git a/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_only.sh b/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_only.sh new file mode 100644 index 0000000000..1e490c5788 --- /dev/null +++ b/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_only.sh @@ -0,0 +1 @@ +pip install wandb wandb_workspaces diff --git a/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_packages.sh b/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_packages.sh new file mode 100644 index 0000000000..1e490c5788 --- /dev/null +++ b/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_packages.sh @@ -0,0 +1 @@ +pip install wandb wandb_workspaces diff --git a/bluehawk/snippets/wandb_install.snippet.pip_install_workspaces_only.sh 
b/bluehawk/snippets/wandb_install.snippet.pip_install_workspaces_only.sh new file mode 100644 index 0000000000..aa37881d93 --- /dev/null +++ b/bluehawk/snippets/wandb_install.snippet.pip_install_workspaces_only.sh @@ -0,0 +1 @@ +pip install wandb_workspaces diff --git a/bluehawk/snippets/wandb_install.snippet.uv_install_wandb_packages.sh b/bluehawk/snippets/wandb_install.snippet.uv_install_wandb_packages.sh new file mode 100644 index 0000000000..075b1afe24 --- /dev/null +++ b/bluehawk/snippets/wandb_install.snippet.uv_install_wandb_packages.sh @@ -0,0 +1 @@ +uv pip install wandb wandb_workspaces diff --git a/bluehawk/snippets/wandb_login.snippet.wandb_login_cli.sh b/bluehawk/snippets/wandb_login.snippet.wandb_login_cli.sh new file mode 100644 index 0000000000..03f4f40a03 --- /dev/null +++ b/bluehawk/snippets/wandb_login.snippet.wandb_login_cli.sh @@ -0,0 +1 @@ +wandb login diff --git a/bluehawk/snippets/wandb_login.snippet.wandb_login_python.py b/bluehawk/snippets/wandb_login.snippet.wandb_login_python.py new file mode 100644 index 0000000000..718656a7a2 --- /dev/null +++ b/bluehawk/snippets/wandb_login.snippet.wandb_login_python.py @@ -0,0 +1 @@ +wandb.login(key="<YOUR_API_KEY>") diff --git a/bluehawk/source/artifact.py b/bluehawk/source/artifact.py new file mode 100644 index 0000000000..6e474d1258 --- /dev/null +++ b/bluehawk/source/artifact.py @@ -0,0 +1,26 @@ +##### Create an artifact ##### +# :snippet-start: create_artifact +import wandb + +run = wandb.init(project="artifacts-example", job_type="add-dataset") +artifact = wandb.Artifact(name="example_artifact", type="dataset") +artifact.add_file(local_path="./dataset.h5", name="training_dataset") +artifact.save() +# :snippet-end: create_artifact +##### Create an artifact - END ##### + + +##### Download an artifact ##### +# :snippet-start: download_artifact-1 +artifact = run.use_artifact( + "example_artifact:latest" +) # returns the "example_artifact" artifact object and marks it as an input of this run +# :snippet-end: download_artifact-1 + 
+# :snippet-start: download_artifact-2 +datadir = ( + artifact.download() +) # downloads the full artifact to the default directory. +# :snippet-end: download_artifact-2 + +##### Download an artifact - END ##### \ No newline at end of file diff --git a/bluehawk/source/config.py b/bluehawk/source/config.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bluehawk/source/create-a-report.py b/bluehawk/source/create-a-report.py new file mode 100644 index 0000000000..91ac0ed160 --- /dev/null +++ b/bluehawk/source/create-a-report.py @@ -0,0 +1,9 @@ +import wandb +import wandb_workspaces.reports.v2 as wr + +# :snippet-start: create-a-report + +report = wr.Report(project="report_standard") +report.save() + +# :snippet-end: create-a-report \ No newline at end of file diff --git a/bluehawk/source/create_registry.py b/bluehawk/source/create_registry.py new file mode 100644 index 0000000000..faa05696a0 --- /dev/null +++ b/bluehawk/source/create_registry.py @@ -0,0 +1,6 @@ +import wandb + +registry = wandb.Api().create_registry( + name="<registry_name>", + visibility="< 'restricted' | 'organization' >", +) \ No newline at end of file diff --git a/bluehawk/source/edit-a-report.py b/bluehawk/source/edit-a-report.py new file mode 100644 index 0000000000..3e45cfb924 --- /dev/null +++ b/bluehawk/source/edit-a-report.py @@ -0,0 +1,378 @@ +import wandb +import wandb_workspaces.reports.v2 as wr + + +## Pseudocode for testing various report features ## +# if testing is set to True: +# replace <entity> and <project> with test values +## + +# :snippet-start: add-plots +report = wr.Report( + project = "<project>", + title="<title>", + description="A descriptive description.", +) + +blocks = [ + wr.PanelGrid( + panels=[ + wr.LinePlot(x="time", y="velocity"), + wr.ScatterPlot(x="time", y="acceleration"), + ] + ) +] + +report.blocks = blocks +report.save() +# :snippet-end: add-plots + + + +# :snippet-start: add-runset-no-panels +report = wr.Report( + project = 
"<project>", + title="An amazing title", + description="A descriptive description.", +) + +blocks = wr.PanelGrid( + runsets=[ + wr.Runset(project="<project>", entity="<entity>") + ] +) + +report.blocks = [blocks] +report.save() +# :snippet-end: add-runset-no-panels + +# :snippet-start: add-runsets-and-panels +report = wr.Report( + project = "<project>", + title="An amazing title", + description="A descriptive description.", +) + +blocks = wr.PanelGrid( + runsets=[ + wr.Runset(project="<project>", entity="<entity>") + ], + panels=[ + wr.LinePlot( + title="line title", + x="x", + y=["y"], + range_x=[0, 100], + range_y=[0, 100], + log_x=True, + log_y=True, + title_x="x axis title", + title_y="y axis title", + ignore_outliers=True, + groupby="hyperparam1", + groupby_aggfunc="mean", + groupby_rangefunc="minmax", + smoothing_factor=0.5, + smoothing_type="gaussian", + smoothing_show_original=True, + max_runs_to_show=10, + plot_type="stacked-area", + font_size="large", + legend_position="west", + ), + wr.ScatterPlot( + title="scatter title", + x="y", + y="y", + # z='x', + range_x=[0, 0.0005], + range_y=[0, 0.0005], + # range_z=[0,1], + log_x=False, + log_y=False, + # log_z=True, + running_ymin=True, + running_ymean=True, + running_ymax=True, + font_size="small", + regression=True, + ), + ], + +) + +report.blocks = [blocks] +report.save() +# :snippet-end: add-runsets-and-panels + + +entity = "<entity>" +project = "<project>" + +for group in ["control", "experiment_a", "experiment_b"]: + for i in range(3): + with wandb.init(entity=entity, project=project, group=group, config={"group": group, "run": i}, name=f"{group}_run_{i}") as run: + # Simulate some training + for step in range(100): + run.log({ + "acc": 0.5 + (step / 100) * 0.3 + (i * 0.05), + "loss": 1.0 - (step / 100) * 0.5 + }) + +# :snippet-start: group-runs-config +# Create a report that groups runs by a config value +report = wr.Report( + entity=entity, + project=project, + title="Grouped Runs Example", +) + +# 
Create a runset that groups runs by the "group" config value +runset = wr.Runset( + project=project, + entity=entity, + groupby=["config.group"] +) +# Add the runset to a panel grid in the report +report.blocks = [ + wr.PanelGrid( + runsets=[runset], + ) + ] +# Save the report +report.save() +# :snippet-end: group-runs-config + + +##### Group runs by run metadata ##### +entity = "<entity>" +project = "<project>" + +# :snippet-start: group-runs-metadata +# Create a report that groups runs by their metadata (e.g., run name) +report = wr.Report( + entity=entity, + project=project, + title="Grouped Runs by Metadata Example", +) + +# Create a runset that groups runs by their name (metadata) +runset = wr.Runset( + project=project, + entity=entity, + groupby=["Name"] # Group by run names +) + +# Add the runset to a panel grid in the report +report.blocks = [ + wr.PanelGrid( + runsets=[runset], + ) + ] +# Save the report +report.save() +# :snippet-end: group-runs-metadata +##### END ##### + + +##### Group runs by summary metrics ##### +entity = "<entity>" +project = "<project>" + +for group in ["control", "experiment_a", "experiment_b"]: + for i in range(3): + with wandb.init(entity=entity, project=project, group=group, config={"group": group, "run": i}, name=f"{group}_run_{i}") as run: + # Simulate some training + for step in range(100): + run.log({ + "acc": 0.5 + (step / 100) * 0.3 + (i * 0.05), + "loss": 1.0 - (step / 100) * 0.5 + }) + +# :snippet-start: group-runs-summary-metrics +# Create a report that groups runs by a summary metric +report = wr.Report( + entity=entity, + project=project, + title="Grouped Runs by Summary Metrics Example", +) + +# Create a runset that groups runs by the "summary.acc" summary metric +runset = wr.Runset( + project=project, + entity=entity, + groupby=["summary.acc"] # Group by summary values +) + +# Add the runset to a panel grid in the report +report.blocks = [ + wr.PanelGrid( + runsets=[runset], + ) + ] +# Save the report 
+report.save() +# :snippet-end: group-runs-summary-metrics +##### END ##### + + +##### Config filters ##### +# :snippet-start: config-filters-0 +config = { + "learning_rate": 0.01, + "batch_size": 32, +} + +with wandb.init(project="<project>", entity="<entity>", config=config) as run: + # Your training code here + pass + +# :snippet-end: config-filters-0 + +# :snippet-start: config-filters-1 +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Config('learning_rate') > 0.01" +) +# :snippet-end: config-filters-1 + +# :snippet-start: config-filters-2 +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Config('learning_rate') > 0.01 and Config('batch_size') == 32" +) +# :snippet-end: config-filters-2 + +# :snippet-start: config-filters-3 +report = wr.Report( + entity="<entity>", + project="<project>", +) + +report.blocks = [ + wr.PanelGrid( + runsets=[runset] + ) +] + +report.save() +# :snippet-end: config-filters-3 +##### END ##### + + +##### Metric filters ##### +# :snippet-start: metric-filters-0 +for i in range(3): + # Start a separate run per iteration so that three runs named run1..run3 exist + with wandb.init(project="<project>", entity="<entity>", name=f"run{i+1}") as run: + # Your training code here + pass +# :snippet-end: metric-filters-0 + +# :snippet-start: metric-filters-1 +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Metric('displayName') in ['run1', 'run2', 'run3']" +) +# :snippet-end: metric-filters-1 + +# :snippet-start: metric-filters-2 +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Metric('state') in ['finished']" +) +# :snippet-end: metric-filters-2 + +# :snippet-start: metric-filters-3 +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Metric('state') not in ['crashed']" +) +# :snippet-end: metric-filters-3 +##### END ##### + +##### Summary metric filters ##### +# :snippet-start: summary-metric-filters +runset = wr.Runset( + entity="<entity>", + project="<project>", + 
filters="SummaryMetric('accuracy') > 0.9" +) + +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Metric('state') in ['finished'] and SummaryMetric('train/train_loss') < 0.5" +) +# :snippet-end: summary-metric-filters +##### END ##### + +# :snippet-start: tag-filters +runset = wr.Runset( + entity="<entity>", + project="<project>", + filters="Tags('training') == 'training'" +) +# :snippet-end: tag-filters + + + +###### Adding different block types to a report ###### + +# :snippet-start: add-code-blocks +report = wr.Report(project = "<project>") + +report.blocks = [ + wr.CodeBlock( + code=["this:", "- is", "- a", "cool:", "- yaml", "- file"], language="yaml" + ), + wr.CodeBlock(code=["Hello, World!"], language="python") +] + +report.save() +# :snippet-end: add-code-blocks + +# :snippet-start: add-code-block-python +report = wr.Report(project="report-editing") + +report.blocks = [wr.CodeBlock(code=["Hello, World!"], language="python")] + +report.save() +# :snippet-end: add-code-block-python + + +# :snippet-start: add-markdown +report = wr.Report(project = "<project>") + +report.blocks = [ + wr.MarkdownBlock(text="Markdown cell with *italics* and **bold** and $e=mc^2$") +] +report.save() +# :snippet-end: add-markdown + +# :snippet-start: add-html +report = wr.Report(project = "<project>") + +report.blocks = [ + wr.H1(text="How Programmatic Reports work"), + wr.H2(text="Heading 2"), + wr.UnorderedList(items=["Bullet 1", "Bullet 2"]), +] + +report.save() +# :snippet-end: add-html + + +# :snippet-start: embed-rich-media +report = wr.Report(project = "<project>") + +report.blocks = [ + wr.Video(url="https://www.youtube.com/embed/6riDJMI-Y8U") +] +report.save() +# :snippet-end: embed-rich-media diff --git a/bluehawk/source/group_runs.py b/bluehawk/source/group_runs.py new file mode 100644 index 0000000000..7b7a7362b8 --- /dev/null +++ b/bluehawk/source/group_runs.py @@ -0,0 +1,19 @@ +# :snippet-start: group_runs + +import wandb +import 
wandb_workspaces.reports.v2 as wr + +entity = "<entity>" +project = "<project>" + +for group in ["control", "experiment_a", "experiment_b"]: + for i in range(3): + with wandb.init(entity=entity, project=project, group=group, config={"group": group, "run": i}, name=f"{group}_run_{i}") as run: + # Simulate some training + for step in range(100): + run.log({ + "acc": 0.5 + (step / 100) * 0.3 + (i * 0.05), + "loss": 1.0 - (step / 100) * 0.5 + }) + +# :snippet-end: group_runs \ No newline at end of file diff --git a/bluehawk/source/import_wandb.py b/bluehawk/source/import_wandb.py new file mode 100644 index 0000000000..2ba6ce0cb5 --- /dev/null +++ b/bluehawk/source/import_wandb.py @@ -0,0 +1,14 @@ +# :snippet-start: import_wandb +import wandb +# :snippet-end: import_wandb + +# :snippet-start: import_wandb_workspaces +import wandb_workspaces.reports.v2 as wr +# :snippet-end: import_wandb_workspaces + +# :snippet-start: import_wandb_and_workspaces +import wandb +import wandb_workspaces.reports.v2 as wr +# :snippet-end: import_wandb_and_workspaces + +import random # For simulating data diff --git a/bluehawk/source/models_quickart.py b/bluehawk/source/models_quickart.py new file mode 100644 index 0000000000..d55695bbdf --- /dev/null +++ b/bluehawk/source/models_quickart.py @@ -0,0 +1,170 @@ +import wandb +import random # For simulating data +import wandb_workspaces.reports.v2 as wr + +TEAM_ENTITY = "smle-reg-team-2" +PROJECT = "my-awesome-project" + + + + +##### Train a model ##### + +# :snippet-start: train_model + +def model(training_data: int) -> int: + """Model simulation for demonstration purposes.""" + return training_data * 2 + random.randint(-1, 1) + +# Simulate weights and noise +weights = random.random() # Initialize random weights +noise = random.random() / 5 # Small random noise to simulate noise + +# Hyperparameters and configuration +config = { + "epochs": 10, # Number of epochs to train + "learning_rate": 0.01, # Learning rate for the optimizer +} + +# Use 
context manager to initialize and close W&B runs +with wandb.init(project=PROJECT, entity=TEAM_ENTITY, config=config) as run: + # Simulate training loop + for epoch in range(config["epochs"]): + xb = weights + noise # Simulated input training data + yb = weights + noise * 2 # Simulated target output (double the input noise) + + y_pred = model(xb) # Model prediction + loss = (yb - y_pred) ** 2 # Mean Squared Error loss + + print(f"epoch={epoch}, loss={loss}") + # Log epoch and loss to W&B + run.log({ + "epoch": epoch, + "loss": loss, + }) + + # Unique name for the model artifact, + model_artifact_name = f"model-demo" + + # Local path to save the simulated model file + PATH = "model.txt" + + # Save model locally + with open(PATH, "w") as f: + f.write(str(weights)) # Saving model weights to a file + + # Create an artifact object + # Add locally saved model to artifact object + artifact = wandb.Artifact(name=model_artifact_name, type="model", description="My trained model") + artifact.add_file(local_path=PATH) + artifact.save() + +# :snippet-end: train_model + +##### Publish the model to the Model registry ##### + +# :snippet-start: publish_model + +# Artifact name specifies the specific artifact version within our team's project +artifact_name = f'{TEAM_ENTITY}/{PROJECT}/{model_artifact_name}:v0' +print("Artifact name: ", artifact_name) + +REGISTRY_NAME = "Model" # Name of the registry in W&B +COLLECTION_NAME = "DemoModels" # Name of the collection in the registry + +# Create a target path for our artifact in the registry +target_path = f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}" +print("Target path: ", target_path) + +run = wandb.init(entity=TEAM_ENTITY, project=PROJECT) +model_artifact = run.use_artifact(artifact_or_name=artifact_name, type="model") +run.link_artifact(artifact=model_artifact, target_path=target_path) +run.finish() + +# :snippet-end: publish_model + +##### Retrieve model artifact from registry for inference ##### + +# :snippet-start: 
retrieve_model + +REGISTRY_NAME = "Model" # Name of the registry in W&B +COLLECTION_NAME = "DemoModels" # Name of the collection in the registry +VERSION = 0 # Version of the artifact to retrieve + +model_artifact_name = f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}:v{VERSION}" +print(f"Model artifact name: {model_artifact_name}") + +run = wandb.init(entity=TEAM_ENTITY, project=PROJECT) +registry_model = run.use_artifact(artifact_or_name=model_artifact_name) +local_model_path = registry_model.download() + +# :snippet-end: retrieve_model + +##### Share your findings with a report ##### + +# :snippet-start: share_report + +experiment_summary = """This is a summary of the experiment conducted to train a simple model using W&B.""" +dataset_info = """The dataset used for training consists of synthetic data generated by a simple model.""" +model_info = """The model is a simple linear regression model that predicts output based on input data with some noise.""" + +report = wr.Report( + project=PROJECT, + entity=TEAM_ENTITY, + title="My Awesome Model Training Report", + description=experiment_summary, + blocks= [ + wr.TableOfContents(), + wr.H2("Experiment Summary"), + wr.MarkdownBlock(text=experiment_summary), + wr.H2("Dataset Information"), + wr.MarkdownBlock(text=dataset_info), + wr.H2("Model Information"), + wr.MarkdownBlock(text = model_info), + wr.PanelGrid( + panels=[ + wr.LinePlot(title="Train Loss", x="Step", y=["loss"], title_x="Step", title_y="Loss") + ], + ), + ] + +) + +# Save the report to W&B +report.save() + +# :snippet-end: share_report + +##### Query the registry ##### + +# :snippet-start: query_registry + +# Initialize wandb API +api = wandb.Api() + +# Find all artifact versions in registries whose name contains `model` and +# that have either the tag `text-classification` or a `latest` alias +registry_filters = { + "name": {"$regex": "model"} +} + +# Use logical $or operator to filter artifact versions +version_filters = { + "$or": [ + {"tag": 
"text-classification"}, + {"alias": "latest"} + ] +} + +# Returns an iterable of all artifact versions that match the filters +artifacts = api.registries(filter=registry_filters).collections().versions(filter=version_filters) + +# Print out the name, collection, aliases, tags, and created_at date of each artifact found +for art in artifacts: + print(f"artifact name: {art.name}") + print(f"collection artifact belongs to: { art.collection.name}") + print(f"artifact aliases: {art.aliases}") + print(f"tags attached to artifact: {art.tags}") + print(f"artifact created at: {art.created_at}\n") + +# :snippet-end: query_registry \ No newline at end of file diff --git a/bluehawk/source/registry.py b/bluehawk/source/registry.py new file mode 100644 index 0000000000..57f63b0fcd --- /dev/null +++ b/bluehawk/source/registry.py @@ -0,0 +1,31 @@ +# :snippet-start: registry_index + +import wandb +import random + +# Initialize a W&B Run to track the artifact +run = wandb.init(project="registry_quickstart") + +# Create a simulated model file so that you can log it +with open("my_model.txt", "w") as f: + f.write("Model: " + str(random.random())) + +# Log the artifact to W&B +logged_artifact = run.log_artifact( + artifact_or_path="./my_model.txt", + name="gemma-finetuned", + type="model" # Specifies artifact type +) + +# Specify the name of the collection and registry +# you want to publish the artifact to +COLLECTION_NAME = "first-collection" +REGISTRY_NAME = "model" + +# Link the artifact to the registry +run.link_artifact( + artifact=logged_artifact, + target_path=f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}" +) + +# :snippet-end: registry_index \ No newline at end of file diff --git a/bluehawk/source/table.py b/bluehawk/source/table.py new file mode 100644 index 0000000000..c1305c7b98 --- /dev/null +++ b/bluehawk/source/table.py @@ -0,0 +1,8 @@ +# :snippet-start: log_table +import wandb + +run = wandb.init(project="table-test") +my_table = wandb.Table(columns=["a", "b"], 
data=[["a1", "b1"], ["a2", "b2"]]) +run.log({"Table Name": my_table}) + +# :snippet-end: log_table \ No newline at end of file diff --git a/bluehawk/source/wandb_install.sh b/bluehawk/source/wandb_install.sh new file mode 100644 index 0000000000..8179d7d6cf --- /dev/null +++ b/bluehawk/source/wandb_install.sh @@ -0,0 +1,19 @@ +# :snippet-start: pip_install_wandb +pip install wandb +# :snippet-end: pip_install_wandb + +# :snippet-start: pip_install_wandb_only +pip install wandb wandb_workspaces +# :snippet-end: pip_install_wandb_only + +# :snippet-start: pip_install_workspaces_only +pip install wandb_workspaces +# :snippet-end: pip_install_workspaces_only + +# :snippet-start: pip_install_wandb_packages +pip install wandb wandb_workspaces +# :snippet-end: pip_install_wandb_packages + +# :snippet-start: uv_install_wandb_packages +uv pip install wandb wandb_workspaces +# :snippet-end: uv_install_wandb_packages \ No newline at end of file diff --git a/bluehawk/source/wandb_login.py b/bluehawk/source/wandb_login.py new file mode 100644 index 0000000000..ec968678d3 --- /dev/null +++ b/bluehawk/source/wandb_login.py @@ -0,0 +1,5 @@ +import wandb + +# :snippet-start: wandb_login_python +wandb.login(key="<YOUR_API_KEY>") +# :snippet-end: wandb_login_python \ No newline at end of file diff --git a/bluehawk/source/wandb_login.sh b/bluehawk/source/wandb_login.sh new file mode 100644 index 0000000000..bdb0f10bd7 --- /dev/null +++ b/bluehawk/source/wandb_login.sh @@ -0,0 +1,3 @@ +# :snippet-start: wandb_login_cli +wandb login +# :snippet-end: wandb_login_cli \ No newline at end of file diff --git a/content/en/guides/core/artifacts/_index.md b/content/en/guides/core/artifacts/_index.md index d8ae8a76e1..084c2d1235 100644 --- a/content/en/guides/core/artifacts/_index.md +++ b/content/en/guides/core/artifacts/_index.md @@ -30,7 +30,7 @@ You can use artifacts throughout your entire ML workflow as inputs and outputs o {{% alert %}} -The proceeding code snippets are meant to be run 
in order. +The following code snippets are meant to be run in order. {{% /alert %}} ## Create an artifact @@ -41,18 +41,10 @@ Create an artifact with four lines of code: 3. Add one or more files, such as a model file or dataset, to your artifact object. 4. Log your artifact to W&B. -For example, the proceeding code snippet shows how to log a file called `dataset.h5` to an artifact called `example_artifact`: +For example, the following code snippet shows how to log a file called `dataset.h5` to an artifact called `example_artifact`: -```python -import wandb +{{< code language="python" source="/bluehawk/snippets/artifact.snippet.create_artifact.py" >}} -run = wandb.init(project="artifacts-example", job_type="add-dataset") -artifact = wandb.Artifact(name="example_artifact", type="dataset") -artifact.add_file(local_path="./dataset.h5", name="training_dataset") -artifact.save() - -# Logs the artifact version "my_data" as a dataset with data from dataset.h5 -``` - The `type` of the artifact affects how it appears in the W&B platform. If you do not specify a `type`, it defaults to `unspecified`. - Each label of the dropdown represents a different `type` parameter value. In the above code snippet, the artifact's `type` is `dataset`. @@ -66,20 +58,13 @@ Indicate the artifact you want to mark as input to your run with the [`use_artif Following the preceding code snippet, this next code block shows how to use the `training_dataset` artifact: -```python -artifact = run.use_artifact( - "training_dataset:latest" -) # returns a run object using the "my_data" artifact -``` +{{< code language="python" source="/bluehawk/snippets/artifact.snippet.download_artifact-1.py" >}} + This returns an artifact object. Next, use the returned object to download all contents of the artifact: -```python -datadir = ( - artifact.download() -) # downloads the full `my_data` artifact to the default directory. 
-``` +{{< code language="python" source="/bluehawk/snippets/artifact.snippet.download_artifact-2.py" >}} {{% alert %}} You can pass a custom path into the `root` [parameter]({{< relref "/ref/python/experiments/artifact.md" >}}) to download an artifact to a specific directory. For alternate ways to download artifacts and to see additional parameters, see the guide on [downloading and using artifacts]({{< relref "./download-and-use-an-artifact.md" >}}). diff --git a/content/en/guides/core/registry/_index.md b/content/en/guides/core/registry/_index.md index 33392f337f..18fb69cc02 100644 --- a/content/en/guides/core/registry/_index.md +++ b/content/en/guides/core/registry/_index.md @@ -54,35 +54,7 @@ As an example, the proceeding code example shows how to log and link a model art Save this Python code to a script and run it. W&B Python SDK version 0.18.6 or newer is required. -```python title="hello_collection.py" -import wandb -import random - -# Initialize a W&B Run to track the artifact -run = wandb.init(project="registry_quickstart") - -# Create a simulated model file so that you can log it -with open("my_model.txt", "w") as f: - f.write("Model: " + str(random.random())) - -# Log the artifact to W&B -logged_artifact = run.log_artifact( - artifact_or_path="./my_model.txt", - name="gemma-finetuned", - type="model" # Specifies artifact type -) - -# Specify the name of the collection and registry -# you want to publish the artifact to -COLLECTION_NAME = "first-collection" -REGISTRY_NAME = "model" - -# Link the artifact to the registry -run.link_artifact( - artifact=logged_artifact, - target_path=f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}" -) -``` +{{< code language="python" source="/bluehawk/snippets/registry.snippet.registry_index.py" >}} W&B automatically creates a collection for you if the collection you specify in the returned run object's `link_artifact(target_path = "")` method does not exist within the registry you specify. 
diff --git a/content/en/guides/core/reports/create-a-report.md b/content/en/guides/core/reports/create-a-report.md index 9e7cc2278d..0451cd8be9 100644 --- a/content/en/guides/core/reports/create-a-report.md +++ b/content/en/guides/core/reports/create-a-report.md @@ -42,15 +42,12 @@ See this [Google Colab](https://colab.research.google.com/github/wandb/examples/ {{% tab header="Report and Workspace API" value="python_wr_api"%}} Create a report programmatically: -1. Install W&B SDK (`wandb`) and Report and Workspace API (`wandb-workspaces`): - ```bash - pip install wandb wandb-workspaces - ``` -2. Next, import workspaces - ```python - import wandb - import wandb_workspaces.reports.v2 as wr - ``` +1. Install the W&B Python SDK (`wandb`) and Report and Workspace API (`wandb-workspaces`): + {{< code language="shell" source="/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_packages.sh" >}} + +2. Next, import the W&B Python SDK and the Report and Workspace API: + {{< code language="python" source="/bluehawk/snippets/import_wandb.snippet.import_wandb_and_workspaces.py" >}} + 3. Create a report with `wandb_workspaces.reports.v2.Report`. Create a report instance with the Report Class Public API ([`wandb.apis.reports`]({{< relref "/ref/python/public-api/api.md#reports" >}})). Specify a name for the project. ```python report = wr.Report(project="report_standard") diff --git a/content/en/guides/core/reports/edit-a-report.md b/content/en/guides/core/reports/edit-a-report.md index f607d3aa11..9a60f72c1c 100644 --- a/content/en/guides/core/reports/edit-a-report.md +++ b/content/en/guides/core/reports/edit-a-report.md @@ -13,23 +13,29 @@ weight: 20 W&B Report and Workspace API is in Public Preview. {{% /alert %}} -Edit a report interactively with the App UI or programmatically with the W&B SDK. - Reports consist of _blocks_. Blocks make up the body of a report. 
Within these blocks you can add text, images, embedded visualizations, plots from experiments and run, and panels grids. _Panel grids_ are a specific type of block that hold panels and _run sets_. Run sets are a collection of runs logged to a project in W&B. Panels are visualizations of run set data. -{{% alert %}} +Edit a report interactively with the W&B App or programmatically with the W&B Python SDK. + +<!-- {{% alert %}} Check out the [Programmatic workspaces tutorial]({{< relref "/tutorials/workspaces.md" >}}) for a step by step example on how create and customize a saved workspace view. -{{% /alert %}} +{{% /alert %}} --> -{{% alert %}} -Verify that you have the W&B Report and Workspace API `wandb-workspaces` installed in addition to the W&B Python SDK if you want to programmatically edit a report: +{{% alert title="Programmatic editing requirements" %}} + +To programmatically edit a report, you need to install the W&B Report and Workspace API `wandb-workspaces` in addition to the W&B Python SDK (`wandb`): + +{{< code language="shell" source="/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_packages.sh" >}} + +Within your Python script or notebook, import both the W&B Python SDK (`wandb`) and the `wandb_workspaces.reports.v2` module to access the Report and Workspace API: + +{{< code language="python" source="/bluehawk/snippets/import_wandb.snippet.import_wandb_and_workspaces.py" >}} + +Throughout this guide, code snippets that demonstrate how to programmatically edit a report are prefixed with `wr.` to indicate they are part of the Report and Workspace API. -```pip -pip install wandb wandb-workspaces -``` {{% /alert %}} ## Add plots @@ -48,32 +54,9 @@ Enter a forward slash (`/`) in the report to display a dropdown menu. Select **A Add plots to a report programmatically with the SDK. Pass a list of one or more plot or chart objects to the `panels` parameter in the `PanelGrid` Public API Class. 
Create a plot or chart object with its associated Python Class. -The proceeding examples demonstrates how to create a line plot and scatter plot. - -```python -import wandb -import wandb_workspaces.reports.v2 as wr - -report = wr.Report( - project="report-editing", - title="An amazing title", - description="A descriptive description.", -) - -blocks = [ - wr.PanelGrid( - panels=[ - wr.LinePlot(x="time", y="velocity"), - wr.ScatterPlot(x="time", y="acceleration"), - ] - ) -] - -report.blocks = blocks -report.save() -``` +The following example demonstrates how to create a line plot and a scatter plot. -For more information about available plots and charts you can add to a report programmatically, see `wr.panels`. +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.add-plots.py" >}} {{% /tab %}} {{< /tabpane >}} @@ -102,95 +85,23 @@ If you import a panel into a report, run names are inherited from the project. I {{% tab header="Report and Workspace API" value="python_wr_api"%}} -Add run sets from projects with the `wr.Runset()` and `wr.PanelGrid` Classes. The proceeding procedure describes how to add a runset: +Add run sets from projects with the `wr.Runset()` and `wr.PanelGrid` Classes. The following procedure describes how to add a runset: 1. Create a `wr.Runset()` object instance. Provide the name of the project that contains the run sets for the project parameter and the entity that owns the project for the entity parameter. 2. Create a `wr.PanelGrid()` object instance. Pass a list of one or more runset objects to the `run sets` parameter. 3. Store one or more `wr.PanelGrid()` object instances in a list. 4. Update the report instance blocks attribute with the list of panel grid instances. 
-```python -import wandb -import wandb_workspaces.reports.v2 as wr -report = wr.Report( - project="report-editing", - title="An amazing title", - description="A descriptive description.", -) - -panel_grids = wr.PanelGrid( - runsets=[wr.RunSet(project="<project-name>", entity="<entity-name>")] -) - -report.blocks = [panel_grids] -report.save() -``` - -You can optionally add runsets and panels with one call to the SDK: - -```python -import wandb - -report = wr.Report( - project="report-editing", - title="An amazing title", - description="A descriptive description.", -) - -panel_grids = wr.PanelGrid( - panels=[ - wr.LinePlot( - title="line title", - x="x", - y=["y"], - range_x=[0, 100], - range_y=[0, 100], - log_x=True, - log_y=True, - title_x="x axis title", - title_y="y axis title", - ignore_outliers=True, - groupby="hyperparam1", - groupby_aggfunc="mean", - groupby_rangefunc="minmax", - smoothing_factor=0.5, - smoothing_type="gaussian", - smoothing_show_original=True, - max_runs_to_show=10, - plot_type="stacked-area", - font_size="large", - legend_position="west", - ), - wr.ScatterPlot( - title="scatter title", - x="y", - y="y", - # z='x', - range_x=[0, 0.0005], - range_y=[0, 0.0005], - # range_z=[0,1], - log_x=False, - log_y=False, - # log_z=True, - running_ymin=True, - running_ymean=True, - running_ymax=True, - font_size="small", - regression=True, - ), - ], - runsets=[wr.RunSet(project="<project-name>", entity="<entity-name>")], -) - - -report.blocks = [panel_grids] -report.save() -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.add-runset-no-panels.py" >}} {{% /tab %}} {{< /tabpane >}} +## Add run sets and panels +You can optionally add runsets and panels with one call to the SDK: + +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.add-runsets-and-panels.py" >}} ## Freeze a run set @@ -223,51 +134,15 @@ Group runs by config values to compare runs with similar configurations. 
Config For example, the following code snippet first initializes a run with a config value for `group`, then groups runs in a report based on the `group` config value. Replace values for `<entity>` and `<project>` with your W&B entity and project names. -```python -import wandb -import wandb_workspaces.reports.v2 as wr - -entity = "<entity>" -project = "<project>" - -for group in ["control", "experiment_a", "experiment_b"]: - for i in range(3): - with wandb.init(entity=entity, project=project, group=group, config={"group": group, "run": i}, name=f"{group}_run_{i}") as run: - # Simulate some training - for step in range(100): - run.log({ - "acc": 0.5 + (step / 100) * 0.3 + (i * 0.05), - "loss": 1.0 - (step / 100) * 0.5 - }) -``` -Within your Python script or notebook, you can then group runs by the `config.group` value: +{{< code language="python" source="/bluehawk/snippets/group_runs.snippet.group_runs.py" >}} -```python -runset = wr.Runset( - project=project, - entity=entity, - groupby=["config.group"] # Group by the "group" config value -) -``` -Continuing from the previous example, you can create a report with the grouped run set: -```python -report = wr.Report( - entity=entity, - project=project, - title="Grouped Runs Example", -) +You can then group runs by the `config.group` value: -report.blocks = [ - wr.PanelGrid( - runsets=[runset], - ) - ] +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.group-runs-config.py" >}} -report.save() -``` ### Group runs by run metadata @@ -275,13 +150,7 @@ Group runs by a run's name (`Name`), state (`State`), or job type (`JobType`). 
Continuing from the previous example, you can group your runs by their name with the following code snippet: -```python -runset = wr.Runset( - project=project, - entity=entity, - groupby=["Name"] # Group by run names -) -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.group-runs-metadata.py" >}} {{% alert %}} The name of the run is the name you specify in the `wandb.init(name=)` parameter. If you do not specify a name, W&B generates a random name for the run. @@ -297,34 +166,11 @@ The syntax for grouping runs by summary metrics is `summary.<key>`, where `<key> For example, suppose you log a summary metric called `acc`: -```python -import wandb -import wandb_workspaces.reports.v2 as wr - -entity = "<entity>" -project = "<project>" - -for group in ["control", "experiment_a", "experiment_b"]: - for i in range(3): - with wandb.init(entity=entity, project=project, group=group, config={"group": group, "run": i}, name=f"{group}_run_{i}") as run: - # Simulate some training - for step in range(100): - run.log({ - "acc": 0.5 + (step / 100) * 0.3 + (i * 0.05), - "loss": 1.0 - (step / 100) * 0.5 - }) - -``` +{{< code language="python" source="/bluehawk/snippets/group_runs.snippet.group_runs.py" >}} You can then group runs by the `summary.acc` summary metric: -```python -runset = wr.Runset( - project=project, - entity=entity, - groupby=["summary.acc"] # Group by summary values -) -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.group-runs-summary-metrics.py" >}} ## Filter a run set programmatically @@ -355,64 +201,19 @@ Filter a runset by one or more config values. Config values are parameters you s For example, the following code snippet first initializes a run with a config value for `learning_rate` and `batch_size`, then filters runs in a report based on the `learning_rate` config value. 
-```python -import wandb - -config = { - "learning_rate": 0.01, - "batch_size": 32, -} +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.config-filters-0.py" >}} -with wandb.init(project="<project>", entity="<entity>", config=config) as run: - # Your training code here - pass -``` +The following code snippet shows how to filter runs based on learning rates greater than `0.01`: -Within your Python script or notebook, you can then programmatically filter runs that have a learning rate greater than `0.01`. +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.config-filters-1.py" >}} -```python -import wandb_workspaces.reports.v2 as wr +The following code snippet shows how to filter runs by multiple config values with the `and` operator, selecting runs that have a learning rate greater than `0.01` and a batch size equal to `32`: -runset = wr.Runset( - entity="<entity>", - project="<project>", - filters="Config('learning_rate') > 0.01" -) -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.config-filters-2.py" >}} -You can also filter by multiple config values with the `and` operator: - -```python -runset = wr.Runset( - entity="<entity>", - project="<project>", - filters="Config('learning_rate') > 0.01 and Config('batch_size') == 32" -) -``` +Once you have defined your filtered run set, you can create a report and pass the filtered run set to `wr.PanelGrid(runsets=)`: -Continuing from the previous example, you can create a report with the filtered runset as follows: - -```python -report = wr.Report( - entity="<entity>", - project="<project>", - title="My Report" -) - -report.blocks = [ - wr.PanelGrid( - runsets=[runset], - panels=[ - wr.LinePlot( - x="Step", - y=["accuracy"], - ) - ] - ) -] - -report.save() -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.config-filters-3.py" >}} ### Metric filters @@ -426,82 +227,39 @@ Metric('key') operation [<value>] ``` {{% /alert %}} -For example, 
consider the following Python snippet that creates three runs and assigns each of them a name: +Consider the following Python snippet that creates three runs and assigns each of them a name: -```python -import wandb - -with wandb.init(project="<project>", entity="<entity>") as run: - for i in range(3): - run.name = f"run{i+1}" - # Your training code here - pass -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.metric-filters-0.py" >}} When you create your report, you can filter runs by their display name. For example, to filter runs with names `run1`, `run2`, and `run3`, you can use the following code: -```python -runset = wr.Runset( - entity="<entity>", - project="<project>", - filters="Metric('displayName') in ['run1', 'run2', 'run3']" -) -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.metric-filters-1.py" >}} + {{% alert %}} You can find the name of the run in the **Overview** page of a run in the W&B App or programmatically with `Api.runs().run.name`. {{% /alert %}} -The following examples demonstrate how to filter a runset by the run's state (`finished`, `crashed`, or `running`): +The following examples demonstrate how to filter a runset by the run's state (`finished`, `crashed`, or `running`). 
In the following example, we filter a run set to include only runs that have finished: -```python -runset = wr.Runset( - entity="<entity>", - project="<project>", - filters="Metric('state') in ['finished']" -) -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.metric-filters-2.py" >}} -```python -runset = wr.Runset( - entity="<entity>", - project="<project>", - filters="Metric('state') not in ['crashed']" -) -``` +The following example demonstrates how to filter a run set to exclude runs that have crashed: + +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.metric-filters-3.py" >}} ### SummaryMetric filters The following examples demonstrate how to filter a run set by summary metrics. Summary metrics are the values you log to a run with `wandb.Run.log()`. After you log a run, you can find the names of your summary metrics in the W&B App under the **Summary** section of a run's **Overview** page. -```python -runset = wr.Runset( - entity="<entity>", - project="<project>", - filters="SummaryMetric('accuracy') > 0.9" -) -``` - -```python -runset = wr.Runset( - entity="<entity>", - project="<project>", - filters="Metric('state') in ['finished'] and SummaryMetric('train/train_loss') < 0.5" -) -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.summary-metric-filters.py" >}} ### Tags filters The following code snippet shows how to filter a runs set by its tags. Tags are values you add to a run (programmatically or with the W&B App). -```python -runset = wr.Runset( - entity="<entity>", - project="<project>", - filters="Tags('training') == 'training'" -) -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.tag-filters.py" >}} ## Add code blocks @@ -520,22 +278,10 @@ Select the name of the programming language on the right hand of the code block. Use the `wr.CodeBlock` Class to create a code block programmatically. 
Provide the name of the language and the code you want to display for the language and code parameters, respectively. -For example the proceeding example demonstrates a list in YAML file: - -```python -import wandb -import wandb_workspaces.reports.v2 as wr +For example, the following snippet demonstrates a list in a YAML file: -report = wr.Report(project="report-editing") +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.add-code-blocks.py" >}} -report.blocks = [ - wr.CodeBlock( - code=["this:", "- is", "- a", "cool:", "- yaml", "- file"], language="yaml" - ) -] - -report.save() -``` This will render a code block similar to: @@ -548,16 +294,9 @@ cool: - file ``` -The proceeding example demonstrates a Python code block: +The following example demonstrates a Python code block: -```python -report = wr.Report(project="report-editing") - - -report.blocks = [wr.CodeBlock(code=["Hello, World!"], language="python")] - -report.save() -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.add-code-block-python.py" >}} This will render a code block similar to: @@ -584,16 +323,9 @@ Enter a forward slash (`/`) in the report to display a dropdown menu. From the d Use the `wandb.apis.reports.MarkdownBlock` Class to create a markdown block programmatically. Pass a string to the `text` parameter: -```python -import wandb -import wandb_workspaces.reports.v2 as wr +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.add-markdown.py" >}} -report = wr.Report(project="report-editing") - -report.blocks = [ - wr.MarkdownBlock(text="Markdown cell with *italics* and **bold** and $e=mc^2$") -] -``` +<br> This will render a markdown block similar to: @@ -617,22 +349,10 @@ Enter a forward slash (`/`) in the report to display a dropdown menu. From the d {{% tab header="Report and Workspace API" value="python_wr_api" %}} -Pass a list of one or more HTML elements to `wandb.apis.reports.blocks` attribute. 
The proceeding example demonstrates how to create an H1, H2, and an unordered list: - -```python -import wandb -import wandb_workspaces.reports.v2 as wr - -report = wr.Report(project="report-editing") +Pass a list of one or more HTML elements to `wandb.apis.reports.blocks` attribute. The following example demonstrates how to create an H1, H2, and an unordered list: -report.blocks = [ - wr.H1(text="How Programmatic Reports work"), - wr.H2(text="Heading 2"), - wr.UnorderedList(items=["Bullet 1", "Bullet 2"]), -] +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.add-html.py" >}} -report.save() -``` This will render a HTML elements to the following: @@ -674,22 +394,9 @@ Copy and paste a SoundCloud link to embed an audio file into a report. {{% tab header="Report and Workspace API" value="python_wr_api" %}} -Pass a list of one or more embedded media objects to the `wandb.apis.reports.blocks` attribute. The proceeding example demonstrates how to embed video and Twitter media into a report: - -```python -import wandb -import wandb_workspaces.reports.v2 as wr +Pass a list of one or more embedded media objects to the `wandb.apis.reports.blocks` attribute. The following example demonstrates how to embed video and Twitter media into a report: -report = wr.Report(project="report-editing") - -report.blocks = [ - wr.Video(url="https://www.youtube.com/embed/6riDJMI-Y8U"), - wr.Twitter( - embed_html='<blockquote class="twitter-tweet"><p lang="en" dir="ltr">The voice of an angel, truly. 
<a href="https://twitter.com/hashtag/MassEffect?src=hash&ref_src=twsrc%5Etfw">#MassEffect</a> <a href="https://t.co/nMev97Uw7F">pic.twitter.com/nMev97Uw7F</a></p>— Mass Effect (@masseffect) <a href="https://twitter.com/masseffect/status/1428748886655569924?ref_src=twsrc%5Etfw">August 20, 2021</a></blockquote>\n' - ), -] -report.save() -``` +{{< code language="python" source="/bluehawk/snippets/edit-a-report.snippet.embed-rich-media.py" >}} {{% /tab %}} @@ -709,7 +416,7 @@ Select a panel grid and press `delete` on your keyboard to delete a panel grid. ## Collapse headers to organize Reports -Collapse headers in a Report to hide content within a text block. When the report is loaded, only headers that are expanded will show content. Collapsing headers in reports can help organize your content and prevent excessive data loading. The proceeding gif demonstrates the process. +Collapse headers in a Report to hide content within a text block. When the report is loaded, only headers that are expanded will show content. Collapsing headers in reports can help organize your content and prevent excessive data loading. The following gif demonstrates the process. {{< img src="/images/reports/collapse_headers.gif" alt="Collapsing headers in a report." >}} diff --git a/content/en/guides/hosting/hosting-options/self-managed/kubernetes-operator/_index.md b/content/en/guides/hosting/hosting-options/self-managed/kubernetes-operator/_index.md index 7ced435982..084cc5713f 100644 --- a/content/en/guides/hosting/hosting-options/self-managed/kubernetes-operator/_index.md +++ b/content/en/guides/hosting/hosting-options/self-managed/kubernetes-operator/_index.md @@ -198,7 +198,8 @@ This step assumes that the first admin user account is created with the browser. Follow these steps to verify the installation: -1. Install the W&B CLI: +1. 
Install the W&B Python SDK (`wandb`): + ```shell pip install wandb ``` diff --git a/content/en/guides/models/sweeps/local-controller.md b/content/en/guides/models/sweeps/local-controller.md index 3793baebdf..9afd867a03 100644 --- a/content/en/guides/models/sweeps/local-controller.md +++ b/content/en/guides/models/sweeps/local-controller.md @@ -16,11 +16,9 @@ The local controller feature allows the user to commence search and stop algorit This feature is offered to support faster development and debugging of new algorithms for the Sweeps tool. It is not intended for actual hyperparameter optimization workloads. {{% /alert %}} -Before you get start, you must install the W&B SDK(`wandb`). Type the following code snippet into your command line: +Before you start, install the W&B Python SDK (`wandb`). Copy and paste the following command into your terminal: -``` -pip install wandb sweeps -``` +{{< code language="shell" source="/bluehawk/snippets/wandb_install.snippet.pip_install_wandb.sh" >}} The following examples assume you already have a configuration file and a training loop defined in a python script or Jupyter Notebook. For more information about how to define a configuration file, see [Define sweep configuration]({{< relref "/guides/models/sweeps/define-sweep-configuration/" >}}). diff --git a/content/en/guides/models/tables/_index.md b/content/en/guides/models/tables/_index.md index 5256d28e71..9e9e52a824 100644 --- a/content/en/guides/models/tables/_index.md +++ b/content/en/guides/models/tables/_index.md @@ -37,13 +37,7 @@ Log a table with a few lines of code: - `data`: Set the contents of the table. - [`run.log()`]({{< relref "/ref/python/experiments/run.md/#method-runlog" >}}): Log the table to save it to W&B. 
-```python -import wandb - -run = wandb.init(project="table-test") -my_table = wandb.Table(columns=["a", "b"], data=[["a1", "b1"], ["a2", "b2"]]) -run.log({"Table Name": my_table}) -``` +{{< code language="python" source="/bluehawk/snippets/table.snippet.log_table.py" >}} ## How to get started * [Quickstart]({{< relref "./tables-walkthrough.md" >}}): Learn to log data tables, visualize data, and query data. diff --git a/content/en/guides/models/track/workspaces.md b/content/en/guides/models/track/workspaces.md index a645d328d5..ca5dd84d37 100644 --- a/content/en/guides/models/track/workspaces.md +++ b/content/en/guides/models/track/workspaces.md @@ -144,12 +144,9 @@ You can define the workspace's properties, such as: ### Install Workspace API -In addition to `wandb`, ensure that you install `wandb-workspaces`: - -```bash -pip install wandb wandb-workspaces -``` +In addition to `wandb`, make sure you install `wandb-workspaces`: +{{< code language="shell" source="/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_packages.sh" >}} ### Define and save a workspace view programmatically diff --git a/content/en/guides/models_quickstart.md b/content/en/guides/models_quickstart.md index a43e3b1717..7d7eaabfe4 100644 --- a/content/en/guides/models_quickstart.md +++ b/content/en/guides/models_quickstart.md @@ -18,19 +18,27 @@ To authenticate your machine with W&B, you must first generate an API key at [wa Install the W&B library and some other packages you will need for this walkthrough. 
-```python -pip install wandb -``` +{{< code language="shell" source="/bluehawk/snippets/wandb_install.snippet.pip_install_wandb_packages.sh" >}} -Import W&B Python SDK: +Import the [W&B Python SDK]({{< relref "/ref/python" >}}) (`wandb.sdk`) to log and monitor experiments during training: -```python -import wandb -``` +{{< code language="python" source="/bluehawk/snippets/import_wandb.snippet.import_wandb.py" >}} + + +Next, import the [Reports and Workspaces API]({{< relref "/ref/wandb_workspaces" >}}) (`wandb.wandb-workspaces`) to create and share a report in a later step: + +{{< code language="python" source="/bluehawk/snippets/import_wandb.snippet.import_wandb_workspaces.py" >}} + +You will use the Reports and Workspaces API in a [later step to create and share your findings in a report](#share-your-finds-with-a-report). -Specify the entity of your team in the following code block: +## Log in to W&B and set up your project +Log in to W&B using your API key. You can pass your API key as an argument to the `wandb.login()` method, or set the `WANDB_API_KEY` environment variable. The following code snippet demonstrates how to log in using the `wandb.login()` method: + +{{< code language="python" source="/bluehawk/snippets/wandb_login.snippet.wandb_login_python.py" >}} + +Next, specify the entity and project name for your W&B runs within your Python script or notebook. The entity is typically your username or the name of your team or organization. The project is a way to group related runs together. ```python TEAM_ENTITY = "<Team_Entity>" # Replace with your team entity @@ -41,58 +49,9 @@ PROJECT = "my-awesome-project" The following code simulates a basic machine learning workflow: training a model, logging metrics, and saving the model as an artifact. -Use the W&B Python SDK (`wandb.sdk`) to interact with W&B during training. 
Log the loss using [`wandb.Run.log()`]({{< relref "/ref/python/experiments/run/#method-runlog" >}}), then save the trained model as an artifact using [`wandb.Artifact`]({{< relref "/ref/python/experiments/artifact.md" >}}) before finally adding the model file using [`Artifact.add_file`]({{< relref "/ref/python/experiments/artifact.md#add_file" >}}). +Use the W&B Python SDK (`wandb.sdk`) to interact with W&B during training. Log the loss using [`wandb.Run.log()`]({{< relref "/ref/python/experiments/run/#method-runlog" >}}), then save the trained model as an artifact using [`wandb.Artifact`]({{< relref "/ref/python/experiments/artifact.md" >}}) before finally adding the model file using [`Artifact.add_file()`]({{< relref "/ref/python/experiments/artifact.md#add_file" >}}). -```python -import random # For simulating data - -def model(training_data: int) -> int: - """Model simulation for demonstration purposes.""" - return training_data * 2 + random.randint(-1, 1) - -# Simulate weights and noise -weights = random.random() # Initialize random weights -noise = random.random() / 5 # Small random noise to simulate noise - -# Hyperparameters and configuration -config = { - "epochs": 10, # Number of epochs to train - "learning_rate": 0.01, # Learning rate for the optimizer -} - -# Use context manager to initialize and close W&B runs -with wandb.init(project=PROJECT, entity=TEAM_ENTITY, config=config) as run: - # Simulate training loop - for epoch in range(config["epochs"]): - xb = weights + noise # Simulated input training data - yb = weights + noise * 2 # Simulated target output (double the input noise) - - y_pred = model(xb) # Model prediction - loss = (yb - y_pred) ** 2 # Mean Squared Error loss - - print(f"epoch={epoch}, loss={y_pred}") - # Log epoch and loss to W&B - run.log({ - "epoch": epoch, - "loss": loss, - }) - - # Unique name for the model artifact, - model_artifact_name = f"model-demo" - - # Local path to save the simulated model file - PATH = "model.txt" - - # 
Save model locally - with open(PATH, "w") as f: - f.write(str(weights)) # Saving model weights to a file - - # Create an artifact object - # Add locally saved model to artifact object - artifact = wandb.Artifact(name=model_artifact_name, type="model", description="My trained model") - artifact.add_file(local_path=PATH) - artifact.save() -``` +{{< code language="python" source="/bluehawk/snippets/models_quickart.snippet.train_model.py" >}} The key takeaways from the previous code block are: * Use `wandb.Run.log()` to log metrics during training. @@ -104,7 +63,7 @@ Now that you have trained a model and saved it as an artifact, you can publish i ## View the training data in the dashboard -Log in to your account at https://wandb.ai/login +Log in to your account at https://wandb.ai/login. Under **Projects** you should see `my-awesome-project` (or whatever you used as a project name above). Click this to enter the workspace for your project. @@ -117,23 +76,7 @@ From here, you can see details about every run you've done. In this screenshot, To share the model with others in your organization, publish it to a [collection]({{< relref "/guides/core/registry/create_collection" >}}) using `wandb.Run.link_artifact()`. The following code links the artifact to the [core Model registry]({{< relref "/guides/core/registry/registry_types/#core-registry" >}}), making it accessible to your team. 
-```python -# Artifact name specifies the specific artifact version within our team's project -artifact_name = f'{TEAM_ENTITY}/{PROJECT}/{model_artifact_name}:v0' -print("Artifact name: ", artifact_name) - -REGISTRY_NAME = "Model" # Name of the registry in W&B -COLLECTION_NAME = "DemoModels" # Name of the collection in the registry - -# Create a target path for our artifact in the registry -target_path = f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}" -print("Target path: ", target_path) - -run = wandb.init(entity=TEAM_ENTITY, project=PROJECT) -model_artifact = run.use_artifact(artifact_or_name=artifact_name, type="model") -run.link_artifact(artifact=model_artifact, target_path=target_path) -run.finish() -``` +{{< code language="python" source="/bluehawk/snippets/models_quickart.snippet.publish_model.py" >}} After running `wandb.Run.link_artifact()`, the model artifact will be in the `DemoModels` collection in your registry. From there, you can view details such as the version history, [lineage map]({{< relref "/guides/core/registry/lineage/" >}}), and other [metadata]({{< relref "/guides/core/registry/registry_cards/" >}}). @@ -143,18 +86,7 @@ For additional information on how to link artifacts to a registry, see [Link art To use a model for inference, use `wandb.Run.use_artifact()` to retrieve the published artifact from the registry. This returns an artifact object that you can then use [`wandb.Artifact.download()`]({{< relref "/ref/python/experiments/artifact/#method-artifactdownload" >}}) to download the artifact to a local file. 
-```python -REGISTRY_NAME = "Model" # Name of the registry in W&B -COLLECTION_NAME = "DemoModels" # Name of the collection in the registry -VERSION = 0 # Version of the artifact to retrieve - -model_artifact_name = f"wandb-registry-{REGISTRY_NAME}/{COLLECTION_NAME}:v{VERSION}" -print(f"Model artifact name: {model_artifact_name}") - -run = wandb.init(entity=TEAM_ENTITY, project=PROJECT) -registry_model = run.use_artifact(artifact_or_name=model_artifact_name) -local_model_path = registry_model.download() -``` +{{< code language="python" source="/bluehawk/snippets/models_quickart.snippet.retrieve_model.py" >}} For more information on how to retrieve artifacts from a registry, see [Download an artifact from a registry]({{< relref "/guides/core/registry/download_use_artifact/" >}}). @@ -168,48 +100,11 @@ W&B Report and Workspace API is in Public Preview. Create and share a [report]({{< relref "/guides/core/reports/_index.md" >}}) to summarize your work. To create a report programmatically, use the [W&B Report and Workspace API]({{< relref "/ref/wandb_workspaces/reports.md" >}}). -First, install the W&B Reports API: - -```python -pip install wandb wandb-workspaces -qqq -``` - The following code block creates a report with multiple blocks, including markdown, panel grids, and more. You can customize the report by adding more blocks or changing the content of existing blocks. The output of the code block prints a link to the URL report created. You can open this link in your browser to view the report. 
-```python -import wandb_workspaces.reports.v2 as wr - -experiment_summary = """This is a summary of the experiment conducted to train a simple model using W&B.""" -dataset_info = """The dataset used for training consists of synthetic data generated by a simple model.""" -model_info = """The model is a simple linear regression model that predicts output based on input data with some noise.""" - -report = wr.Report( - project=PROJECT, - entity=TEAM_ENTITY, - title="My Awesome Model Training Report", - description=experiment_summary, - blocks= [ - wr.TableOfContents(), - wr.H2("Experiment Summary"), - wr.MarkdownBlock(text=experiment_summary), - wr.H2("Dataset Information"), - wr.MarkdownBlock(text=dataset_info), - wr.H2("Model Information"), - wr.MarkdownBlock(text = model_info), - wr.PanelGrid( - panels=[ - wr.LinePlot(title="Train Loss", x="Step", y=["loss"], title_x="Step", title_y="Loss") - ], - ), - ] - -) - -# Save the report to W&B -report.save() -``` +{{< code language="python" source="/bluehawk/snippets/models_quickart.snippet.share_report.py" >}} For more information on how to create a report programmatically or how to create a report interactively with the W&B App, see [Create a report]({{< relref "/guides/core/reports/create-a-report.md" >}}) in the W&B Docs Developer guide. @@ -218,36 +113,6 @@ Use the [W&B Public APIs]({{< relref "/ref/python/public-api/_index.md" >}}) to The following code block demonstrates how to query the Model registry for all artifacts in a specific collection. It retrieves the collection and iterates through its versions, printing out the name and version of each artifact. 
-```python -import wandb - -# Initialize wandb API -api = wandb.Api() - -# Find all artifact versions that contains the string `model` and -# has either the tag `text-classification` or an `latest` alias -registry_filters = { - "name": {"$regex": "model"} -} - -# Use logical $or operator to filter artifact versions -version_filters = { - "$or": [ - {"tag": "text-classification"}, - {"alias": "latest"} - ] -} - -# Returns an iterable of all artifact versions that match the filters -artifacts = api.registries(filter=registry_filters).collections().versions(filter=version_filters) - -# Print out the name, collection, aliases, tags, and created_at date of each artifact found -for art in artifacts: - print(f"artifact name: {art.name}") - print(f"collection artifact belongs to: { art.collection.name}") - print(f"artifact aliases: {art.aliases}") - print(f"tags attached to artifact: {art.tags}") - print(f"artifact created at: {art.created_at}\n") -``` +{{< code language="python" source="/bluehawk/snippets/models_quickart.snippet.query_registry.py" >}} For more information on querying the registry, see the [Query registry items with MongoDB-style queries]({{< relref "/guides/core/registry/search_registry.md#query-registry-items-with-mongodb-style-queries" >}}). diff --git a/content/en/guides/quickstart.md b/content/en/guides/quickstart.md index 5110e5f9b4..4755fd37e9 100644 --- a/content/en/guides/quickstart.md +++ b/content/en/guides/quickstart.md @@ -29,17 +29,14 @@ To authenticate your machine with W&B, generate an API key from your user profil export WANDB_API_KEY=<your_api_key> ``` -2. Install the `wandb` library and log in. - - ```shell - pip install wandb - wandb login - ``` +2. Install the W&B Python SDK (`wandb`) and log in. 
+ {{< code language="shell" source="/bluehawk/snippets/wandb_install.snippet.pip_install_wandb.sh" >}} {{% /tab %}} {{% tab header="Python" value="python" %}} + ```bash pip install wandb ``` diff --git a/content/en/ref/python/_index.md b/content/en/ref/python/_index.md index a319ec8405..9a1b67523a 100644 --- a/content/en/ref/python/_index.md +++ b/content/en/ref/python/_index.md @@ -15,14 +15,14 @@ To authenticate your machine with W&B, you must first generate an API key at htt ### Install and import packages -Install the W&B library. +Install the W&B library and log in: -``` -pip install wandb -``` +{{< code language="shell" source="/bluehawk/snippets/wandb_install.snippet.pip_install_wandb.sh" >}} ### Import W&B Python SDK: +Import the `wandb` package in your Python script or Jupyter Notebook. The following example demonstrates how to import the package, initialize a W&B run (`wandb.init()`), and log metrics (`wandb.Run.log()`): + ```python import wandb diff --git a/layouts/shortcodes/code.html b/layouts/shortcodes/code.html new file mode 100644 index 0000000000..5d2b9edde7 --- /dev/null +++ b/layouts/shortcodes/code.html @@ -0,0 +1,7 @@ +{{ $language := .Get "language" }} +{{ $source := .Get "source" }} +{{ $options := .Get "options" }} + +{{ with $source | readFile }} + {{ highlight (trim . "\n\r") $language $options }} +{{ end }} \ No newline at end of file