From fb8dcb905fd9ca242bdac19502bfbc6699405c8e Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Wed, 16 Jul 2025 17:34:07 -0700 Subject: [PATCH 1/5] feat: Add automated reference documentation generation - Add GitHub Action workflow for weekly doc generation - Create scripts for Service API, Python SDK, and TypeScript SDK docs - Use Mintlify's native OpenAPI support for Service API - Minimal dependencies to avoid Socket Security issues - Generate docs from Weave source repository --- .github/workflows/generate-reference-docs.yml | 144 +++ .gitignore | 32 + openapi.json | 939 ++++++++++++++++-- reference/service-api/calls.mdx | 8 + reference/service-api/calls/end.mdx | 4 + reference/service-api/calls/start.mdx | 4 + scripts/README.md | 88 ++ scripts/generate_python_sdk_docs.py | 130 +++ scripts/generate_service_api_spec.py | 72 ++ scripts/generate_typescript_sdk_docs.py | 193 ++++ scripts/requirements.txt | 4 + 11 files changed, 1527 insertions(+), 91 deletions(-) create mode 100644 .github/workflows/generate-reference-docs.yml create mode 100644 reference/service-api/calls.mdx create mode 100644 reference/service-api/calls/end.mdx create mode 100644 reference/service-api/calls/start.mdx create mode 100644 scripts/README.md create mode 100755 scripts/generate_python_sdk_docs.py create mode 100755 scripts/generate_service_api_spec.py create mode 100755 scripts/generate_typescript_sdk_docs.py create mode 100644 scripts/requirements.txt diff --git a/.github/workflows/generate-reference-docs.yml b/.github/workflows/generate-reference-docs.yml new file mode 100644 index 00000000..889a4d06 --- /dev/null +++ b/.github/workflows/generate-reference-docs.yml @@ -0,0 +1,144 @@ +name: Generate Reference Documentation + +on: + workflow_dispatch: + inputs: + weave_version: + description: 'Weave version (commit SHA, tag, or branch)' + required: false + default: 'main' + type: string + create_pr: + description: 'Create a pull request with changes' + required: false + default: true 
+ type: boolean + + schedule: + # Run weekly on Mondays at 00:00 UTC + - cron: '0 0 * * 1' + +# Prevent multiple runs for the same PR/branch +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + generate-docs: + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + + steps: + - name: Checkout documentation repository + uses: actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Checkout Weave source repository + uses: actions/checkout@v4 + with: + repository: wandb/weave + ref: ${{ github.event.inputs.weave_version || 'main' }} + path: weave-source + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '18' + + - name: Cache Python dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cache/pip + .venv + key: ${{ runner.os }}-pip-${{ hashFiles('scripts/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Cache Node dependencies + uses: actions/cache@v4 + with: + path: | + ~/.pnpm-store + weave-source/sdks/node/node_modules + key: ${{ runner.os }}-pnpm-${{ hashFiles('weave-source/sdks/node/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}-pnpm- + + - name: Install Python dependencies + run: | + python -m venv .venv + source .venv/bin/activate + pip install -r scripts/requirements.txt + + - name: Generate Service API documentation + run: | + source .venv/bin/activate + python scripts/generate_service_api_spec.py + echo "Service API documentation generated" + + - name: Generate Python SDK documentation + env: + WEAVE_SOURCE_PATH: ./weave-source + run: | + source .venv/bin/activate + python scripts/generate_python_sdk_docs.py + echo "Python SDK documentation generated" + + - name: Install pnpm + run: npm install -g pnpm + + - name: Generate 
TypeScript SDK documentation + env: + WEAVE_SOURCE_PATH: ./weave-source + run: | + source .venv/bin/activate + python scripts/generate_typescript_sdk_docs.py + echo "TypeScript SDK documentation generated" + + - name: Check for changes + id: check_changes + run: | + git add . + if [[ -n $(git status --porcelain) ]]; then + echo "changes=true" >> $GITHUB_OUTPUT + echo "Found changes in documentation" + git status + else + echo "changes=false" >> $GITHUB_OUTPUT + echo "No changes detected" + fi + + - name: Create Pull Request + if: steps.check_changes.outputs.changes == 'true' && (github.event.inputs.create_pr == 'true' || github.event_name == 'schedule') + uses: peter-evans/create-pull-request@v6 + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: "chore: Update reference documentation from Weave ${{ github.event.inputs.weave_version || 'main' }}" + title: "chore: Update reference documentation" + body: | + This PR updates the reference documentation generated from Weave source code. + + **Weave version**: ${{ github.event.inputs.weave_version || 'main' }} + **Generated on**: ${{ github.event.repository.updated_at }} + + ## Changes + - Service API documentation (from OpenAPI spec) + - Python SDK documentation (using lazydocs) + - TypeScript SDK documentation (using typedoc) + + Please review the changes carefully before merging. 
+ branch: update-reference-docs-${{ github.run_number }} + delete-branch: true + labels: | + documentation + automated \ No newline at end of file diff --git a/.gitignore b/.gitignore index ce8d2c43..f46d38dd 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,35 @@ yarn-error.log* venv/ __pycache__/ *.pyc + +# Python virtual environment +.venv/ +venv/ +env/ + +# Python cache +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python + +# IDE +.vscode/ +.idea/ + +# Temporary files +*.tmp +*.bak +*.swp +*~ + +# Generated documentation cache +scripts/.cache/ + +# TypeDoc configuration (generated) +weave-source/sdks/node/typedoc.json + +# OS files +.DS_Store +Thumbs.db diff --git a/openapi.json b/openapi.json index bbab7d50..8c258a4e 100644 --- a/openapi.json +++ b/openapi.json @@ -1,29 +1,85 @@ { "openapi": "3.1.0", - "servers": [ - { - "url": "https://trace.wandb.ai" - } - ], "info": { - "title": "FastAPI", - "version": "0.1.0" + "title": "Weave Service API", + "version": "0.1.0", + "description": "REST API endpoints for the Weave service" }, "paths": { - "/server_info": { + "/health": { "get": { "tags": [ "Service" ], - "summary": "Server Info", - "operationId": "server_info_server_info_get", + "summary": "Read Root", + "operationId": "read_root_health_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/version": { + "get": { + "tags": [ + "Service" + ], + "summary": "Read Version", + "operationId": "read_version_version_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/geolocate": { + "get": { + "tags": [ + "Service" + ], + "summary": "Get Caller Location", + "description": "Lookup the geographic location of a user based on their IP address.\n\nThis API exists for debugging purposes and may not be available in the future.", + "operationId": 
"get_caller_location_geolocate_get", + "parameters": [ + { + "name": "ip", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "IP address to geolocate, defaults to client IP address", + "title": "Ip" + }, + "description": "IP address to geolocate, defaults to client IP address", + "example": "1.2.3.4" + } + ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ServerInfoRes" + "$ref": "#/components/schemas/GeolocationRes" } } } @@ -38,35 +94,45 @@ } } } - }, - "security": [ - { - "HTTPBasic": [] - } - ] + } } }, - "/health": { + "/server_info": { "get": { "tags": [ "Service" ], - "summary": "Read Root", - "operationId": "read_root_health_get", + "summary": "Server Info", + "operationId": "server_info_server_info_get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { - "additionalProperties": { - "type": "string" - }, - "type": "object", - "title": "Response Read Root Health Get" + "$ref": "#/components/schemas/ServerInfoRes" } } } + } + } + } + }, + "/otel/v1/traces": { + "post": { + "tags": [ + "OpenTelemetry" + ], + "summary": "Export Trace", + "operationId": "export_trace_otel_v1_traces_post", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } }, "422": { "description": "Validation Error", @@ -444,15 +510,10 @@ }, "responses": { "200": { - "description": "Stream of data in JSONL format", + "description": "Successful Response", "content": { - "application/jsonl": { - "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Schema" - } - } + "application/json": { + "schema": {} } } }, @@ -791,47 +852,30 @@ ] } }, - "/table/query_stream": { + "/table/query_stats": { "post": { - "summary": "Table Query Stream", - "operationId": 
"table_query_stream_table_query_stream_post", - "security": [ - { - "HTTPBasic": [] - } - ], - "parameters": [ - { - "name": "accept", - "in": "header", - "required": false, - "schema": { - "type": "string", - "default": "application/jsonl", - "title": "Accept" - } - } + "tags": [ + "Tables" ], + "summary": "Table Query Stats", + "operationId": "table_query_stats_table_query_stats_post", "requestBody": { - "required": true, "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/TableQueryReq" + "$ref": "#/components/schemas/TableQueryStatsReq" } } - } + }, + "required": true }, "responses": { "200": { - "description": "Stream of data in JSONL format", + "description": "Successful Response", "content": { - "application/jsonl": { + "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Schema" - } + "$ref": "#/components/schemas/TableQueryStatsRes" } } } @@ -846,21 +890,26 @@ } } } - } + }, + "security": [ + { + "HTTPBasic": [] + } + ] } }, - "/table/query_stats": { + "/table/query_stats_batch": { "post": { "tags": [ "Tables" ], - "summary": "Table Query Stats", - "operationId": "table_query_stats_table_query_stats_post", + "summary": "Table Query Stats Batch", + "operationId": "table_query_stats_batch_table_query_stats_batch_post", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/TableQueryStatsReq" + "$ref": "#/components/schemas/TableQueryStatsBatchReq" } } }, @@ -872,7 +921,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/TableQueryStatsRes" + "$ref": "#/components/schemas/TableQueryStatsBatchRes" } } } @@ -1006,9 +1055,57 @@ }, "responses": { "200": { - "description": "Binary file content stream", + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBasic": [] + } + ] + } + }, + "/files/query_stats": { + "post": { + "tags": [ + "Files" + ], + "summary": "Files Stats", + "operationId": "files_stats_files_query_stats_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FilesStatsReq" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", "content": { - "application/octet-stream": {} + "application/json": { + "schema": { + "$ref": "#/components/schemas/FilesStatsRes" + } + } } }, "422": { @@ -1353,6 +1450,50 @@ } ] } + }, + "/threads/stream_query": { + "post": { + "tags": [ + "Threads" + ], + "summary": "Threads Query Stream", + "operationId": "threads_query_stream_threads_stream_query_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ThreadsQueryReq" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "security": [ + { + "HTTPBasic": [] + } + ] + } } }, "components": { @@ -1405,7 +1546,8 @@ "required": [ "$and" ], - "title": "AndOperation" + "title": "AndOperation", + "description": "Logical AND. 
All conditions must evaluate to true.\n\nExample:\n ```\n {\n \"$and\": [\n {\"$eq\": [{\"$getField\": \"op_name\"}, {\"$literal\": \"predict\"}]},\n {\"$gt\": [{\"$getField\": \"summary.usage.tokens\"}, {\"$literal\": 1000}]}\n ]\n }\n ```" }, "Body_file_create_file_create_post": { "properties": { @@ -1544,6 +1686,30 @@ ], "title": "Include Costs", "default": false + }, + "include_storage_size": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Storage Size", + "default": false + }, + "include_total_storage_size": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Total Storage Size", + "default": false } }, "type": "object", @@ -1612,6 +1778,28 @@ ], "title": "Parent Id" }, + "thread_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Thread Id" + }, + "turn_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Turn Id" + }, "started_at": { "type": "string", "format": "date-time", @@ -1682,17 +1870,50 @@ ], "title": "Wb Run Id" }, - "deleted_at": { + "wb_run_step": { "anyOf": [ { - "type": "string", - "format": "date-time" + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Wb Run Step" + }, + "deleted_at": { + "anyOf": [ + { + "type": "string", + "format": "date-time" }, { "type": "null" } ], "title": "Deleted At" + }, + "storage_size_bytes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Storage Size Bytes" + }, + "total_storage_size_bytes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Total Storage Size Bytes" } }, "type": "object", @@ -1907,6 +2128,34 @@ ], "title": "Call Ids" }, + "thread_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Thread Ids" + }, + "turn_ids": { + "anyOf": [ + { + "items": { + "type": "string" + }, + 
"type": "array" + }, + { + "type": "null" + } + ], + "title": "Turn Ids" + }, "trace_roots_only": { "anyOf": [ { @@ -2038,6 +2287,32 @@ "description": "Beta, subject to change. If true, the response will include feedback for each call.", "default": false }, + "include_storage_size": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Storage Size", + "description": "Beta, subject to change. If true, the response will include the storage size for a call.", + "default": false + }, + "include_total_storage_size": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Total Storage Size", + "description": "Beta, subject to change. If true, the response will include the total storage size for a trace.", + "default": false + }, "columns": { "anyOf": [ { @@ -2105,6 +2380,29 @@ "type": "null" } ] + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Limit" + }, + "include_total_storage_size": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Total Storage Size", + "default": false } }, "type": "object", @@ -2118,6 +2416,17 @@ "count": { "type": "integer", "title": "Count" + }, + "total_storage_size_bytes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Total Storage Size Bytes" } }, "type": "object", @@ -2136,7 +2445,8 @@ "required": [ "$contains" ], - "title": "ContainsOperation" + "title": "ContainsOperation", + "description": "Case-insensitive substring match.\n\nNot part of MongoDB. 
Weave-specific addition.\n\nExample:\n ```\n {\n \"$contains\": {\n \"input\": {\"$getField\": \"display_name\"},\n \"substr\": {\"$literal\": \"llm\"},\n \"case_insensitive\": true\n }\n }\n ```" }, "ContainsSpec": { "properties": { @@ -2234,7 +2544,8 @@ "input", "substr" ], - "title": "ContainsSpec" + "title": "ContainsSpec", + "description": "Specification for the `$contains` operation.\n\n- `input`: The string to search.\n- `substr`: The substring to search for.\n- `case_insensitive`: If true, match is case-insensitive." }, "ConvertOperation": { "properties": { @@ -2246,7 +2557,8 @@ "required": [ "$convert" ], - "title": "ConvertOperation" + "title": "ConvertOperation", + "description": "Convert the input value to a specific type (e.g., `int`, `bool`, `string`).\n\nExample:\n ```\n {\n \"$convert\": {\n \"input\": {\"$getField\": \"inputs.value\"},\n \"to\": \"int\"\n }\n }\n ```" }, "ConvertSpec": { "properties": { @@ -2305,7 +2617,8 @@ "input", "to" ], - "title": "ConvertSpec" + "title": "ConvertSpec", + "description": "Specifies conversion details for `$convert`.\n\n- `input`: The operand to convert.\n- `to`: The type to convert to." 
}, "CostCreateInput": { "properties": { @@ -2829,7 +3142,8 @@ "required": [ "$eq" ], - "title": "EqOperation" + "title": "EqOperation", + "description": "Equality check between two operands.\n\nExample:\n ```\n {\n \"$eq\": [{\"$getField\": \"op_name\"}, {\"$literal\": \"predict\"}]\n }\n ```" }, "FeedbackCreateReq": { "properties": { @@ -3303,6 +3617,119 @@ ], "title": "FileCreateRes" }, + "FilesStatsReq": { + "properties": { + "project_id": { + "type": "string", + "title": "Project Id" + } + }, + "type": "object", + "required": [ + "project_id" + ], + "title": "FilesStatsReq" + }, + "FilesStatsRes": { + "properties": { + "total_size_bytes": { + "type": "integer", + "title": "Total Size Bytes" + } + }, + "type": "object", + "required": [ + "total_size_bytes" + ], + "title": "FilesStatsRes" + }, + "Geolocation": { + "properties": { + "file_index": { + "type": "integer", + "title": "File Index", + "description": "row in CSV file" + }, + "range_start_int": { + "type": "integer", + "title": "Range Start Int", + "description": "Start of IP range as integer" + }, + "range_end_int": { + "type": "integer", + "title": "Range End Int", + "description": "End of IP range as integer" + }, + "range_start_ip": { + "type": "string", + "title": "Range Start Ip", + "description": "Start of IP range in dotted decimal notation" + }, + "range_end_ip": { + "type": "string", + "title": "Range End Ip", + "description": "End of IP range in dotted decimal notation" + }, + "country_code": { + "type": "string", + "title": "Country Code", + "description": "2-letter country code in ISO 3166-1 Alpha 2 format" + }, + "country_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Country Name", + "description": "Country name, None if could not be determined" + } + }, + "type": "object", + "required": [ + "file_index", + "range_start_int", + "range_end_int", + "range_start_ip", + "range_end_ip", + "country_code" + ], + "title": "Geolocation" + }, + 
"GeolocationRes": { + "properties": { + "ip": { + "type": "string", + "title": "Ip", + "description": "Resolved IP address, useful for debugging" + }, + "location": { + "anyOf": [ + { + "$ref": "#/components/schemas/Geolocation" + }, + { + "type": "null" + } + ], + "description": "Information about the location of the IP address, None if could not be determined" + }, + "allowed": { + "type": "boolean", + "title": "Allowed", + "description": "Whether the IP address is allowed to be used for inference.", + "default": false + } + }, + "type": "object", + "required": [ + "ip" + ], + "title": "GeolocationRes" + }, "GetFieldOperator": { "properties": { "$getField": { @@ -3314,7 +3741,8 @@ "required": [ "$getField" ], - "title": "GetFieldOperator" + "title": "GetFieldOperator", + "description": "Access a field on the traced call.\n\nSupports dot notation for nested access, e.g. `summary.usage.tokens`.\n\nOnly works on fields present in the `CallSchema`, including:\n- Top-level fields like `op_name`, `trace_id`, `started_at`\n- Nested fields like `inputs.input_name`, `summary.usage.tokens`, etc.\n\nExample:\n ```\n {\"$getField\": \"op_name\"}\n ```" }, "GtOperation": { "properties": { @@ -3405,7 +3833,8 @@ "required": [ "$gt" ], - "title": "GtOperation" + "title": "GtOperation", + "description": "Greater than comparison.\n\nExample:\n ```\n {\n \"$gt\": [{\"$getField\": \"summary.usage.tokens\"}, {\"$literal\": 100}]\n }\n ```" }, "GteOperation": { "properties": { @@ -3496,7 +3925,8 @@ "required": [ "$gte" ], - "title": "GteOperation" + "title": "GteOperation", + "description": "Greater than or equal comparison.\n\nExample:\n ```\n {\n \"$gte\": [{\"$getField\": \"summary.usage.tokens\"}, {\"$literal\": 100}]\n }\n ```" }, "HTTPValidationError": { "properties": { @@ -3603,7 +4033,8 @@ "required": [ "$in" ], - "title": "InOperation" + "title": "InOperation", + "description": "Membership check.\n\nReturns true if the left operand is in the list provided as the second 
operand.\n\nExample:\n ```\n {\n \"$in\": [\n {\"$getField\": \"op_name\"},\n [{\"$literal\": \"predict\"}, {\"$literal\": \"generate\"}]\n ]\n }\n ```" }, "LLMUsageSchema": { "properties": { @@ -3716,7 +4147,8 @@ "required": [ "$literal" ], - "title": "LiteralOperation" + "title": "LiteralOperation", + "description": "Represents a constant value in the query language.\n\nThis can be any standard JSON-serializable value.\n\nExample:\n ```\n {\"$literal\": \"predict\"}\n ```" }, "NotOperation": { "properties": { @@ -3770,7 +4202,8 @@ "required": [ "$not" ], - "title": "NotOperation" + "title": "NotOperation", + "description": "Logical NOT. Inverts the condition.\n\nExample:\n ```\n {\n \"$not\": [\n {\"$eq\": [{\"$getField\": \"op_name\"}, {\"$literal\": \"debug\"}]}\n ]\n }\n ```" }, "ObjCreateReq": { "properties": { @@ -3937,6 +4370,19 @@ "title": "Metadata Only", "description": "If true, the `val` column is not read from the database and is empty.All other fields are returned.", "default": false + }, + "include_storage_size": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Include Storage Size", + "description": "If true, the `size_bytes` column is returned.", + "default": false } }, "type": "object", @@ -4063,6 +4509,17 @@ ], "title": "Base Object Class" }, + "leaf_object_class": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Leaf Object Class" + }, "val": { "title": "Val" }, @@ -4077,6 +4534,17 @@ ], "title": "Wb User Id", "description": "Do not set directly. Server will automatically populate this field." 
+ }, + "size_bytes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Size Bytes" } }, "type": "object", @@ -4175,6 +4643,32 @@ ] ] }, + "leaf_object_classes": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Leaf Object Classes", + "description": "Filter objects by their leaf classes", + "examples": [ + [ + "Model" + ], + [ + "Dataset" + ], + [ + "LLMStructuredCompletionModel" + ] + ] + }, "object_ids": { "anyOf": [ { @@ -4279,7 +4773,8 @@ "required": [ "$or" ], - "title": "OrOperation" + "title": "OrOperation", + "description": "Logical OR. At least one condition must be true.\n\nExample:\n ```\n {\n \"$or\": [\n {\"$eq\": [{\"$getField\": \"op_name\"}, {\"$literal\": \"a\"}]},\n {\"$eq\": [{\"$getField\": \"op_name\"}, {\"$literal\": \"b\"}]}\n ]\n }\n ```" }, "Query": { "properties": { @@ -4317,7 +4812,8 @@ "required": [ "$expr" ], - "title": "Query" + "title": "Query", + "description": "The top-level object for querying traced calls.\n\nThe `Query` wraps a single `$expr`, which uses Mongo-style aggregation operators\nto filter calls. 
This expression can combine logical conditions, comparisons,\ntype conversions, and string matching.\n\nExamples:\n ```\n # Filter calls where op_name == \"predict\"\n {\n \"$expr\": {\n \"$eq\": [\n {\"$getField\": \"op_name\"},\n {\"$literal\": \"predict\"}\n ]\n }\n }\n\n # Filter where a call's display name contains \"llm\"\n {\n \"$expr\": {\n \"$contains\": {\n \"input\": {\"$getField\": \"display_name\"},\n \"substr\": {\"$literal\": \"llm\"},\n \"case_insensitive\": true\n }\n }\n }\n ```" }, "RefsReadBatchReq": { "properties": { @@ -4438,6 +4934,28 @@ ], "title": "Parent Id" }, + "thread_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Thread Id" + }, + "turn_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Turn Id" + }, "started_at": { "type": "string", "format": "date-time", @@ -4473,6 +4991,17 @@ } ], "title": "Wb Run Id" + }, + "wb_run_step": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Wb Run Step" } }, "type": "object", @@ -4493,6 +5022,13 @@ }, "type": "object", "title": "Usage" + }, + "status_counts": { + "additionalProperties": { + "type": "integer" + }, + "type": "object", + "title": "Status Counts" } }, "additionalProperties": true, @@ -4726,6 +5262,72 @@ ], "title": "TableQueryRes" }, + "TableQueryStatsBatchReq": { + "properties": { + "project_id": { + "type": "string", + "title": "Project Id", + "description": "The ID of the project", + "examples": [ + "my_entity/my_project" + ] + }, + "digests": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Digests", + "description": "The digests of the tables to query", + "default": [], + "examples": [ + "aonareimsvtl13apimtalpa4435rpmgnaemrpgmarltarstaorsnte134avrims", + "smirva431etnsroatsratlrampgrmeangmpr5344aplatmipa31ltvsmi\u0435\u0440\u0430noa" + ] + }, + "include_storage_size": { + "anyOf": [ + { + "type": 
"boolean" + }, + { + "type": "null" + } + ], + "title": "Include Storage Size", + "description": "If true, the `storage_size_bytes` column is returned.", + "default": false + } + }, + "type": "object", + "required": [ + "project_id" + ], + "title": "TableQueryStatsBatchReq" + }, + "TableQueryStatsBatchRes": { + "properties": { + "tables": { + "items": { + "$ref": "#/components/schemas/TableStatsRow" + }, + "type": "array", + "title": "Tables" + } + }, + "type": "object", + "required": [ + "tables" + ], + "title": "TableQueryStatsBatchRes" + }, "TableQueryStatsReq": { "properties": { "project_id": { @@ -4739,10 +5341,7 @@ "digest": { "type": "string", "title": "Digest", - "description": "The digest of the table to query", - "examples": [ - "aonareimsvtl13apimtalpa4435rpmgnaemrpgmarltarstaorsnte134avrims" - ] + "description": "The digest of the table to query" } }, "type": "object", @@ -4841,6 +5440,35 @@ ], "title": "TableSchemaForInsert" }, + "TableStatsRow": { + "properties": { + "count": { + "type": "integer", + "title": "Count" + }, + "digest": { + "type": "string", + "title": "Digest" + }, + "storage_size_bytes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Storage Size Bytes" + } + }, + "type": "object", + "required": [ + "count", + "digest" + ], + "title": "TableStatsRow" + }, "TableUpdateReq": { "properties": { "project_id": { @@ -4898,6 +5526,130 @@ ], "title": "TableUpdateRes" }, + "ThreadsQueryFilter": { + "properties": { + "after_datetime": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "After Datetime", + "description": "Only include threads with start_time after this timestamp", + "examples": [ + "2024-01-01T00:00:00Z" + ] + }, + "before_datetime": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "null" + } + ], + "title": "Before Datetime", + "description": "Only include threads with last_updated before this 
timestamp", + "examples": [ + "2024-12-31T23:59:59Z" + ] + } + }, + "type": "object", + "title": "ThreadsQueryFilter" + }, + "ThreadsQueryReq": { + "properties": { + "project_id": { + "type": "string", + "title": "Project Id", + "description": "The ID of the project", + "examples": [ + "my_entity/my_project" + ] + }, + "filter": { + "anyOf": [ + { + "$ref": "#/components/schemas/ThreadsQueryFilter" + }, + { + "type": "null" + } + ], + "description": "Filter criteria for the threads query" + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Limit", + "description": "Maximum number of threads to return" + }, + "offset": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Offset", + "description": "Number of threads to skip" + }, + "sort_by": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/SortBy" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Sort By", + "description": "Sorting criteria for the threads. Supported fields: 'thread_id', 'turn_count', 'start_time', 'last_updated', 'p50_turn_duration_ms', 'p99_turn_duration_ms'.", + "examples": [ + [ + { + "direction": "desc", + "field": "last_updated" + } + ] + ] + } + }, + "type": "object", + "required": [ + "project_id" + ], + "title": "ThreadsQueryReq", + "description": "Query threads with aggregated statistics based on turn calls only.\n\nTurn calls are the immediate children of thread contexts (where call.id == turn_id).\nThis provides meaningful conversation-level statistics rather than including all\nnested implementation details." 
+ }, + "TraceStatus": { + "type": "string", + "enum": [ + "success", + "error", + "running", + "descendant_error" + ], + "title": "TraceStatus" + }, "ValidationError": { "properties": { "loc": { @@ -4938,5 +5690,10 @@ "scheme": "basic" } } - } + }, + "servers": [ + { + "url": "https://trace.wandb.ai" + } + ] } \ No newline at end of file diff --git a/reference/service-api/calls.mdx b/reference/service-api/calls.mdx new file mode 100644 index 00000000..d9c6bd06 --- /dev/null +++ b/reference/service-api/calls.mdx @@ -0,0 +1,8 @@ +--- +title: 'Calls API' +description: 'Endpoints for managing execution traces and call data' +--- + +## Overview + +The Calls API provides endpoints to create, read, update, and query execution traces in Weave. \ No newline at end of file diff --git a/reference/service-api/calls/end.mdx b/reference/service-api/calls/end.mdx new file mode 100644 index 00000000..24c0d5a1 --- /dev/null +++ b/reference/service-api/calls/end.mdx @@ -0,0 +1,4 @@ +--- +title: 'End Call' +openapi: 'POST /call/end' +--- \ No newline at end of file diff --git a/reference/service-api/calls/start.mdx b/reference/service-api/calls/start.mdx new file mode 100644 index 00000000..fe63791d --- /dev/null +++ b/reference/service-api/calls/start.mdx @@ -0,0 +1,4 @@ +--- +title: 'Start Call' +openapi: 'POST /call/start' +--- \ No newline at end of file diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 00000000..d847b5f6 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,88 @@ +# Reference Documentation Generation Scripts + +This directory contains scripts to automatically generate reference documentation for the Weave project from source code. + +## Overview + +The documentation generation system consists of three main components: + +1. **Service API Documentation** - Generated from the OpenAPI specification served by the Weave service +2. **Python SDK Documentation** - Generated from Python source code using lazydocs +3. 
**TypeScript SDK Documentation** - Generated from TypeScript source code using typedoc + +## Key Improvements + +This implementation provides several improvements over the previous approach: + +- **Minimal Dependencies**: Only essential packages (requests, lazydocs, pyyaml), reducing security risks +- **Native Mintlify Support**: Leverages Mintlify's built-in OpenAPI support for Service API docs +- **Clean Output**: Simplified post-processing that maintains documentation quality +- **No Complex Custom Processing**: Eliminates the complex custom scripts that introduced Socket Security issues; only the lightweight post-processing described below remains + +## Scripts + +### `generate_service_api_spec.py` +- Downloads the OpenAPI specification from https://trace.wandb.ai/openapi.json +- Saves it directly for Mintlify to consume +- No custom processing needed - Mintlify handles OpenAPI natively + +### `generate_python_sdk_docs.py` +- Uses lazydocs to generate Python API documentation +- Installs Weave from source for accurate documentation +- Post-processes output to add Mintlify frontmatter +- Converts .md files to .mdx for Mintlify compatibility + +### `generate_typescript_sdk_docs.py` +- Uses typedoc with typedoc-plugin-markdown +- Generates clean markdown documentation +- Post-processes to add Mintlify frontmatter +- Handles Node.js dependency management with pnpm + +## GitHub Action + +The `.github/workflows/generate-reference-docs.yml` workflow: + +- Runs weekly on a schedule or manually via workflow_dispatch +- Checks out both the documentation repo and Weave source +- Generates all three types of documentation +- Creates a pull request if changes are detected +- Uses caching for faster builds + +## Usage + +### Manual Generation + +1. Set up Python virtual environment: + ```bash + python -m venv .venv + source .venv/bin/activate + pip install -r scripts/requirements.txt + ``` + +2. Clone or provide path to Weave source: + ```bash + export WEAVE_SOURCE_PATH=/path/to/weave + ``` + +3. 
Run individual scripts: + ```bash + python scripts/generate_service_api_spec.py + python scripts/generate_python_sdk_docs.py + python scripts/generate_typescript_sdk_docs.py + ``` + +### GitHub Action + +The workflow can be triggered: +- Manually from the Actions tab with optional parameters +- Automatically every Monday at 00:00 UTC +- Parameters: + - `weave_version`: Branch, tag, or commit SHA (default: main) + - `create_pr`: Whether to create a PR (default: true) + +## Requirements + +- Python 3.11+ +- Node.js 18+ +- pnpm (will be installed automatically if missing) +- Access to wandb/weave repository \ No newline at end of file diff --git a/scripts/generate_python_sdk_docs.py b/scripts/generate_python_sdk_docs.py new file mode 100755 index 00000000..dddeaffa --- /dev/null +++ b/scripts/generate_python_sdk_docs.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +""" +Generate Python SDK reference documentation using lazydocs. + +This simplified version generates clean markdown documentation +suitable for Mintlify without custom processing that might +introduce security issues. 
+""" + +import os +import shutil +import subprocess +import sys +from pathlib import Path + + +def install_weave_from_source(weave_path): + """Install Weave from source for documentation generation.""" + print(f"Installing Weave from source: {weave_path}") + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-e", weave_path], + check=True, + capture_output=True, + text=True + ) + print("Weave installed successfully") + except subprocess.CalledProcessError as e: + print(f"Error installing Weave: {e.stderr}", file=sys.stderr) + sys.exit(1) + + +def generate_docs_with_lazydocs(output_dir): + """Generate documentation using lazydocs.""" + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + + # Key modules to document + modules_to_document = [ + "weave", + "weave.trace.op", + "weave.trace.weave_client", + "weave.trace.util", + "weave.trace_server.trace_server_interface", + ] + + for module in modules_to_document: + print(f"Generating documentation for {module}...") + + # Use lazydocs command line interface for cleaner output + cmd = [ + "lazydocs", + "--output-path", str(output_path), + "--overview-file", "", # No overview file + "--src-base-url", "https://github.com/wandb/weave/blob/master", + module + ] + + try: + subprocess.run(cmd, check=True, capture_output=True, text=True) + print(f" ✓ Generated docs for {module}") + except subprocess.CalledProcessError as e: + print(f" ✗ Error generating docs for {module}: {e.stderr}") + + +def post_process_docs(docs_dir): + """Post-process the generated documentation for Mintlify.""" + docs_path = Path(docs_dir) + + for md_file in docs_path.rglob("*.md"): + content = md_file.read_text() + + # Add Mintlify frontmatter + module_name = md_file.stem + if module_name == "README": + module_name = md_file.parent.name + + frontmatter = f"""--- +title: '{module_name}' +description: 'Python SDK reference for {module_name}' +--- + +""" + + # Only add frontmatter if not already present + if not 
content.startswith("---"): + content = frontmatter + content + + # Change .md extension to .mdx + mdx_file = md_file.with_suffix('.mdx') + mdx_file.write_text(content) + + # Remove original .md file + md_file.unlink() + + print(f" ✓ Processed {md_file.name} → {mdx_file.name}") + + +def main(): + """Main function.""" + # Check if we're in the Weave repo or need to use a separate source + weave_source = os.environ.get("WEAVE_SOURCE_PATH", "../weave-source") + + if not Path(weave_source).exists(): + print(f"Weave source not found at {weave_source}") + print("Please set WEAVE_SOURCE_PATH environment variable or ensure weave-source exists") + sys.exit(1) + + # Install Weave from source + install_weave_from_source(weave_source) + + # Generate documentation + output_dir = "reference/python-sdk/weave" + print(f"\nGenerating Python SDK documentation to {output_dir}...") + + # Clean existing docs + if Path(output_dir).exists(): + shutil.rmtree(output_dir) + + generate_docs_with_lazydocs(output_dir) + + # Post-process for Mintlify + print("\nPost-processing documentation for Mintlify...") + post_process_docs(output_dir) + + print("\nPython SDK documentation generation complete!") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/generate_service_api_spec.py b/scripts/generate_service_api_spec.py new file mode 100755 index 00000000..906e3518 --- /dev/null +++ b/scripts/generate_service_api_spec.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +""" +Download the Weave Service API OpenAPI specification. + +This script fetches the OpenAPI spec from the Weave service and saves it +in a format that Mintlify can directly consume, eliminating the need for +custom processing. 
+""" + +import json +import requests +import sys +from pathlib import Path + + +def download_openapi_spec(): + """Download the OpenAPI spec from Weave service.""" + url = "https://trace.wandb.ai/openapi.json" + + print(f"Downloading OpenAPI spec from {url}...") + try: + response = requests.get(url, timeout=30) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + print(f"Error downloading OpenAPI spec: {e}", file=sys.stderr) + sys.exit(1) + + +def update_server_url(spec): + """Update the server URL to the production endpoint.""" + if "servers" not in spec or not spec["servers"]: + spec["servers"] = [] + + # Ensure the production server is listed + spec["servers"] = [{"url": "https://trace.wandb.ai"}] + return spec + + +def save_openapi_spec(spec, output_path): + """Save the OpenAPI spec to a file.""" + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, 'w') as f: + json.dump(spec, f, indent=2) + + print(f"OpenAPI spec saved to {output_path}") + + +def main(): + """Main function.""" + # Download the spec + spec = download_openapi_spec() + + # Update server URL + spec = update_server_url(spec) + + # Update the title and description for better presentation + if "info" in spec: + spec["info"]["title"] = "Weave Service API" + spec["info"]["description"] = "REST API endpoints for the Weave service" + + # Save to the appropriate location for Mintlify + output_path = "openapi.json" + save_openapi_spec(spec, output_path) + + print("Service API spec generation complete!") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/generate_typescript_sdk_docs.py b/scripts/generate_typescript_sdk_docs.py new file mode 100755 index 00000000..54f09eb3 --- /dev/null +++ b/scripts/generate_typescript_sdk_docs.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +""" +Generate TypeScript SDK reference documentation using typedoc. 
+ +This script generates TypeScript documentation in Markdown format +suitable for Mintlify. +""" + +import json +import os +import shutil +import subprocess +import sys +from pathlib import Path + + +def check_node_dependencies(): + """Check if Node.js and pnpm are available.""" + try: + subprocess.run(["node", "--version"], check=True, capture_output=True) + print("✓ Node.js is installed") + except (subprocess.CalledProcessError, FileNotFoundError): + print("✗ Node.js is not installed", file=sys.stderr) + sys.exit(1) + + try: + subprocess.run(["pnpm", "--version"], check=True, capture_output=True) + print("✓ pnpm is installed") + except (subprocess.CalledProcessError, FileNotFoundError): + print("Installing pnpm...") + try: + subprocess.run(["npm", "install", "-g", "pnpm"], check=True) + print("✓ pnpm installed successfully") + except subprocess.CalledProcessError as e: + print(f"✗ Failed to install pnpm: {e}", file=sys.stderr) + sys.exit(1) + + +def setup_typescript_project(weave_source): + """Set up the TypeScript project and install dependencies.""" + sdk_path = Path(weave_source) / "sdks" / "node" + + if not sdk_path.exists(): + print(f"TypeScript SDK not found at {sdk_path}", file=sys.stderr) + sys.exit(1) + + print(f"Setting up TypeScript project at {sdk_path}") + + # Install dependencies + os.chdir(sdk_path) + try: + print("Installing dependencies...") + subprocess.run(["pnpm", "install"], check=True) + + # Install typedoc and markdown plugin + print("Installing typedoc...") + subprocess.run([ + "pnpm", "add", "-D", + "typedoc", + "typedoc-plugin-markdown" + ], check=True) + + print("✓ Dependencies installed successfully") + except subprocess.CalledProcessError as e: + print(f"✗ Failed to install dependencies: {e}", file=sys.stderr) + sys.exit(1) + + return sdk_path + + +def generate_typedoc_config(sdk_path, output_path): + """Generate typedoc configuration.""" + config = { + "entryPoints": ["src/index.ts"], + "out": str(output_path), + "plugin": 
["typedoc-plugin-markdown"], + "readme": "none", + "hideBreadcrumbs": True, + "hideInPageTOC": True, + "disableSources": True, + "excludePrivate": True, + "excludeProtected": True, + "excludeInternal": True, + "githubPages": False, + "cleanOutputDir": True + } + + config_path = sdk_path / "typedoc.json" + with open(config_path, 'w') as f: + json.dump(config, f, indent=2) + + return config_path + + +def run_typedoc(sdk_path, output_path): + """Run typedoc to generate documentation.""" + print(f"Generating TypeScript documentation to {output_path}...") + + os.chdir(sdk_path) + try: + subprocess.run([ + "pnpm", "exec", "typedoc" + ], check=True) + print("✓ TypeScript documentation generated successfully") + except subprocess.CalledProcessError as e: + print(f"✗ Failed to generate documentation: {e}", file=sys.stderr) + sys.exit(1) + + +def post_process_typescript_docs(docs_dir): + """Post-process the generated TypeScript documentation for Mintlify.""" + docs_path = Path(docs_dir) + + # Process all markdown files + for md_file in docs_path.rglob("*.md"): + content = md_file.read_text() + + # Extract title from the first heading + lines = content.split('\n') + title = md_file.stem + for line in lines: + if line.startswith('# '): + title = line[2:].strip() + break + + # Add Mintlify frontmatter + frontmatter = f"""--- +title: '{title}' +description: 'TypeScript SDK reference for {title}' +--- + +""" + + # Only add frontmatter if not already present + if not content.startswith("---"): + content = frontmatter + content + + # Clean up typedoc artifacts + content = content.replace('**`', '`') + content = content.replace('`**', '`') + + # Change .md extension to .mdx + mdx_file = md_file.with_suffix('.mdx') + mdx_file.write_text(content) + + # Remove original .md file + md_file.unlink() + + print(f" ✓ Processed {md_file.name} → {mdx_file.name}") + + +def main(): + """Main function.""" + # Check Node.js dependencies + check_node_dependencies() + + # Get Weave source path + 
weave_source = os.environ.get("WEAVE_SOURCE_PATH", "../weave-source") + + if not Path(weave_source).exists(): + print(f"Weave source not found at {weave_source}") + print("Please set WEAVE_SOURCE_PATH environment variable") + sys.exit(1) + + # Setup TypeScript project + sdk_path = setup_typescript_project(weave_source) + + # Output directory + current_dir = Path.cwd() + output_dir = current_dir / "reference" / "typescript-sdk" / "weave" + + # Clean existing docs + if output_dir.exists(): + shutil.rmtree(output_dir) + + # Generate typedoc config + generate_typedoc_config(sdk_path, output_dir) + + # Run typedoc + run_typedoc(sdk_path, output_dir) + + # Change back to original directory + os.chdir(current_dir) + + # Post-process for Mintlify + print("\nPost-processing documentation for Mintlify...") + post_process_typescript_docs(output_dir) + + print("\nTypeScript SDK documentation generation complete!") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 00000000..dc4c421b --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,4 @@ +# Core dependencies for documentation generation +requests>=2.31.0 +lazydocs>=0.4.8 +pyyaml>=6.0 \ No newline at end of file From 88e76943e9ae19cd3db6cb7077d3d23fa3724e5e Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Wed, 16 Jul 2025 17:39:43 -0700 Subject: [PATCH 2/5] refactor: Update doc generation to use Weave version instead of cloning source - Python SDK: Install from PyPI or GitHub using version/tag/hash - TypeScript SDK: Download source archive from GitHub for specific version - Remove repository cloning from GitHub Action - Update documentation to reflect version-based approach --- .github/workflows/generate-reference-docs.yml | 22 +--- scripts/README.md | 30 +++-- scripts/generate_python_sdk_docs.py | 52 ++++++--- scripts/generate_typescript_sdk_docs.py | 109 +++++++++++++----- 4 files changed, 137 insertions(+), 76 
deletions(-) diff --git a/.github/workflows/generate-reference-docs.yml b/.github/workflows/generate-reference-docs.yml index 889a4d06..49f17efc 100644 --- a/.github/workflows/generate-reference-docs.yml +++ b/.github/workflows/generate-reference-docs.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: weave_version: - description: 'Weave version (commit SHA, tag, or branch)' + description: 'Weave version (tag like v0.50.0, commit SHA, or branch name)' required: false default: 'main' type: string @@ -36,14 +36,6 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout Weave source repository - uses: actions/checkout@v4 - with: - repository: wandb/weave - ref: ${{ github.event.inputs.weave_version || 'main' }} - path: weave-source - token: ${{ secrets.GITHUB_TOKEN }} - - name: Set up Python uses: actions/setup-python@v5 with: @@ -64,13 +56,11 @@ jobs: restore-keys: | ${{ runner.os }}-pip- - - name: Cache Node dependencies + - name: Cache pnpm store uses: actions/cache@v4 with: - path: | - ~/.pnpm-store - weave-source/sdks/node/node_modules - key: ${{ runner.os }}-pnpm-${{ hashFiles('weave-source/sdks/node/pnpm-lock.yaml') }} + path: ~/.pnpm-store + key: ${{ runner.os }}-pnpm-${{ github.event.inputs.weave_version || 'main' }} restore-keys: | ${{ runner.os }}-pnpm- @@ -88,7 +78,7 @@ jobs: - name: Generate Python SDK documentation env: - WEAVE_SOURCE_PATH: ./weave-source + WEAVE_VERSION: ${{ github.event.inputs.weave_version || 'main' }} run: | source .venv/bin/activate python scripts/generate_python_sdk_docs.py @@ -99,7 +89,7 @@ jobs: - name: Generate TypeScript SDK documentation env: - WEAVE_SOURCE_PATH: ./weave-source + WEAVE_VERSION: ${{ github.event.inputs.weave_version || 'main' }} run: | source .venv/bin/activate python scripts/generate_typescript_sdk_docs.py diff --git a/scripts/README.md b/scripts/README.md index d847b5f6..7e87f264 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -18,6 +18,7 @@ This implementation provides several 
improvements over the previous approach: - **Native Mintlify Support**: Leverages Mintlify's built-in OpenAPI support for Service API docs - **Clean Output**: Simplified post-processing that maintains documentation quality - **No Custom Processing**: Eliminates complex custom scripts that introduced Socket Security issues +- **Version-based Generation**: Accepts specific Weave versions, tags, or commit hashes ## Scripts @@ -28,11 +29,12 @@ This implementation provides several improvements over the previous approach: ### `generate_python_sdk_docs.py` - Uses lazydocs to generate Python API documentation -- Installs Weave from source for accurate documentation +- Installs specific Weave version from PyPI or GitHub - Post-processes output to add Mintlify frontmatter - Converts .md files to .mdx for Mintlify compatibility ### `generate_typescript_sdk_docs.py` +- Downloads Weave source code for specified version - Uses typedoc with typedoc-plugin-markdown - Generates clean markdown documentation - Post-processes to add Mintlify frontmatter @@ -43,7 +45,7 @@ This implementation provides several improvements over the previous approach: The `.github/workflows/generate-reference-docs.yml` workflow: - Runs weekly on a schedule or manually via workflow_dispatch -- Checks out both the documentation repo and Weave source +- Accepts a Weave version parameter (tag, commit SHA, or branch name) - Generates all three types of documentation - Creates a pull request if changes are detected - Uses caching for faster builds @@ -59,16 +61,17 @@ The `.github/workflows/generate-reference-docs.yml` workflow: pip install -r scripts/requirements.txt ``` -2. Clone or provide path to Weave source: - ```bash - export WEAVE_SOURCE_PATH=/path/to/weave - ``` - -3. Run individual scripts: +2. 
Generate documentation for a specific version: ```bash + # For latest PyPI version python scripts/generate_service_api_spec.py python scripts/generate_python_sdk_docs.py python scripts/generate_typescript_sdk_docs.py + + # For specific version + export WEAVE_VERSION=v0.50.0 # or commit SHA, or branch name + python scripts/generate_python_sdk_docs.py + python scripts/generate_typescript_sdk_docs.py ``` ### GitHub Action @@ -77,12 +80,19 @@ The workflow can be triggered: - Manually from the Actions tab with optional parameters - Automatically every Monday at 00:00 UTC - Parameters: - - `weave_version`: Branch, tag, or commit SHA (default: main) + - `weave_version`: Version tag (e.g., v0.50.0), commit SHA, or branch name (default: main) - `create_pr`: Whether to create a PR (default: true) +### Supported Version Formats + +- **Latest**: Uses the latest version from PyPI (default) +- **Version tags**: `v0.50.0` or `0.50.0` +- **Commit SHA**: Full or short commit hash +- **Branch names**: `main`, `feature/branch-name`, etc. 
+ ## Requirements - Python 3.11+ - Node.js 18+ - pnpm (will be installed automatically if missing) -- Access to wandb/weave repository \ No newline at end of file +- Internet access to download packages and source code \ No newline at end of file diff --git a/scripts/generate_python_sdk_docs.py b/scripts/generate_python_sdk_docs.py index dddeaffa..daed8640 100755 --- a/scripts/generate_python_sdk_docs.py +++ b/scripts/generate_python_sdk_docs.py @@ -14,19 +14,38 @@ from pathlib import Path -def install_weave_from_source(weave_path): - """Install Weave from source for documentation generation.""" - print(f"Installing Weave from source: {weave_path}") +def install_weave(version="latest"): + """Install Weave package for documentation generation.""" + print(f"Installing Weave version: {version}") + try: - subprocess.run( - [sys.executable, "-m", "pip", "install", "-e", weave_path], - check=True, - capture_output=True, - text=True + if version == "latest": + # Install latest from PyPI + cmd = [sys.executable, "-m", "pip", "install", "weave"] + elif version.startswith("v") or "." 
in version: + # Looks like a version number (e.g., v0.50.0 or 0.50.0) + version_num = version.lstrip("v") + cmd = [sys.executable, "-m", "pip", "install", f"weave=={version_num}"] + else: + # Assume it's a commit hash or branch name + cmd = [sys.executable, "-m", "pip", "install", + f"git+https://github.com/wandb/weave.git@{version}"] + + subprocess.run(cmd, check=True) + print("✓ Weave installed successfully") + + # Get installed version + result = subprocess.run( + [sys.executable, "-m", "pip", "show", "weave"], + capture_output=True, text=True, check=True ) - print("Weave installed successfully") + for line in result.stdout.split('\n'): + if line.startswith('Version:'): + print(f" Installed version: {line.split(':')[1].strip()}") + break + except subprocess.CalledProcessError as e: - print(f"Error installing Weave: {e.stderr}", file=sys.stderr) + print(f"Error installing Weave: {e}", file=sys.stderr) sys.exit(1) @@ -98,16 +117,11 @@ def post_process_docs(docs_dir): def main(): """Main function.""" - # Check if we're in the Weave repo or need to use a separate source - weave_source = os.environ.get("WEAVE_SOURCE_PATH", "../weave-source") - - if not Path(weave_source).exists(): - print(f"Weave source not found at {weave_source}") - print("Please set WEAVE_SOURCE_PATH environment variable or ensure weave-source exists") - sys.exit(1) + # Get Weave version from environment or use latest + weave_version = os.environ.get("WEAVE_VERSION", "latest") - # Install Weave from source - install_weave_from_source(weave_source) + # Install Weave + install_weave(weave_version) # Generate documentation output_dir = "reference/python-sdk/weave" diff --git a/scripts/generate_typescript_sdk_docs.py b/scripts/generate_typescript_sdk_docs.py index 54f09eb3..a9613d95 100755 --- a/scripts/generate_typescript_sdk_docs.py +++ b/scripts/generate_typescript_sdk_docs.py @@ -11,7 +11,10 @@ import shutil import subprocess import sys +import tempfile +import tarfile from pathlib import Path 
+import requests def check_node_dependencies(): @@ -36,6 +39,45 @@ def check_node_dependencies(): sys.exit(1) +def download_weave_source(version="main"): + """Download Weave source code for a specific version.""" + print(f"Downloading Weave source code for version: {version}") + + # Create temporary directory + temp_dir = tempfile.mkdtemp() + + try: + # Download tarball from GitHub + url = f"https://github.com/wandb/weave/archive/{version}.tar.gz" + response = requests.get(url, stream=True) + response.raise_for_status() + + # Save and extract tarball + tarball_path = Path(temp_dir) / "weave.tar.gz" + with open(tarball_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + # Extract + with tarfile.open(tarball_path, 'r:gz') as tar: + tar.extractall(temp_dir) + + # Find the extracted directory + extracted_dirs = [d for d in Path(temp_dir).iterdir() if d.is_dir() and d.name.startswith('weave-')] + if not extracted_dirs: + raise Exception("Could not find extracted Weave directory") + + weave_dir = extracted_dirs[0] + print(f"✓ Downloaded and extracted Weave source to {weave_dir}") + + return weave_dir + + except Exception as e: + print(f"Error downloading Weave source: {e}", file=sys.stderr) + shutil.rmtree(temp_dir, ignore_errors=True) + sys.exit(1) + + def setup_typescript_project(weave_source): """Set up the TypeScript project and install dependencies.""" sdk_path = Path(weave_source) / "sdks" / "node" @@ -154,39 +196,44 @@ def main(): # Check Node.js dependencies check_node_dependencies() - # Get Weave source path - weave_source = os.environ.get("WEAVE_SOURCE_PATH", "../weave-source") - - if not Path(weave_source).exists(): - print(f"Weave source not found at {weave_source}") - print("Please set WEAVE_SOURCE_PATH environment variable") - sys.exit(1) - - # Setup TypeScript project - sdk_path = setup_typescript_project(weave_source) + # Get Weave version from environment or use main + weave_version = 
os.environ.get("WEAVE_VERSION", "main") - # Output directory - current_dir = Path.cwd() - output_dir = current_dir / "reference" / "typescript-sdk" / "weave" + # Download Weave source + weave_source = download_weave_source(weave_version) - # Clean existing docs - if output_dir.exists(): - shutil.rmtree(output_dir) - - # Generate typedoc config - generate_typedoc_config(sdk_path, output_dir) - - # Run typedoc - run_typedoc(sdk_path, output_dir) - - # Change back to original directory - os.chdir(current_dir) - - # Post-process for Mintlify - print("\nPost-processing documentation for Mintlify...") - post_process_typescript_docs(output_dir) - - print("\nTypeScript SDK documentation generation complete!") + try: + # Setup TypeScript project + sdk_path = setup_typescript_project(weave_source) + + # Output directory + current_dir = Path.cwd() + output_dir = current_dir / "reference" / "typescript-sdk" / "weave" + + # Clean existing docs + if output_dir.exists(): + shutil.rmtree(output_dir) + + # Generate typedoc config + generate_typedoc_config(sdk_path, output_dir) + + # Run typedoc + run_typedoc(sdk_path, output_dir) + + # Change back to original directory + os.chdir(current_dir) + + # Post-process for Mintlify + print("\nPost-processing documentation for Mintlify...") + post_process_typescript_docs(output_dir) + + print("\nTypeScript SDK documentation generation complete!") + + finally: + # Clean up temporary directory + if weave_source: + temp_dir = weave_source.parent + shutil.rmtree(temp_dir, ignore_errors=True) if __name__ == "__main__": From 6c69141ddcf262605cc46027203e7bc7cd8b51c8 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Wed, 16 Jul 2025 17:41:17 -0700 Subject: [PATCH 3/5] fix: Add PR trigger for testing workflow - Add pull_request trigger for testing (to be removed before merging) - Add PR test mode reporting to show what would happen in production - Prevent actual PR creation when running from a pull request --- 
.github/workflows/generate-reference-docs.yml | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/.github/workflows/generate-reference-docs.yml b/.github/workflows/generate-reference-docs.yml index 49f17efc..7a370800 100644 --- a/.github/workflows/generate-reference-docs.yml +++ b/.github/workflows/generate-reference-docs.yml @@ -14,6 +14,14 @@ on: default: true type: boolean + # Temporary trigger for testing - REMOVE BEFORE MERGING + pull_request: + branches: [main] + paths: + - '.github/workflows/generate-reference-docs.yml' + - 'scripts/generate_*.py' + - 'scripts/requirements.txt' + schedule: # Run weekly on Mondays at 00:00 UTC - cron: '0 0 * * 1' @@ -108,8 +116,24 @@ jobs: echo "No changes detected" fi + # For PR testing, just report what would happen + - name: Report Results (PR Test Mode) + if: github.event_name == 'pull_request' + run: | + echo "This is a test run on a PR. In production, this would:" + if [[ "${{ steps.check_changes.outputs.changes }}" == "true" ]]; then + echo "✅ Create a PR with the generated documentation changes" + echo "" + echo "Changed files:" + git status --porcelain + else + echo "ℹ️ Skip PR creation (no changes detected)" + fi + echo "" + echo "Weave version used: ${{ github.event.inputs.weave_version || 'main' }}" + - name: Create Pull Request - if: steps.check_changes.outputs.changes == 'true' && (github.event.inputs.create_pr == 'true' || github.event_name == 'schedule') + if: steps.check_changes.outputs.changes == 'true' && github.event_name != 'pull_request' && (github.event.inputs.create_pr == 'true' || github.event_name == 'schedule') uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.GITHUB_TOKEN }} From b6b1bab412204c339558d700f67d438d9b65ea63 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Wed, 16 Jul 2025 17:53:58 -0700 Subject: [PATCH 4/5] fix: Improve error handling and default to latest PyPI version - Add module import testing before documentation generation - 
Better error messages and debugging output - Default to 'latest' PyPI version instead of 'main' branch - Add fallback to latest PyPI if main branch fails - Handle 'latest' version properly in TypeScript script --- .github/workflows/generate-reference-docs.yml | 10 +++-- scripts/generate_python_sdk_docs.py | 45 +++++++++++++++++-- scripts/generate_typescript_sdk_docs.py | 12 +++++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/.github/workflows/generate-reference-docs.yml b/.github/workflows/generate-reference-docs.yml index 7a370800..e9336226 100644 --- a/.github/workflows/generate-reference-docs.yml +++ b/.github/workflows/generate-reference-docs.yml @@ -86,10 +86,14 @@ jobs: - name: Generate Python SDK documentation env: - WEAVE_VERSION: ${{ github.event.inputs.weave_version || 'main' }} + WEAVE_VERSION: ${{ github.event.inputs.weave_version || 'latest' }} run: | source .venv/bin/activate - python scripts/generate_python_sdk_docs.py + python scripts/generate_python_sdk_docs.py || { + echo "Python SDK generation failed with exit code $?" + echo "Attempting with latest PyPI version..." 
+ WEAVE_VERSION=latest python scripts/generate_python_sdk_docs.py + } echo "Python SDK documentation generated" - name: Install pnpm @@ -97,7 +101,7 @@ jobs: - name: Generate TypeScript SDK documentation env: - WEAVE_VERSION: ${{ github.event.inputs.weave_version || 'main' }} + WEAVE_VERSION: ${{ github.event.inputs.weave_version || 'latest' }} run: | source .venv/bin/activate python scripts/generate_typescript_sdk_docs.py diff --git a/scripts/generate_python_sdk_docs.py b/scripts/generate_python_sdk_docs.py index daed8640..c8369e3e 100755 --- a/scripts/generate_python_sdk_docs.py +++ b/scripts/generate_python_sdk_docs.py @@ -31,6 +31,7 @@ def install_weave(version="latest"): cmd = [sys.executable, "-m", "pip", "install", f"git+https://github.com/wandb/weave.git@{version}"] + print(f"Running: {' '.join(cmd)}") subprocess.run(cmd, check=True) print("✓ Weave installed successfully") @@ -49,6 +50,22 @@ def install_weave(version="latest"): sys.exit(1) +def test_module_imports(modules): + """Test if modules can be imported.""" + print("\nTesting module imports...") + available_modules = [] + + for module in modules: + try: + __import__(module) + print(f" ✓ {module} - available") + available_modules.append(module) + except ImportError as e: + print(f" ✗ {module} - not available: {e}") + + return available_modules + + def generate_docs_with_lazydocs(output_dir): """Generate documentation using lazydocs.""" output_path = Path(output_dir) @@ -63,8 +80,17 @@ def generate_docs_with_lazydocs(output_dir): "weave.trace_server.trace_server_interface", ] - for module in modules_to_document: - print(f"Generating documentation for {module}...") + # Test which modules are available + available_modules = test_module_imports(modules_to_document) + + if not available_modules: + print("No modules available to document!", file=sys.stderr) + sys.exit(1) + + print(f"\nGenerating documentation for {len(available_modules)} modules...") + + for module in available_modules: + 
print(f"\nGenerating documentation for {module}...") # Use lazydocs command line interface for cleaner output cmd = [ @@ -76,17 +102,28 @@ def generate_docs_with_lazydocs(output_dir): ] try: - subprocess.run(cmd, check=True, capture_output=True, text=True) + result = subprocess.run(cmd, check=True, capture_output=True, text=True) print(f" ✓ Generated docs for {module}") + if result.stdout: + print(f" Output: {result.stdout}") except subprocess.CalledProcessError as e: print(f" ✗ Error generating docs for {module}: {e.stderr}") + # Continue with other modules instead of exiting def post_process_docs(docs_dir): """Post-process the generated documentation for Mintlify.""" docs_path = Path(docs_dir) - for md_file in docs_path.rglob("*.md"): + # Check if any files were generated + md_files = list(docs_path.rglob("*.md")) + if not md_files: + print(f"Warning: No .md files found in {docs_dir}") + return + + print(f"\nPost-processing {len(md_files)} documentation files...") + + for md_file in md_files: content = md_file.read_text() # Add Mintlify frontmatter diff --git a/scripts/generate_typescript_sdk_docs.py b/scripts/generate_typescript_sdk_docs.py index a9613d95..9524f831 100755 --- a/scripts/generate_typescript_sdk_docs.py +++ b/scripts/generate_typescript_sdk_docs.py @@ -43,6 +43,18 @@ def download_weave_source(version="main"): """Download Weave source code for a specific version.""" print(f"Downloading Weave source code for version: {version}") + # Handle "latest" by fetching the latest release tag + if version == "latest": + try: + api_url = "https://api.github.com/repos/wandb/weave/releases/latest" + response = requests.get(api_url) + response.raise_for_status() + version = response.json()["tag_name"] + print(f" Using latest release: {version}") + except Exception as e: + print(f" Warning: Could not fetch latest release, using main branch: {e}") + version = "main" + # Create temporary directory temp_dir = tempfile.mkdtemp() From 
c1007f4bf3889ae3198baa63ee37f4ca35756533 Mon Sep 17 00:00:00 2001 From: Matt Linville Date: Thu, 17 Jul 2025 11:48:17 -0700 Subject: [PATCH 5/5] fix: Use compatible typedoc versions to resolve ESM/CommonJS issues - Pin typedoc to 0.25.13 and typedoc-plugin-markdown to 3.17.1 - Remove unsupported config options (hideBreadcrumbs, hideInPageTOC) - Add better error handling and diagnostic output - Check existing package.json versions before installing --- scripts/generate_typescript_sdk_docs.py | 62 ++++++++++++++++++++----- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/scripts/generate_typescript_sdk_docs.py b/scripts/generate_typescript_sdk_docs.py index 9524f831..e60a5a87 100755 --- a/scripts/generate_typescript_sdk_docs.py +++ b/scripts/generate_typescript_sdk_docs.py @@ -90,6 +90,21 @@ def download_weave_source(version="main"): sys.exit(1) +def check_package_json_versions(sdk_path): + """Check if package.json has compatible typedoc versions.""" + package_json_path = sdk_path / "package.json" + if package_json_path.exists(): + with open(package_json_path, 'r') as f: + package_data = json.load(f) + + dev_deps = package_data.get("devDependencies", {}) + print("\nExisting TypeScript dependencies:") + for pkg in ["typedoc", "typedoc-plugin-markdown", "typescript"]: + if pkg in dev_deps: + print(f" {pkg}: {dev_deps[pkg]}") + return + + def setup_typescript_project(weave_source): """Set up the TypeScript project and install dependencies.""" sdk_path = Path(weave_source) / "sdks" / "node" @@ -100,18 +115,22 @@ def setup_typescript_project(weave_source): print(f"Setting up TypeScript project at {sdk_path}") + # Check existing versions + check_package_json_versions(sdk_path) + # Install dependencies os.chdir(sdk_path) try: - print("Installing dependencies...") + print("\nInstalling dependencies...") subprocess.run(["pnpm", "install"], check=True) - # Install typedoc and markdown plugin - print("Installing typedoc...") + # Install compatible versions of 
typedoc and markdown plugin + # Use specific versions that are known to work together + print("Installing compatible typedoc versions...") subprocess.run([ "pnpm", "add", "-D", - "typedoc", - "typedoc-plugin-markdown" + "typedoc@0.25.13", # Use a stable version that works with CommonJS + "typedoc-plugin-markdown@3.17.1" # Compatible with typedoc 0.25.x ], check=True) print("✓ Dependencies installed successfully") @@ -124,19 +143,22 @@ def setup_typescript_project(weave_source): def generate_typedoc_config(sdk_path, output_path): """Generate typedoc configuration.""" + # Use options compatible with typedoc 0.25.x config = { "entryPoints": ["src/index.ts"], "out": str(output_path), "plugin": ["typedoc-plugin-markdown"], "readme": "none", - "hideBreadcrumbs": True, - "hideInPageTOC": True, "disableSources": True, "excludePrivate": True, "excludeProtected": True, "excludeInternal": True, "githubPages": False, - "cleanOutputDir": True + "cleanOutputDir": True, + "hideGenerator": True, + "navigationLinks": { + "GitHub": "https://github.com/wandb/weave" + } } config_path = sdk_path / "typedoc.json" @@ -148,14 +170,30 @@ def generate_typedoc_config(sdk_path, output_path): def run_typedoc(sdk_path, output_path): """Run typedoc to generate documentation.""" - print(f"Generating TypeScript documentation to {output_path}...") + print(f"\nGenerating TypeScript documentation to {output_path}...") os.chdir(sdk_path) try: - subprocess.run([ + # Run typedoc with error output + result = subprocess.run([ "pnpm", "exec", "typedoc" - ], check=True) - print("✓ TypeScript documentation generated successfully") + ], capture_output=True, text=True, check=False) + + if result.returncode != 0: + print("TypeDoc output:") + if result.stdout: + print(result.stdout) + if result.stderr: + print("Errors:", result.stderr) + + # Check if docs were still generated despite warnings + if output_path.exists() and any(output_path.iterdir()): + print("⚠ TypeDoc completed with warnings, but documentation 
was generated") + else: + raise subprocess.CalledProcessError(result.returncode, result.args) + else: + print("✓ TypeScript documentation generated successfully") + except subprocess.CalledProcessError as e: print(f"✗ Failed to generate documentation: {e}", file=sys.stderr) sys.exit(1)