diff --git a/.gitignore b/.gitignore index fc82d04..72ba3c2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,11 @@ build docs/community/contribute.md docs/community/code-of-conduct.md docs/community/security.md -docs/community/sigs.md \ No newline at end of file +docs/community/sigs.md + +# Guide remote content files +docs/guide/guide.md +docs/guide/Installation/prerequisites.md +docs/guide/Installation/inference-scheduling.md +docs/guide/Installation/pd-disaggregation.md +docs/guide/Installation/wide-ep-lws.md \ No newline at end of file diff --git a/docs/guide/Installation/arch.jpg b/docs/guide/Installation/arch.jpg deleted file mode 100644 index f5de27c..0000000 Binary files a/docs/guide/Installation/arch.jpg and /dev/null differ diff --git a/docs/guide/Installation/prerequisites.md b/docs/guide/Installation/prerequisites.md deleted file mode 100644 index 800cde5..0000000 --- a/docs/guide/Installation/prerequisites.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -sidebar_position: 1 -sidebar_label: Prerequisites ---- - -# Prerequisites for running the llm-d QuickStart - - -## Client Configuration - -### Get the code - -Clone the llm-d-deployer repository. - -```bash -git clone https://github.com/llm-d/llm-d-deployer.git -``` - -Navigate to the quickstart directory - -```bash -cd llm-d-deployer/quickstart -``` - -### Required tools - -Following prerequisite are required for the installer to work. 
- -- [yq (mikefarah) – installation](https://github.com/mikefarah/yq?tab=readme-ov-file#install) -- [jq – download & install guide](https://stedolan.github.io/jq/download/) -- [git – installation guide](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) -- [Helm – quick-start install](https://helm.sh/docs/intro/install/) -- [Kustomize – official install docs](https://kubectl.docs.kubernetes.io/installation/kustomize/) -- [kubectl – install & setup](https://kubernetes.io/docs/tasks/tools/install-kubectl/) - -You can use the installer script that installs all the required dependencies. Currently only Linux is supported. - -```bash -# Currently Linux only -./install-deps.sh -``` - -### Required credentials and configuration - -- [llm-d-deployer GitHub repo – clone here](https://github.com/llm-d/llm-d-deployer.git) -- [HuggingFace HF_TOKEN](https://huggingface.co/docs/hub/en/security-tokens) with download access for the model you want to use. By default the sample application will use [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct). - -> ⚠️ Your Hugging Face account must have access to the model you want to use. You may need to visit Hugging Face [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) and -> accept the usage terms if you have not already done so. - -### Target Platforms - -Since the llm-d-deployer is based on helm charts, llm-d can be deployed on a variety of Kubernetes platforms. As more platforms are supported, the installer will be updated to support them. - -Documentation for example cluster setups are provided in the [infra](https://github.com/llm-d/llm-d-deployer/tree/main/quickstart/infra) directory of the llm-d-deployer repository. 
- -- [OpenShift on AWS](https://github.com/llm-d/llm-d-deployer/tree/main/quickstart/infra/openshift-aws.md) - - -#### Minikube - -This can be run on a minimum ec2 node type [g6e.12xlarge](https://aws.amazon.com/ec2/instance-types/g6e/) (4xL40S 48GB but only 2 are used by default) to infer the model meta-llama/Llama-3.2-3B-Instruct that will get spun up. - -> ⚠️ If your cluster has no available GPUs, the **prefill** and **decode** pods will remain in **Pending** state. - -Verify you have properly installed the container toolkit with the runtime of your choice. - -```bash -# Podman -podman run --rm --security-opt=label=disable --device=nvidia.com/gpu=all ubuntu nvidia-smi -# Docker -sudo docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi -``` - -#### OpenShift - -- OpenShift - This quickstart was tested on OpenShift 4.17. Older versions may work but have not been tested. -- NVIDIA GPU Operator and NFD Operator - The installation instructions can be found [here](https://docs.nvidia.com/datacenter/cloud-native/openshift/latest/steps-overview.html). -- NO Service Mesh or Istio installation as Istio CRDs will conflict with the gateway -- Cluster administrator privileges are required to install the llm-d cluster scoped resources diff --git a/docs/guide/Installation/quickstart.md b/docs/guide/Installation/quickstart.md deleted file mode 100644 index 0f28458..0000000 --- a/docs/guide/Installation/quickstart.md +++ /dev/null @@ -1,298 +0,0 @@ ---- -sidebar_position: 2 -sidebar_label: Quick Start installer ---- - -# Trying llm-d via the Quick Start installer -Getting Started with llm-d on Kubernetes. For specific instructions on how to install llm-d on minikube, see the [README-minikube.md](https://github.com/llm-d/llm-d-deployer/blob/main/quickstart/README-minikube.md) instructions. - -For more information on llm-d in general, see the llm-d git repository [here](https://github.com/llm-d/llm-d) and website [here](https://llm-d.ai). 
- -## Overview - -This guide will walk you through the steps to install and deploy llm-d on a Kubernetes cluster, using an opinionated flow in order to get up and running as quickly as possible. - -For more information on llm-d, see the llm-d git repository [here](https://github.com/llm-d/llm-d) and website [here](https://llmd.io). - -## Prerequisites - -First ensure you have all the tools and resources as described in [Prerequisites](./prerequisites.md) - - - -## llm-d Installation - - - Change to the directory holding your clone of the llm-d-deployer code - - Navigate to the quickstart directory, e.g. - - ```bash - cd llm-d-deployer/quickstart - ``` - -Only a single installation of llm-d on a cluster is currently supported. In the future, multiple model services will be supported. Until then, [uninstall llm-d](#uninstall) before reinstalling. - -The llm-d-deployer contains all the helm charts necessary to deploy llm-d. To facilitate the installation of the helm charts, the `llmd-installer.sh` script is provided. This script will populate the necessary manifests in the `manifests` directory. -After this, it will apply all the manifests in order to bring up the cluster. - -The llmd-installer.sh script aims to simplify the installation of llm-d using the llm-d-deployer as it's main function. It scripts as many of the steps as possible to make the installation process more streamlined. This includes: - -- Installing the GAIE infrastructure -- Creating the namespace with any special configurations -- Creating the pull secret to download the images -- Creating the model service CRDs -- Applying the helm charts -- Deploying the sample app (model service) - -It also supports uninstalling the llm-d infrastructure and the sample app. - -Before proceeding with the installation, ensure you have completed the prerequisites and are able to issue `kubectl` or `oc` commands to your cluster by configuring your `~/.kube/config` file or by using the `oc login` command. 
- -### Usage - -The installer needs to be run from the `llm-d-deployer/quickstart` directory as a cluster admin with CLI access to the cluster. - -```bash -./llmd-installer.sh [OPTIONS] -``` - -### Flags - -| Flag | Description | Example | -|--------------------------------------|---------------------------------------------------------------|------------------------------------------------------------------| -| `-z`, `--storage-size SIZE` | Size of storage volume | `./llmd-installer.sh --storage-size 15Gi` | -| `-c`, `--storage-class CLASS` | Storage class to use (default: efs-sc) | `./llmd-installer.sh --storage-class ocs-storagecluster-cephfs` | -| `-n`, `--namespace NAME` | K8s namespace (default: llm-d) | `./llmd-installer.sh --namespace foo` | -| `-f`, `--values-file PATH` | Path to Helm values.yaml file (default: values.yaml) | `./llmd-installer.sh --values-file /path/to/values.yaml` | -| `-u`, `--uninstall` | Uninstall the llm-d components from the current cluster | `./llmd-installer.sh --uninstall` | -| `-d`, `--debug` | Add debug mode to the helm install | `./llmd-installer.sh --debug` | -| `-i`, `--skip-infra` | Skip the infrastructure components of the installation | `./llmd-installer.sh --skip-infra` | -| `-t`, `--download-timeout` | Timeout for model download job | `./llmd-installer.sh --download-timeout` | -| `-D`, `--download-model` | Download the model to PVC from Hugging Face | `./llmd-installer.sh --download-model` | -| `-m`, `--disable-metrics-collection` | Disable metrics collection (Prometheus will not be installed) | `./llmd-installer.sh --disable-metrics-collection` | -| `-h`, `--help` | Show this help and exit | `./llmd-installer.sh --help` | - -## Examples - -### Install llm-d on an Existing Kubernetes Cluster - -```bash -export HF_TOKEN="your-token" -./llmd-installer.sh -``` - -### Install on OpenShift - -Before running the installer, ensure you have logged into the cluster as a cluster administrator. 
For example: - -```bash -oc login --token=sha256~yourtoken --server=https://api.yourcluster.com:6443 -``` - -```bash -export HF_TOKEN="your-token" -./llmd-installer.sh -``` - -### Validation - -The inference-gateway serves as the HTTP ingress point for all inference requests in our deployment. -It’s implemented as a Kubernetes Gateway (`gateway.networking.k8s.io/v1`) using either kgateway or istio as the -gatewayClassName, and sits in front of your inference pods to handle path-based routing, load balancing, retries, -and metrics. This example validates that the gateway itself is routing your completion requests correctly. -You can execute the [`test-request.sh`](https://github.com/llm-d/llm-d-deployer/blob/main/quickstart/test-request.sh) script in the quickstart folder to test on the cluster. - - -```bash -# Default options (the model id will be discovered via /v1/models) -./test-request.sh - -# Non-default namespace/model -./test-request.sh -n -m --minikube -``` - -> If you receive an error indicating PodSecurity "restricted" violations when running the smoke-test script, you -> need to remove the restrictive PodSecurity labels from the namespace. Once these labels are removed, re-run the -> script and it should proceed without PodSecurity errors. -> Run the following command: - -```bash -kubectl label namespace \ - pod-security.kubernetes.io/warn- \ - pod-security.kubernetes.io/warn-version- \ - pod-security.kubernetes.io/audit- \ - pod-security.kubernetes.io/audit-version- -``` - -### Customizing your deployment - -The helm charts can be customized by modifying the [values.yaml](https://github.com/llm-d/llm-d-deployer/blob/main/charts/llm-d/values.yaml) file. However, it is recommended to override values in the `values.yaml` by creating a custom yaml file and passing it to the installer using the `--values-file` flag. -Several examples are provided in the [examples](https://github.com/llm-d/llm-d-deployer/blob/main/quickstart/examples) directory. 
You would invoke the installer with the following command: - -```bash -./llmd-installer.sh --values-file ./examples/base.yaml -``` - -These files are designed to be used as a starting point to customize your deployment. Refer to the [values.yaml](https://github.com/llm-d/llm-d-deployer/blob/main/charts/llm-d/values.yaml) file for all the possible options. - -#### Sample Application and Model Configuration - -Some of the more common options for changing the sample application model are: - -- `sampleApplication.model.modelArtifactURI` - The URI of the model to use. This is the path to the model either to Hugging Face (`hf://meta-llama/Llama-3.2-3B-Instruct`) or a persistent volume claim (PVC) (`pvc://model-pvc/meta-llama/Llama-3.2-1B-Instruct`). Using a PVC can be paired with the `--download-model` flag to download the model to PVC. -- `sampleApplication.model.modelName` - The name of the model to use. This will be used in the naming of deployed resources and also the model ID when using the API. -- `sampleApplication.baseConfigMapRefName` - The name of the preset base configuration to use. This will depend on the features you want to enable. -- `sampleApplication.prefill.replicas` - The number of prefill replicas to deploy. -- `sampleApplication.decode.replicas` - The number of decode replicas to deploy. - -```yaml -sampleApplication: - model: - modelArtifactURI: hf://meta-llama/Llama-3.2-1B-Instruct - modelName: "llama3-1B" - baseConfigMapRefName: basic-gpu-with-nixl-and-redis-lookup-preset - prefill: - replicas: 1 - decode: - replicas: 1 -``` - -#### Feature Flags - -`redis.enabled` - Whether to enable Redis needed to enable the KV Cache Aware Scorer -`modelservice.epp.defaultEnvVarsOverride` - The environment variables to override for the model service. For each feature flag, you can set the value to `true` or `false` to enable or disable the feature. 
- -```yaml -redis: - enabled: true -modelservice: - epp: - defaultEnvVarsOverride: - - name: ENABLE_KVCACHE_AWARE_SCORER - value: "false" - - name: ENABLE_PREFIX_AWARE_SCORER - value: "true" - - name: ENABLE_LOAD_AWARE_SCORER - value: "true" - - name: ENABLE_SESSION_AWARE_SCORER - value: "false" - - name: PD_ENABLED - value: "false" - - name: PD_PROMPT_LEN_THRESHOLD - value: "10" - - name: PREFILL_ENABLE_KVCACHE_AWARE_SCORER - value: "false" - - name: PREFILL_ENABLE_LOAD_AWARE_SCORER - value: "false" - - name: PREFILL_ENABLE_PREFIX_AWARE_SCORER - value: "false" - - name: PREFILL_ENABLE_SESSION_AWARE_SCORER - value: "false" -``` - -### Metrics Collection - -llm-d includes built-in support for metrics collection using Prometheus and Grafana. This feature is enabled by default but can be disabled using the -`--disable-metrics-collection` flag during installation. llm-d applies ServiceMonitors for vLLM and inference-gateway services to trigger Prometheus -scrape targets. In OpenShift, the built-in user workload monitoring Prometheus stack can be utilized. In Kubernetes, Prometheus and Grafana are installed from the -prometheus-community [kube-prometheus-stack helm charts](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack). -The [llm-d metrics overview](https://github.com/llm-d/llm-d-deployer/blob/main/quickstart/metrics-overview.md) lists the metrics scraped with a default llm-d install. - -#### Accessing the Metrics UIs - -If running on OpenShift, skip to [OpenShift and Grafana](#openshift-and-grafana). 
- -#### Port Forwarding - -- Prometheus (port 9090): - -```bash -kubectl port-forward -n llm-d-monitoring --address 0.0.0.0 svc/prometheus-kube-prometheus-prometheus 9090:9090 -``` - -- Grafana (port 3000): - -```bash -kubectl port-forward -n llm-d-monitoring --address 0.0.0.0 svc/prometheus-grafana 3000:80 -``` - -Access the User Interfaces at: - -- Prometheus: \ -- Grafana: \ (default credentials: admin/admin) - -#### Grafana Dashboards - -Import the [llm-d dashboard](https://github.com/llm-d/llm-d-deployer/tree/main/quickstart/grafana/dashboards/llm-d-dashboard.json) from the Grafana UI. Go to `Dashboards -> New -> Import`. -Similarly, import the [inference-gateway dashboard](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/tools/dashboards/inference_gateway.json) -from the gateway-api-inference-extension repository. Or, if the Grafana Operator is installed in your environment, you might follow the [Grafana setup guide](https://github.com/llm-d/llm-d-deployer/blob/main/quickstart/grafana-setup.md) -to install the dashboards as `GrafanaDashboard` custom resources. - -#### OpenShift and Grafana - -If running on OpenShift with user workload monitoring enabled, you can access the metrics through the OpenShift console: - -1. Navigate to the OpenShift console -2. In the left navigation bar, click on "Observe" -3. You can access: - - Metrics: Click on "Metrics" to view and query metrics using the built-in Prometheus UI - - Targets: Click on "Targets" to see all monitored endpoints and their status - -The metrics are automatically integrated into the OpenShift monitoring stack. The llm-d-deployer does not install Grafana on OpenShift, -but it's recommended that users install Grafana to view metrics and import dashboards. - -Follow the [Grafana setup guide](https://github.com/llm-d/llm-d-deployer/blob/main/quickstart/grafana-setup.md) in the llm-d-deployer repository. 
-The guide includes manifests to install the following: - -- Grafana instance -- Grafana Prometheus datasource from user workload monitoring stack -- Grafana llm-d dashboard - -#### Security Note - -When running in a cloud environment (like EC2), make sure to: - -1. Configure your security groups to allow inbound traffic on ports 9090 and 3000 (if using port-forwarding) -2. Use the `--address 0.0.0.0` flag with port-forward to allow external access -3. Consider setting up proper authentication for production environments -4. If using ingress, ensure proper TLS configuration and authentication -5. For OpenShift, consider using the built-in OAuth integration for Grafana - -### Troubleshooting - -The various images can take some time to download depending on your connectivity. Watching events -and logs of the prefill and decode pods is a good place to start. Here are some examples to help -you get started. - -```bash -# View the status of the pods in the default llm-d namespace. Replace "llm-d" if you used a custom namespace on install -kubectl get pods -n llm-d - -# Describe all prefill pods: -kubectl describe pods -l llm-d.ai/role=prefill -n llm-d - -# Fetch logs from each prefill pod: -kubectl logs -l llm-d.ai/role=prefill --all-containers=true -n llm-d --tail=200 - -# Describe all decode pods: -kubectl describe pods -l llm-d.ai/role=decode -n llm-d - -# Fetch logs from each decode pod: -kubectl logs -l llm-d.ai/role=decode --all-containers=true -n llm-d --tail=200 - -# Describe all endpoint-picker pods: -kubectl describe pod -n llm-d -l llm-d.ai/epp - -# Fetch logs from each endpoint-picker pod: -kubectl logs -n llm-d -l llm-d.ai/epp --all-containers=true --tail=200 -``` - -More examples of debugging logs can be found [here](https://github.com/llm-d/llm-d-deployer/blob/main/quickstart/examples/no-features/README.md). - -### Uninstall - -This will remove llm-d resources from the cluster. 
This is useful, especially for test/dev if you want to -make a change, simply uninstall and then run the installer again with any changes you make. - -```bash -./llmd-installer.sh --uninstall -``` \ No newline at end of file diff --git a/docs/guide/guide.md b/docs/guide/guide.md deleted file mode 100644 index c61ba37..0000000 --- a/docs/guide/guide.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -sidebar_position: 1 ---- - -# llm-d User Guide - -The user guide is organized in sections to help you get started with llm-d and then tailor the configuration to your resources and application needs. It is currently focused on the Quick Start via the llm-d-deployer Helm chart. - -**What is llm-d?** - -llm-d is an open source project providing distributed inferencing for GenAI runtimes on any Kubernetes cluster. Its highly performant, scalable architecture helps reduce costs through a spectrum of hardware efficiency improvements. The project prioritizes ease of deployment+use as well as SRE needs + day 2 operations associated with running large GPU clusters. - -[For more information check out the Architecture Documentation](/docs/architecture/00_architecture.mdx) - -## Installation: Start here to minimize your frustration - -This guide will walk you through the steps to install and deploy the llm-d quickstart demo on a Kubernetes cluster. - - - [Prerequisites](./guide/Installation/prerequisites) Make sure your compute resources and system configuration are ready - - [Quick Start](./guide/Installation/quickstart) If your resources are ready, "kick the tires" with our Quick Start! 
- - - - diff --git a/remote-content/remote-content.js b/remote-content/remote-content.js index 25b7416..81ad860 100644 --- a/remote-content/remote-content.js +++ b/remote-content/remote-content.js @@ -6,6 +6,13 @@ import codeOfConductSource from './remote-sources/code-of-conduct.js'; import securitySource from './remote-sources/security.js'; import sigsSource from './remote-sources/sigs.js'; +// Import guide remote content sources +import guideExamplesSource from './remote-sources/guide-examples.js'; +import guidePrerequisitesSource from './remote-sources/guide-prerequisites.js'; +import guideInferenceSchedulingSource from './remote-sources/guide-inference-scheduling.js'; +import guidePdDisaggregationSource from './remote-sources/guide-pd-disaggregation.js'; +import guideWideEpLwsSource from './remote-sources/guide-wide-ep-lws.js'; + /** * Remote Content Plugin System * @@ -30,6 +37,13 @@ const remoteContentPlugins = [ securitySource, sigsSource, + // Guide remote content sources + guideExamplesSource, + guidePrerequisitesSource, + guideInferenceSchedulingSource, + guidePdDisaggregationSource, + guideWideEpLwsSource, + // Add more remote sources here ]; diff --git a/remote-content/remote-sources/guide-examples.js b/remote-content/remote-sources/guide-examples.js new file mode 100644 index 0000000..e7d6aa4 --- /dev/null +++ b/remote-content/remote-sources/guide-examples.js @@ -0,0 +1,61 @@ +/** + * Guide Examples Remote Content + * + * Downloads the examples README.md file from the llm-d-infra repository + * and transforms it into docs/guide/guide.md (landing page) + */ + +import { createContentWithSource } from './utils.js'; + +export default [ + 'docusaurus-plugin-remote-content', + { + // Basic configuration + name: 'guide-examples', + sourceBaseUrl: 'https://raw.githubusercontent.com/llm-d-incubation/llm-d-infra/main/', + outDir: 'docs/guide', + documents: ['quickstart/examples/README.md'], + + // Plugin behavior + noRuntimeDownloads: false, // Download 
automatically when building + performCleanup: true, // Clean up files after build + + // Transform the content for this specific document + modifyContent(filename, content) { + if (filename === 'quickstart/examples/README.md') { + return createContentWithSource({ + title: 'llm-d User Guide', + description: 'Getting started with llm-d and exploring well-lit paths for different use cases', + sidebarLabel: 'User Guide', + sidebarPosition: 1, + filename: 'quickstart/examples/README.md', + newFilename: 'guide.md', + repoUrl: 'https://github.com/llm-d-incubation/llm-d-infra', + branch: 'main', + content, + // Transform content to work in docusaurus context + contentTransform: (content) => content + // Insert the What-is-llm-d intro ahead of the title; /^# / has no m flag, so it only matches when the README starts with its H1 + .replace(/^# /, `**What is llm-d?** + +llm-d is an open source project providing distributed inferencing for GenAI runtimes on any Kubernetes cluster. Its highly performant, scalable architecture helps reduce costs through a spectrum of hardware efficiency improvements. The project prioritizes ease of deployment+use as well as SRE needs + day 2 operations associated with running large GPU clusters. 
+ +[For more information check out the Architecture Documentation](/docs/architecture/00_architecture.mdx) + +# `) + // Drop every README.md so directory-style links remain (./sim/README.md becomes ./sim/) + .replace(/README\.md/g, '') + // Point the TBD placeholder and the sim example at the repository; the optional slash covers the README.md-stripped form + .replace(/\]\(TBD\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/)') + .replace(/\]\(\.\/sim\/?\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/sim/README.md)') + // Fix Installation links to work within Docusaurus guide structure + // Use absolute paths within the docs structure; the optional trailing slash also matches ./name/ left behind by the README.md removal above + .replace(/\]\(\.\/(inference-scheduling)\/?\)/g, '](/docs/guide/Installation/$1)') + .replace(/\]\(\.\/(pd-disaggregation)\/?\)/g, '](/docs/guide/Installation/$1)') + .replace(/\]\(\.\/(wide-ep-lws)\/?\)/g, '](/docs/guide/Installation/$1)') + }); + } + return undefined; + }, + }, +]; \ No newline at end of file diff --git a/remote-content/remote-sources/guide-inference-scheduling.js b/remote-content/remote-sources/guide-inference-scheduling.js new file mode 100644 index 0000000..3aa0b8f --- /dev/null +++ b/remote-content/remote-sources/guide-inference-scheduling.js @@ -0,0 +1,58 @@ +/** + * Guide Inference Scheduling Remote Content + * + * Downloads the inference scheduling README.md file from the llm-d-infra repository + * and transforms it into docs/guide/Installation/inference-scheduling.md + */ + +import { createContentWithSource } from './utils.js'; + +export default [ + 'docusaurus-plugin-remote-content', + { + // Basic configuration + name: 'guide-inference-scheduling', + sourceBaseUrl: 'https://raw.githubusercontent.com/llm-d-incubation/llm-d-infra/main/', + outDir: 'docs/guide/Installation', + documents: ['quickstart/examples/inference-scheduling/README.md'], + + // Plugin behavior + noRuntimeDownloads: false, // Download automatically when building + performCleanup: true, // Clean up files after build + + // Transform the content for this specific 
document + modifyContent(filename, content) { + if (filename === 'quickstart/examples/inference-scheduling/README.md') { + return createContentWithSource({ + title: 'Inference Scheduling', + description: 'Well-lit path for inference scheduling in llm-d', + sidebarLabel: 'Inference Scheduling', + sidebarPosition: 2, + filename: 'quickstart/examples/inference-scheduling/README.md', + newFilename: 'inference-scheduling.md', + repoUrl: 'https://github.com/llm-d-incubation/llm-d-infra', + branch: 'main', + content, + // Transform content to work in docusaurus context + contentTransform: (content) => content + // Re-anchor relative links: a leading "../" gains one more "../" and a leading "./" is dropped + .replace(/\]\(\.\.\//g, '](../../') + .replace(/\]\(\.\//g, '](') + // Fix MDX compilation issues with angle bracket URLs by rendering them as inline code + .replace(/<(http[s]?:\/\/[^>]+)>/g, '`$1`') + // Fix specific broken links to point to repository (these patterns see the paths as already rewritten above) + .replace(/\[gke\.md\]\(gke\.md\)/g, '[gke.md](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/inference-scheduling/gke.md)') + .replace(/\[Temporary Istio Workaround\]\(\.\.\/\.\.\/\.\.\/istio-workaround\.md\)/g, '[Temporary Istio Workaround](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/istio-workaround.md)') + // Fix broken external references + .replace(/\]\(\.\.\/\.\.\/precise-prefix-cache-aware\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/tree/main/quickstart/examples/precise-prefix-cache-aware)') + // Scripts three levels up map to quickstart/ and must run before the generic file rule, which would otherwise consume them; absolute URLs are skipped + .replace(/\]\(\.\.\/\.\.\/\.\.\/([^)]+\.sh)\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/$1)') + .replace(/\]\((?!https?:\/\/)([^)]+\.(?:yaml|sh))\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/inference-scheduling/$1)') + // Convert the remaining relative markdown links; the absolute-URL guard stops the links produced above from being prefixed a second time + .replace(/\]\((?!https?:\/\/)([^)]+\.md)\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/inference-scheduling/$1)') + }); + } + return undefined; + }, + 
}, +]; \ No newline at end of file diff --git a/remote-content/remote-sources/guide-pd-disaggregation.js b/remote-content/remote-sources/guide-pd-disaggregation.js new file mode 100644 index 0000000..dc9005d --- /dev/null +++ b/remote-content/remote-sources/guide-pd-disaggregation.js @@ -0,0 +1,53 @@ +/** + * Guide Prefill-Decode Disaggregation Remote Content + * + * Downloads the README.md file from the pd-disaggregation directory in llm-d-infra repository + * and transforms it into docs/guide/Installation/pd-disaggregation.md + */ + +import { createContentWithSource } from './utils.js'; + +export default [ + 'docusaurus-plugin-remote-content', + { + // Basic configuration + name: 'guide-pd-disaggregation', + sourceBaseUrl: 'https://raw.githubusercontent.com/llm-d-incubation/llm-d-infra/main/', + outDir: 'docs/guide/Installation', + documents: ['quickstart/examples/pd-disaggregation/README.md'], + + // Plugin behavior + noRuntimeDownloads: false, // Download automatically when building + performCleanup: true, // Clean up files after build + + // Transform the content for this specific document + modifyContent(filename, content) { + if (filename === 'quickstart/examples/pd-disaggregation/README.md') { + return createContentWithSource({ + title: 'Prefill-Decode Disaggregation', + description: 'Well-lit path for prefill-decode disaggregation in llm-d', + sidebarLabel: 'Prefill-Decode Disaggregation', + sidebarPosition: 3, + filename: 'quickstart/examples/pd-disaggregation/README.md', + newFilename: 'pd-disaggregation.md', + repoUrl: 'https://github.com/llm-d-incubation/llm-d-infra', + branch: 'main', + content, + // Transform content to work in docusaurus context + contentTransform: (content) => content + // Re-anchor relative links: a leading "../" gains one more "../" and a leading "./" is dropped + .replace(/\]\(\.\.\//g, '](../../') + .replace(/\]\(\.\//g, '](') + // Fix MDX compilation issues with angle bracket URLs by rendering them as inline code + .replace(/<(http[s]?:\/\/[^>]+)>/g, '`$1`') + // Scripts three levels up map to quickstart/ and must run before the generic file rule, which would otherwise consume them; absolute URLs are skipped + 
.replace(/\]\(\.\.\/\.\.\/\.\.\/([^)]+\.sh)\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/$1)') + .replace(/\]\((?!https?:\/\/)([^)]+\.(?:yaml|sh|json))\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/pd-disaggregation/$1)') + // Convert relative markdown links to repository links; absolute URLs are skipped so they are not prefixed twice + .replace(/\]\((?!https?:\/\/)([^)]+\.md)\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/pd-disaggregation/$1)') + }); + } + return undefined; + }, + }, +]; \ No newline at end of file diff --git a/remote-content/remote-sources/guide-prerequisites.js b/remote-content/remote-sources/guide-prerequisites.js new file mode 100644 index 0000000..be8013e --- /dev/null +++ b/remote-content/remote-sources/guide-prerequisites.js @@ -0,0 +1,56 @@ +/** + * Guide Prerequisites Remote Content + * + * Downloads the quickstart README.md file from the llm-d-infra repository + * and transforms it into docs/guide/Installation/prerequisites.md + */ + +import { createContentWithSource } from './utils.js'; + +export default [ + 'docusaurus-plugin-remote-content', + { + // Basic configuration + name: 'guide-prerequisites', + sourceBaseUrl: 'https://raw.githubusercontent.com/llm-d-incubation/llm-d-infra/main/', + outDir: 'docs/guide/Installation', + documents: ['quickstart/README.md'], + + // Plugin behavior + noRuntimeDownloads: false, // Download automatically when building + performCleanup: true, // Clean up files after build + + // Transform the content for this specific document + modifyContent(filename, content) { + if (filename === 'quickstart/README.md') { + return createContentWithSource({ + title: 'Prerequisites', + description: 'Prerequisites for running the llm-d QuickStart', + sidebarLabel: 'Prerequisites', + sidebarPosition: 1, + filename: 'quickstart/README.md', + newFilename: 'prerequisites.md', + repoUrl: 'https://github.com/llm-d-incubation/llm-d-infra', + branch: 'main', + content, + // Transform content to work in 
docusaurus context + contentTransform: (content) => content + // Fix any relative links that might break; file links under examples/ are left for the repository rules below so they keep their examples/ segment + .replace(/\]\(examples\/(?![^)]*\.(?:md|yaml|json|sh)\))/g, '](../') + .replace(/\]\(\.\//g, '](') + // Fix MDX compilation issues with angle bracket URLs by rendering them as inline code + .replace(/<(http[s]?:\/\/[^>]+)>/g, '`$1`') + // Convert relative markdown links to repository links; absolute URLs are skipped so they are not prefixed twice + .replace(/\]\((?!https?:\/\/)([^)]+\.md)\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/$1)') + // Fix file links to point to repository (same absolute-URL guard) + .replace(/\]\((?!https?:\/\/)([^)]+\.(?:yaml|json|sh))\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/$1)') + // Fix broken anchor reference + .replace(/#openshift-and-grafana/g, '#install-on-openshift') + // Fix relative path references to files + .replace(/\]\(grafana\/dashboards\/([^)]+)\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/grafana/dashboards/$1)') + }); + } + return undefined; + }, + }, +]; \ No newline at end of file diff --git a/remote-content/remote-sources/guide-wide-ep-lws.js b/remote-content/remote-sources/guide-wide-ep-lws.js new file mode 100644 index 0000000..e83fc80 --- /dev/null +++ b/remote-content/remote-sources/guide-wide-ep-lws.js @@ -0,0 +1,53 @@ +/** + * Guide Wide Endpoint LWS Remote Content + * + * Downloads the README.md file from the wide-ep-lws directory in llm-d-infra repository + * and transforms it into docs/guide/Installation/wide-ep-lws.md + */ + +import { createContentWithSource } from './utils.js'; + +export default [ + 'docusaurus-plugin-remote-content', + { + // Basic configuration + name: 'guide-wide-ep-lws', + sourceBaseUrl: 'https://raw.githubusercontent.com/llm-d-incubation/llm-d-infra/main/', + outDir: 'docs/guide/Installation', + documents: ['quickstart/examples/wide-ep-lws/README.md'], + + // Plugin behavior + noRuntimeDownloads: false, // Download automatically when building + performCleanup: true, // Clean up files after build + + // Transform the content for this specific document + 
modifyContent(filename, content) { + if (filename === 'quickstart/examples/wide-ep-lws/README.md') { + return createContentWithSource({ + title: 'Wide Endpoint LWS', + description: 'Well-lit path for wide endpoint Leaderworker Set configuration in llm-d', + sidebarLabel: 'Wide Endpoint LWS', + sidebarPosition: 4, + filename: 'quickstart/examples/wide-ep-lws/README.md', + newFilename: 'wide-ep-lws.md', + repoUrl: 'https://github.com/llm-d-incubation/llm-d-infra', + branch: 'main', + content, + // Transform content to work in docusaurus context. NOTE(review): "ep" in wide-ep-lws presumably means expert parallelism rather than endpoint; confirm title, description and sidebarLabel against the upstream README + contentTransform: (content) => content + // Re-anchor relative links: a leading "../" gains one more "../" and a leading "./" is dropped + .replace(/\]\(\.\.\//g, '](../../') + .replace(/\]\(\.\//g, '](') + // Fix MDX compilation issues with angle bracket URLs by rendering them as inline code + .replace(/<(http[s]?:\/\/[^>]+)>/g, '`$1`') + // Scripts three levels up map to quickstart/ and must run before the generic file rule, which would otherwise consume them; absolute URLs are skipped + .replace(/\]\(\.\.\/\.\.\/\.\.\/([^)]+\.sh)\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/$1)') + .replace(/\]\((?!https?:\/\/)([^)]+\.(?:yaml|sh|json))\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/wide-ep-lws/$1)') + // Convert relative markdown links to repository links; absolute URLs are skipped so they are not prefixed twice + .replace(/\]\((?!https?:\/\/)([^)]+\.md)\)/g, '](https://github.com/llm-d-incubation/llm-d-infra/blob/main/quickstart/examples/wide-ep-lws/$1)') + }); + } + return undefined; + }, + }, +]; \ No newline at end of file