From 872506c0e01afd7e24d45ee6d3731a730b8b7562 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Fri, 22 Aug 2025 07:14:07 +0200
Subject: [PATCH 01/44] Prepare release branch
---
versions-config.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/versions-config.json b/versions-config.json
index f46e30f5..3dae6a84 100644
--- a/versions-config.json
+++ b/versions-config.json
@@ -1,7 +1,7 @@
{
"COMMENT1": "These values are used for yarn local yarn builds",
"COMMENT2": "Build time values are set in _build_scripts/update-config-versions.js",
- "weaviate_version": "1.32.1",
+ "weaviate_version": "1.33.0",
"helm_version": "17.3.3",
"weaviate_cli_version": "3.2.2",
"weaviate_agents_version": "0.12.0",
From ccc5e0b888272a1ad9e097f462cb61d5bd4ec263 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Fri, 22 Aug 2025 07:32:41 +0200
Subject: [PATCH 02/44] Add 1-bit RQ changes
---
.../rq-compression-parameters.mdx | 8 ++--
docs/weaviate/concepts/vector-quantization.md | 43 ++++++++++++++++---
.../compression/rq-compression.md | 5 ++-
3 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/_includes/configuration/rq-compression-parameters.mdx b/_includes/configuration/rq-compression-parameters.mdx
index 0e33fba6..7bee774c 100644
--- a/_includes/configuration/rq-compression-parameters.mdx
+++ b/_includes/configuration/rq-compression-parameters.mdx
@@ -1,4 +1,4 @@
-| Parameter | Type | Default | Details |
-| :------------------- | :------ | :------ | :--------------------------------------------------------------------------------------- |
-| `rq`: `bits` | integer | 8 | The number of bits used to quantize each data point. Currently only 8 bits is supported. |
-| `rq`: `rescoreLimit` | integer | -1 | The minimum number of candidates to fetch before rescoring. |
+| Parameter | Type | Default | Details |
+| :------------------- | :------ | :------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `rq`: `bits` | integer | `8` | The number of bits used to quantize each data point. Can be `8` or `1`. Learn more about [8-bit](/weaviate/concepts/vector-quantization#8-bit-rq) and [1-bit](/weaviate/concepts/vector-quantization#1-bit-rq) RQ. |
+| `rq`: `rescoreLimit` | integer | `-1` | The minimum number of candidates to fetch before rescoring. |
diff --git a/docs/weaviate/concepts/vector-quantization.md b/docs/weaviate/concepts/vector-quantization.md
index 3945e487..439cd945 100644
--- a/docs/weaviate/concepts/vector-quantization.md
+++ b/docs/weaviate/concepts/vector-quantization.md
@@ -29,7 +29,7 @@ Vector quantization is a technique that reduces the memory footprint of vector e
PQ reduces the size of each vector embedding in two steps. First, it reduces the number of vector dimensions to a smaller number of "segments", and then each segment is quantized to a smaller number of bits from the original number of bits (typically a 32-bit float).
-import PQTradeoffs from '/_includes/configuration/pq-compression/tradeoffs.mdx' ;
+import PQTradeoffs from '/_includes/configuration/pq-compression/tradeoffs.mdx' ;
@@ -50,7 +50,7 @@ The PQ `segments` controls the tradeoff between memory and recall. A larger `seg
Below is a list segment values for common vectorizer modules:
| Module | Model | Dimensions | Segments |
-|-------------|-----------------------------------------|------------|------------------------|
+| ----------- | --------------------------------------- | ---------- | ---------------------- |
| openai | text-embedding-ada-002 | 1536 | 512, 384, 256, 192, 96 |
| cohere | multilingual-22-12 | 768 | 384, 256, 192, 96 |
| huggingface | sentence-transformers/all-MiniLM-L12-v2 | 384 | 192, 128, 96 |
@@ -66,9 +66,11 @@ Weaviate uses a maximum of `trainingLimit` objects (per shard) for training, eve
After the PQ conversion completes, query and write to the index as normal. Distances may be slightly different due to the effects of quantization.
:::info Which objects are used for training?
+
- (`v1.27` and later) If the collection has more objects than the training limit, Weaviate randomly selects objects from the collection to train the codebook.
- - (`v1.26` and earlier) Weaviate uses the first `trainingLimit` objects in the collection to train the codebook.
+- (`v1.26` and earlier) Weaviate uses the first `trainingLimit` objects in the collection to train the codebook.
- If the collection has fewer objects than the training limit, Weaviate uses all objects in the collection to train the codebook.
+
:::
### Encoders
@@ -120,20 +122,45 @@ This means that the feature is still under development and may change in future
:::
-**Rotational quantization (RQ)** is an untrained 8-bit quantization technique that provides 4x compression while maintaining 98-99% recall on most datasets. Unlike SQ, RQ requires no training phase and can be enabled immediately at index creation. RQ works in two steps:
+**Rotational quantization (RQ)** is an untrained quantization technique that provides significant compression while maintaining high recall on most datasets. Unlike SQ, RQ requires no training phase and can be enabled immediately at index creation. RQ is available in two variants: **8-bit RQ** and **1-bit RQ**.
+
+### 8-bit RQ
+
+8-bit RQ provides 4x compression while maintaining 98-99% recall on most datasets. 8-bit RQ works in two steps:
1. **Fast pseudorandom rotation**: The input vector is transformed using a fast rotation based on the Walsh Hadamard Transform. This rotation takes approximately 7-10 microseconds for a 1536-dimensional vector. The output dimension is rounded up to the nearest multiple of 64.
2. **Scalar quantization**: Each entry of the rotated vector is quantized to an 8-bit integer. The minimum and maximum values of each individual rotated vector define the quantization interval.
+### 1-bit RQ
+
+1-bit RQ is an untrained asymmetric quantization method that provides close to 32x compression as dimensionality increases. This method is inspired by 1-bit RaBitQ and works as follows:
+
+1. **Fast pseudorandom rotation**: The same rotation process as 8-bit RQ is applied to the input vector.
+
+2. **Asymmetric quantization**:
+ - **Data vectors**: Quantized using 1 bit per dimension by storing only the sign of each entry
+ - **Query vectors**: Scalar quantized using 5 bits per dimension during search
+
+This asymmetric approach improves recall compared to symmetric 1-bit schemes (such as BQ) by using more precision for query vectors during distance calculation. On datasets well-suited for BQ (like OpenAI embeddings), 1-bit RQ essentially matches BQ recall. It also works well on datasets where BQ performs poorly (such as [SIFT](https://arxiv.org/abs/2504.09081)).
+
+### RQ characteristics
+
The rotation step provides multiple benefits. It tends to reduce the quantization interval and decrease quantization error by distributing values more uniformly. It also distributes the distance information more evenly across all dimensions, providing a better starting point for distance estimation.
-It's worth noting that RQ rounds up dimensions to multiples of 64 which means that low-dimensional data (< 64 or 128 dimensions) might result in less than optimal compression.
+It's worth noting that both RQ variants round up dimensions to multiples of 64, which means that low-dimensional data (< 64 or 128 dimensions) might result in less than optimal compression.
+
+While inspired by extended RaBitQ, this implementation differs significantly for performance reasons. It uses fast pseudorandom rotations instead of truly random rotations.
+
+- For 8-bit RQ: employs scalar quantization instead of RaBitQ's encoding algorithm
+- For 1-bit RQ: uses word-level parallelism across dimensions for faster distance estimation
-While inspired by extended RaBitQ, this implementation differs significantly for performance reasons. It Uses fast pseudorandom rotations instead of truly random rotations and it employs scalar quantization instead of RaBitQ's encoding algorithm, which becomes prohibitively slow with more bits per entry.
+From the user's perspective, 1-bit RQ is not a separate quantization method, but rather a configuration setting for RQ.
:::tip
+
Learn more about how to [configure rotational quantization](../configuration/compression/rq-compression.md) in Weaviate.
+
:::
## Over-fetching / re-scoring
@@ -173,6 +200,7 @@ In some cases, rescoring also includes over-fetching, whereby additional candida
## Further resources
:::info Related pages
+
- [Concepts: Indexing](./indexing/index.md)
- [Concepts: Vector Indexing](./indexing/vector-index.md)
- [Configuration: Vector index](../config-refs/indexing/vector-index.mdx)
@@ -182,10 +210,11 @@ In some cases, rescoring also includes over-fetching, whereby additional candida
- [How to configure: Scalar quantization (compression)](../configuration/compression/sq-compression.md)
- [How to configure: Rotational quantization (compression)](../configuration/compression/rq-compression.md)
- [Weaviate Academy: 250 Vector Compression](../../academy/py/compression/index.md)
+
:::
## Questions and feedback
-import DocsFeedback from '/_includes/docs-feedback.mdx';
+import DocsFeedback from '/_includes/docs-feedback.mdx';
diff --git a/docs/weaviate/configuration/compression/rq-compression.md b/docs/weaviate/configuration/compression/rq-compression.md
index fe697bd0..fda60947 100644
--- a/docs/weaviate/configuration/compression/rq-compression.md
+++ b/docs/weaviate/configuration/compression/rq-compression.md
@@ -21,7 +21,10 @@ This means that the feature is still under development and may change in future
:::
-[**Rotational quantization (RQ)**](../../concepts/vector-quantization.md#rotational-quantization) is a fast untrained vector compression technique that offers 4x compression while retaining almost perfect recall (98-99% on most datasets).
+[**Rotational quantization (RQ)**](../../concepts/vector-quantization.md#rotational-quantization) is a fast untrained vector compression technique:
+
+- **8-bit RQ**: Up to 4x compression while retaining almost perfect recall (98-99% on most datasets).
+- **1-bit RQ**: Close to 32x compression as dimensionality increases with moderate recall across various datasets.
:::note HNSW only
RQ is currently not supported for the flat index type.
From 551e7fb8b674c3596edd2c2f3931057e30cebfaa Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Mon, 25 Aug 2025 11:42:45 +0200
Subject: [PATCH 03/44] Add batch import docs
---
_includes/code/howto/manage-data.import.py | 45 ++++
docs/weaviate/concepts/data-import.mdx | 65 ++++++
docs/weaviate/manage-objects/import.mdx | 229 ++++++++++++---------
3 files changed, 246 insertions(+), 93 deletions(-)
create mode 100644 docs/weaviate/concepts/data-import.mdx
diff --git a/_includes/code/howto/manage-data.import.py b/_includes/code/howto/manage-data.import.py
index 349ffa48..ff104e2e 100644
--- a/_includes/code/howto/manage-data.import.py
+++ b/_includes/code/howto/manage-data.import.py
@@ -557,4 +557,49 @@ def add_object(obj) -> None:
# Clean up
client.collections.delete("NewCollection")
+
+# ==================================================
+# ===== Server-side (automatic) batch import =====
+# ==================================================
+
+# Re-create the collection
+client.collections.delete("MyCollection")
+client.collections.create(
+ "MyCollection",
+ vector_config=Configure.Vectors.self_provided()
+)
+
+# START ServerSideBatchImportExample
+data_rows = [
+ {"title": f"Object {i+1}"} for i in range(5)
+]
+
+collection = client.collections.get("MyCollection")
+
+# highlight-start
+# Use `automatic` for server-side batching. The client will send data
+# in chunks and the server will dynamically manage the import process.
+with collection.batch.automatic() as batch:
+ for data_row in data_rows:
+ batch.add_object(
+ properties=data_row,
+ )
+# highlight-end
+ if batch.number_errors > 10:
+ print("Batch import stopped due to excessive errors.")
+ break
+
+failed_objects = collection.batch.failed_objects
+if failed_objects:
+ print(f"Number of failed imports: {len(failed_objects)}")
+ print(f"First failed object: {failed_objects[0]}")
+# END ServerSideBatchImportExample
+
+result = collection.aggregate.over_all(total_count=True)
+assert result.total_count == 5
+
+# Clean up
+client.collections.delete(collection.name)
+
+
client.close()
diff --git a/docs/weaviate/concepts/data-import.mdx b/docs/weaviate/concepts/data-import.mdx
new file mode 100644
index 00000000..43b51296
--- /dev/null
+++ b/docs/weaviate/concepts/data-import.mdx
@@ -0,0 +1,65 @@
+---
+title: Data import
+sidebar_position: 11
+description: "Theoretical explanation of client-side and server-side batch imports."
+image: og/docs/concepts.jpg
+---
+
+Weaviate offers two flexible methods for importing data in bulk: **client-side batching** and **server-side batching**. This allows you to choose the best strategy based on your specific needs for control and simplicity.
+
+### Client-side batching
+
+In the traditional client-side approach, **your application is responsible for grouping data into batches**. You define the exact size of each batch (e.g., 100 objects) using the appropriate [client library method](../manage-objects/import.mdx). The client then sends these pre-defined chunks to the Weaviate server.
+
+This method gives you direct control over the import process but may require manual tuning of parameters like `batch_size` and `concurrent_requests` to achieve optimal performance and avoid overwhelming the server.
+
+### Server-side batching
+
+Server-side batching, or **automatic mode**, is a more robust and the recommended approach. Here, the client sends data as a continuous stream, and the **Weaviate server intelligently manages the data flow**.
+
+Using an internal queue and a dynamic **backpressure** mechanism, the server tells the client how much data to send next based on its current workload. This simplifies your client code, eliminates the need for manual tuning, and results in a more efficient and resilient data import process.
+
+:::tip
+
+For **code examples**, check out the [How-to: Batch import](../manage-objects/import.mdx) guide. Currently, only the Python client supports server-side batch imports.
+
+:::
+
+---
+
+## Server-side batching (Automatic mode)
+
+Weaviate's server-side batching, also known as **automatic batching**, is an intelligent mechanism designed to make data ingestion simpler, faster, and more robust. Instead of manually tuning batch parameters on the client side, you can let the server dynamically manage the data flow for optimal performance.
+
+This mode is a drop-in replacement for `fixed_size` or `dynamic` batching and is the recommended method for importing data.
+
+### How it works
+
+When you use the `automatic()` batching context manager, you initiate a persistent connection to the server for the duration of the batch job.
+
+1. **Client Sends Data**: Your client sends objects to the server in chunks. These requests execute instantly without waiting for the data to be fully imported.
+2. **Server Manages Queues**: The server places incoming objects into an internal queue system. This decouples the network communication from the actual database work (like vectorization and storage).
+3. **Dynamic Backpressure**: The server continuously monitors its internal queue size. It calculates an exponential moving average (EMA) of its workload and tells the client the ideal number of objects to send in the next chunk. This feedback loop allows the system to self-regulate, maximizing throughput without overwhelming the server.
+4. **Asynchronous Errors**: If an error occurs while processing an object (e.g., validation fails), the server sends the error message back to the client over a separate, dedicated stream without interrupting the flow of other objects.
+
+This architecture centralizes the complex batching logic on the server, resulting in a more efficient and stable data ingestion pipeline for all connected clients.
+
+:::info Why use automatic batching?
+
+- **Simplified client code**: No need to tweak `batch_size` and `concurrent_requests` manually. The server determines the optimal batch size based on its current workload.
+- **Improved stability**: The system automatically applies **backpressure**. If the server is busy, it will instruct the client to send less data, preventing overloads and request timeouts, which is especially useful during long-running vectorization tasks.
+- **Enhanced resilience**: It's designed to handle cluster events like node scaling more gracefully, reducing the risk of interrupted batches.
+- **Streamlined error handling**: Errors are streamed back from the server asynchronously. A single failed object won't halt the entire import process, and you can handle errors as they arrive.
+
+:::
+
+## Further resources
+
+- [How-to: Batch import](../manage-objects/import.mdx)
+- [How-to: Create objects](../manage-objects/create.mdx)
+
+## Questions and feedback
+
+import DocsFeedback from "/_includes/docs-feedback.mdx";
+
+
diff --git a/docs/weaviate/manage-objects/import.mdx b/docs/weaviate/manage-objects/import.mdx
index 02ae6110..b0961a76 100644
--- a/docs/weaviate/manage-objects/import.mdx
+++ b/docs/weaviate/manage-objects/import.mdx
@@ -4,18 +4,18 @@ sidebar_position: 15
image: og/docs/howto.jpg
---
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
-import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock';
-import PyCode from '!!raw-loader!/_includes/code/howto/manage-data.import.py';
-import PyCodeV3 from '!!raw-loader!/_includes/code/howto/manage-data.import-v3.py';
-import PySuppCode from '!!raw-loader!/_includes/code/howto/sample-data.py';
-import TSCode from '!!raw-loader!/_includes/code/howto/manage-data.import.ts';
-import TSCodeLegacy from '!!raw-loader!/_includes/code/howto/manage-data.import-v2.ts';
-import TsSuppCode from '!!raw-loader!/_includes/code/howto/sample-data.ts';
-import JavaCode from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.import.java';
-import GoCode from '!!raw-loader!/_includes/code/howto/go/docs/manage-data.import_test.go';
-import SkipLink from '/src/components/SkipValidationLink'
+import Tabs from "@theme/Tabs";
+import TabItem from "@theme/TabItem";
+import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock";
+import PyCode from "!!raw-loader!/_includes/code/howto/manage-data.import.py";
+import PyCodeV3 from "!!raw-loader!/_includes/code/howto/manage-data.import-v3.py";
+import PySuppCode from "!!raw-loader!/_includes/code/howto/sample-data.py";
+import TSCode from "!!raw-loader!/_includes/code/howto/manage-data.import.ts";
+import TSCodeLegacy from "!!raw-loader!/_includes/code/howto/manage-data.import-v2.ts";
+import TsSuppCode from "!!raw-loader!/_includes/code/howto/sample-data.ts";
+import JavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.import.java";
+import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/manage-data.import_test.go";
+import SkipLink from "/src/components/SkipValidationLink";
[Batch imports](../tutorials/import.md#to-batch-or-not-to-batch) are an efficient way to add multiple data objects and cross-references.
@@ -46,6 +46,7 @@ The following example adds objects to the `MyCollection` collection.
### Error handling
+
During a batch import, any failed objects or references will be stored and can be obtained through `batch.failed_objects` and `batch.failed_references`.
Additionally, a running count of failed objects and references is maintained and can be accessed through `batch.number_errors` within the context manager.
This counter can be used to stop the import process in order to investigate the failed objects or references.
@@ -53,7 +54,6 @@ This counter can be used to stop the import process in order to investigate the
Find out more about error handling on the Python client [reference page](/weaviate/client-libraries/python).
-
-
-
-
-
+## Server-side batching
+
+:::caution Technical preview
+
+Server-side batching was added in **`v1.33`** as a **technical preview**.
+This means that the feature is still under development and may change in future releases, including potential breaking changes.
+**We do not recommend using this feature in production environments at this time.**
+
+:::
+
+Here's how to import objects into a collection named `MyCollection`. The client will handle sending the data in optimal chunks.
+
+
+
+
+
+### Error handling
+
+
+
+During a batch import, any failed objects or references will be stored and can be obtained through `batch.failed_objects` and `batch.failed_references`.
+Additionally, a running count of failed objects and references is maintained and can be accessed through `batch.number_errors` within the context manager.
+This counter can be used to stop the import process in order to investigate the failed objects or references.
+
+Find out more about error handling on the Python client [reference page](/weaviate/client-libraries/python).
+
+
+
+
+```typescript
+// TypeScript support coming soon
+```
+
+
+
+
+```java
+// Java support coming soon
+```
+
+
+
+
+```go
+// Go support coming soon
+```
+
+
+
+
## Use the gRPC API
:::info Added in `v1.23`.
@@ -108,20 +159,23 @@ Find out more about error handling on the Python client [reference page](/weavia
The [gRPC API](../api/index.mdx) is faster than the REST API. Use the gRPC API to improve import speeds.
-
+
-The Python client uses gRPC by default.
- The legacy Python client does not support gRPC.
+The Python client uses gRPC by default.
+
+
+The legacy Python client does not support gRPC.
The TypeScript client v3 uses gRPC by default.
- The legacy TypeScript client does not support gRPC.
+
+The legacy TypeScript client does not support gRPC.
@@ -133,6 +187,7 @@ Config config = new Config("http", "localhost:8080");
config.setGRPCSecured(false);
config.setGRPCHost("localhost:50051");
```
+
@@ -155,6 +210,7 @@ if err != nil {
require.Nil(t, err)
}
```
+
@@ -193,7 +249,6 @@ Weaviate generates an UUID for each object. Object IDs must be unique. If you se
language="py"
/>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
## Import with references
@@ -382,6 +424,7 @@ You can batch create links from an object to another other object through cross-
The Python clients have built-in batching methods to help you optimize import speed. For details, see the client documentation:
+
- [Python client `v4`](../client-libraries/python/index.mdx)
### Async Python client and batching
@@ -399,20 +442,20 @@ To try the example code, download the sample data and create the sample input fi
-
+
-
+
@@ -423,61 +466,60 @@ To try the example code, download the sample data and create the sample input fi
-
+
-
+
-
+
-
Stream CSV files example code
-
+
-
+
-
+
@@ -488,7 +530,7 @@ To try the example code, download the sample data and create the sample input fi
:::info Added in `v1.25`.
:::
-import BatchVectorizationOverview from '/_includes/code/client-libraries/batch-import.mdx';
+import BatchVectorizationOverview from "/_includes/code/client-libraries/batch-import.mdx";
@@ -498,7 +540,7 @@ You can configure the batch vectorization settings for each model provider, such
Note that each provider exposes different configuration options.
-
+
+
For details, see [auto-tenant](/weaviate/manage-collections/multi-tenancy#automatically-add-new-tenants).
-
-## Related pages
+## Further resources
- [Connect to Weaviate](/weaviate/connections/index.mdx)
- [How-to: Create objects](./create.mdx)
-- References: REST - /v1/batch
+-
+ References: REST - /v1/batch
+
- [Configuration: Indexes](/weaviate/config-refs/indexing/vector-index.mdx#asynchronous-indexing)
## Questions and feedback
-import DocsFeedback from '/_includes/docs-feedback.mdx';
+import DocsFeedback from "/_includes/docs-feedback.mdx";
-
+
From 7d35fc5015535a141c1c3d3139b90e545d3370f1 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Mon, 25 Aug 2025 12:34:18 +0200
Subject: [PATCH 04/44] Update docs
---
docs/weaviate/concepts/data-import.mdx | 28 ++++++++++++--------------
1 file changed, 13 insertions(+), 15 deletions(-)
diff --git a/docs/weaviate/concepts/data-import.mdx b/docs/weaviate/concepts/data-import.mdx
index 43b51296..48e19a47 100644
--- a/docs/weaviate/concepts/data-import.mdx
+++ b/docs/weaviate/concepts/data-import.mdx
@@ -5,19 +5,17 @@ description: "Theoretical explanation of client-side and server-side batch impor
image: og/docs/concepts.jpg
---
-Weaviate offers two flexible methods for importing data in bulk: **client-side batching** and **server-side batching**. This allows you to choose the best strategy based on your specific needs for control and simplicity.
+Weaviate offers two flexible methods for importing data in bulk: **client-side batching** and **server-side batching**. This allows you to choose the best strategy based on your specific needs.
-### Client-side batching
+- **Client-side batching**
+ In the traditional client-side approach, **your application is responsible for grouping data into batches**. You define the exact size of each batch (e.g., 100 objects) using the appropriate [client library method](../manage-objects/import.mdx). The client then sends these pre-defined chunks to the Weaviate server.
-In the traditional client-side approach, **your application is responsible for grouping data into batches**. You define the exact size of each batch (e.g., 100 objects) using the appropriate [client library method](../manage-objects/import.mdx). The client then sends these pre-defined chunks to the Weaviate server.
+ This method gives you direct control over the import process but may require manual tuning of parameters like `batch_size` and `concurrent_requests` to achieve optimal performance and avoid overwhelming the server.
-This method gives you direct control over the import process but may require manual tuning of parameters like `batch_size` and `concurrent_requests` to achieve optimal performance and avoid overwhelming the server.
+- **Server-side batching**
+ Server-side batching, or **automatic mode**, is a more robust and the recommended approach. Here, the client sends data as a continuous stream, and the **Weaviate server manages the data flow**.
-### Server-side batching
-
-Server-side batching, or **automatic mode**, is a more robust and the recommended approach. Here, the client sends data as a continuous stream, and the **Weaviate server intelligently manages the data flow**.
-
-Using an internal queue and a dynamic **backpressure** mechanism, the server tells the client how much data to send next based on its current workload. This simplifies your client code, eliminates the need for manual tuning, and results in a more efficient and resilient data import process.
+ Using an internal queue and a dynamic _[backpressure](https://en.wikipedia.org/wiki/Backpressure_routing)_ mechanism, the server tells the client how much data to send next based on its current workload. This simplifies your client code, eliminates the need for manual tuning, and results in a more efficient and resilient data import process.
:::tip
@@ -27,7 +25,7 @@ For **code examples**, check out the [How-to: Batch import](../manage-objects/im
---
-## Server-side batching (Automatic mode)
+## Server-side batching
Weaviate's server-side batching, also known as **automatic batching**, is an intelligent mechanism designed to make data ingestion simpler, faster, and more robust. Instead of manually tuning batch parameters on the client side, you can let the server dynamically manage the data flow for optimal performance.
@@ -37,14 +35,14 @@ This mode is a drop-in replacement for `fixed_size` or `dynamic` batching and is
When you use the `automatic()` batching context manager, you initiate a persistent connection to the server for the duration of the batch job.
-1. **Client Sends Data**: Your client sends objects to the server in chunks. These requests execute instantly without waiting for the data to be fully imported.
-2. **Server Manages Queues**: The server places incoming objects into an internal queue system. This decouples the network communication from the actual database work (like vectorization and storage).
-3. **Dynamic Backpressure**: The server continuously monitors its internal queue size. It calculates an exponential moving average (EMA) of its workload and tells the client the ideal number of objects to send in the next chunk. This feedback loop allows the system to self-regulate, maximizing throughput without overwhelming the server.
-4. **Asynchronous Errors**: If an error occurs while processing an object (e.g., validation fails), the server sends the error message back to the client over a separate, dedicated stream without interrupting the flow of other objects.
+1. **Client sends data**: Your client sends objects to the server in chunks. These requests execute instantly without waiting for the data to be fully imported.
+2. **Server manages queues**: The server places incoming objects into an internal queue system. This decouples the network communication from the actual database work (like vectorization and storage).
+3. **Dynamic backpressure**: The server continuously monitors its internal queue size. It calculates an exponential moving average (EMA) of its workload and tells the client the ideal number of objects to send in the next chunk. This feedback loop allows the system to self-regulate, maximizing throughput without overwhelming the server.
+4. **Asynchronous errors**: If an error occurs while processing an object (e.g., validation fails), the server sends the error message back to the client over a separate, dedicated stream without interrupting the flow of other objects.
This architecture centralizes the complex batching logic on the server, resulting in a more efficient and stable data ingestion pipeline for all connected clients.
-:::info Why use automatic batching?
+:::info Why use automatic (server-side) batching?
- **Simplified client code**: No need to tweak `batch_size` and `concurrent_requests` manually. The server determines the optimal batch size based on its current workload.
- **Improved stability**: The system automatically applies **backpressure**. If the server is busy, it will instruct the client to send less data, preventing overloads and request timeouts, which is especially useful during long-running vectorization tasks.
From 81025ed97834fc42d1349585e580368912a5d63a Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Tue, 26 Aug 2025 11:43:12 +0200
Subject: [PATCH 05/44] Update docs
---
docs/weaviate/concepts/data-import.mdx | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/docs/weaviate/concepts/data-import.mdx b/docs/weaviate/concepts/data-import.mdx
index 48e19a47..44f5f053 100644
--- a/docs/weaviate/concepts/data-import.mdx
+++ b/docs/weaviate/concepts/data-import.mdx
@@ -27,6 +27,14 @@ For **code examples**, check out the [How-to: Batch import](../manage-objects/im
## Server-side batching
+:::caution Technical preview
+
+Server-side batching was added in **`v1.33`** as a **technical preview**.
+This means that the feature is still under development and may change in future releases, including potential breaking changes.
+**We do not recommend using this feature in production environments at this time.**
+
+:::
+
Weaviate's server-side batching, also known as **automatic batching**, is an intelligent mechanism designed to make data ingestion simpler, faster, and more robust. Instead of manually tuning batch parameters on the client side, you can let the server dynamically manage the data flow for optimal performance.
This mode is a drop-in replacement for `fixed_size` or `dynamic` batching and is the recommended method for importing data.
From 3b66388281ffd8c302abfee90a5c834708c56765 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Wed, 27 Aug 2025 12:04:22 +0200
Subject: [PATCH 06/44] Add OIDC testing
---
.gitignore | 4 +
.../howto.configure.rbac.oidc.groups.py | 500 ++++++++++++++++++
.../code/python/keycloak_helper_script.py | 493 +++++++++++++++++
tests/docker-compose-anon.yml | 2 +-
tests/docker-compose-rbac.yml | 42 +-
5 files changed, 1036 insertions(+), 5 deletions(-)
create mode 100644 _includes/code/python/howto.configure.rbac.oidc.groups.py
create mode 100644 _includes/code/python/keycloak_helper_script.py
diff --git a/.gitignore b/.gitignore
index e807b5f2..2e4793da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -258,3 +258,7 @@ __marimo__/
# Streamlit
.streamlit/secrets.toml
+
+# C# code
+_includes/code/csharp/bin
+_includes/code/csharp/obj
diff --git a/_includes/code/python/howto.configure.rbac.oidc.groups.py b/_includes/code/python/howto.configure.rbac.oidc.groups.py
new file mode 100644
index 00000000..b1dea599
--- /dev/null
+++ b/_includes/code/python/howto.configure.rbac.oidc.groups.py
@@ -0,0 +1,500 @@
+#!/usr/bin/env python3
+"""
+OIDC Group Management Testing Script with Built-in Keycloak Setup Helper
+Complete example of how to configure RBAC with OIDC groups in Weaviate
+"""
+
+import requests
+import json
+import sys
+from typing import Dict, Any, Optional
+import weaviate
+from weaviate.classes.init import Auth
+from weaviate.classes.rbac import Permissions
+
+def test_keycloak_connection(keycloak_ports: Optional[list] = None) -> Optional[str]:
+    """Probe Keycloak on each host/port; return the first base URL whose master realm responds, else None."""
+    # Try keycloak hostname first (requires /etc/hosts mapping), then localhost
+    keycloak_configs = [
+        (host, port)
+        for host in ("keycloak", "localhost")  # "keycloak" should match Weaviate's expected issuer
+        for port in (keycloak_ports or [8081])  # honor the caller's ports; defaults to 8081
+    ]
+
+    for host, port in keycloak_configs:
+        keycloak_url = f"http://{host}:{port}"
+        try:
+            # First check if Keycloak is responding at all
+            response = requests.get(keycloak_url, timeout=5)
+            if response.status_code == 200:
+                print(f"✅ Keycloak server found at {keycloak_url}")
+
+                # Check if master realm exists (always exists)
+                master_response = requests.get(f"{keycloak_url}/realms/master", timeout=5)
+                if master_response.status_code == 200:
+                    print(f"✅ Keycloak realms accessible")
+
+                    # Check if our test realm exists
+                    test_response = requests.get(f"{keycloak_url}/realms/weaviate-test", timeout=5)
+                    if test_response.status_code == 200:
+                        print(f"✅ weaviate-test realm found")
+                    else:
+                        print(f"⚠️ weaviate-test realm not found - you'll need to create it")
+                    # The server is usable either way; the caller deals with a missing realm
+                    return keycloak_url
+        except Exception as e:
+            print(f"Testing {keycloak_url}: {e}")
+            continue
+
+    print(f"❌ Cannot connect to Keycloak")
+    print("💡 Make sure you have '127.0.0.1 keycloak' in /etc/hosts")
+    print("💡 Run: echo '127.0.0.1 keycloak' | sudo tee -a /etc/hosts")
+    return None
+
+def get_client_secret_instructions():
+ """Print instructions for getting client secret"""
+ print("\n" + "="*60)
+ print("📋 KEYCLOAK SETUP INSTRUCTIONS")
+ print("="*60)
+ print("Your Keycloak is running! Now you need to set it up:")
+ print("")
+ print("1. 🌐 Go to http://keycloak:8081 (make sure you have keycloak in /etc/hosts)")
+ print("2. 🔑 Login with username: admin, password: admin")
+ print("3. 🏛️ CREATE REALM:")
+ print(" - Click dropdown next to 'master' (top-left)")
+ print(" - Click 'Create Realm'")
+ print(" - Realm name: weaviate-test")
+ print(" - Click 'Create'")
+ print("")
+ print("4. 📱 CREATE CLIENT:")
+ print(" - Go to 'Clients' → 'Create client'")
+ print(" - Client ID: weaviate")
+ print(" - Client type: OpenID Connect")
+ print(" - Click 'Next'")
+ print(" - Enable 'Client authentication'")
+ print(" - Enable 'Direct access grants'")
+ print(" - Click 'Save'")
+ print("")
+ print("5. 🔐 GET CLIENT SECRET:")
+ print(" - Go to 'Credentials' tab")
+ print(" - Copy the 'Client secret' value")
+ print("")
+ print("6. 👥 CREATE GROUPS:")
+ print(" - Go to 'Groups'")
+ print(" - Create: /admin-group, /viewer-group, /my-test-group")
+ print("")
+ print("7. 👤 CREATE USERS:")
+ print(" - Go to 'Users' → 'Create new user'")
+ print(" - Username: test-admin, set password: password123")
+ print(" - Add to groups: /admin-group, /my-test-group")
+ print(" - Create test-viewer user, add to /viewer-group")
+ print("")
+ print("8. ⚙️ CONFIGURE GROUP CLAIMS:")
+ print(" - Go to 'Client scopes' → 'weaviate-dedicated'")
+ print(" - Add mapper → Group Membership")
+ print(" - Name: groups, Token Claim Name: groups")
+ print(" - Add to ID token: ON, Add to access token: ON")
+ print("="*60)
+
+def get_oidc_token(
+ keycloak_url: str = "http://keycloak:8081", # Changed default to keycloak hostname
+ realm: str = "weaviate-test",
+ client_id: str = "weaviate",
+ client_secret: str = "",
+ username: str = "test-admin",
+ password: str = "password123"
+) -> Optional[str]:
+ """Get OIDC token from Keycloak for a user"""
+ token_url = f"{keycloak_url}/realms/{realm}/protocol/openid-connect/token"
+
+ data = {
+ "grant_type": "password",
+ "client_id": client_id,
+ "client_secret": client_secret,
+ "username": username,
+ "password": password
+ }
+
+ try:
+ response = requests.post(token_url, data=data, timeout=10)
+
+ if response.status_code == 200:
+ token_data = response.json()
+ print(f"✅ Successfully got token for user: {username}")
+ return token_data["access_token"]
+ else:
+ print(f"❌ Failed to get token for {username}: {response.status_code}")
+ if response.status_code == 401:
+ print(" → Check username/password or client secret")
+ elif response.status_code == 400:
+ print(" → Check client configuration (Direct Access Grants enabled?)")
+ print(f" → Response: {response.text}")
+ return None
+
+ except Exception as e:
+ print(f"❌ Error getting token for {username}: {e}")
+ return None
+
+def decode_token_groups(token: str) -> Dict[str, Any]:
+    """Return every claim of the JWT WITHOUT verifying its signature (debugging aid only)."""
+    claims: Dict[str, Any] = {}  # stays empty when PyJWT is missing or decoding fails
+    try:
+        # PyJWT is imported lazily, inside the try, so a missing package is reported below
+        import jwt
+        claims = jwt.decode(token, options={"verify_signature": False})
+    except ImportError:
+        print("⚠️ Install PyJWT to see token details: pip install PyJWT")
+    except Exception as decode_error:
+        print(f"⚠️ Error decoding token: {decode_error}")
+    return claims
+
+def show_token_info(token: str, username: str):
+ """Display token information"""
+ decoded = decode_token_groups(token)
+ if decoded:
+ print(f" → Username: {decoded.get('preferred_username', 'N/A')}")
+ print(f" → Groups: {decoded.get('groups', [])}")
+ print(f" → Issuer: {decoded.get('iss', 'N/A')}")
+ print(f" → Realm roles: {decoded.get('realm_access', {}).get('roles', [])}")
+
+def run_automated_setup(keycloak_url: str) -> Optional[str]:
+    """Create the weaviate-test realm and the weaviate client via the Admin API; return the client secret or None."""
+    try:
+        # Minimal inline helper: it only creates the realm and the client.
+        # Groups, users and protocol mappers are NOT created here (see keycloak_helper_script.py).
+        class KeycloakSetup:
+            def __init__(self, keycloak_url: str = "http://keycloak:8081", admin_user: str = "admin", admin_pass: str = "admin"):
+                self.keycloak_url = keycloak_url.rstrip("/")
+                self.admin_user = admin_user
+                self.admin_pass = admin_pass
+                self.admin_token = None
+                self.realm_name = "weaviate-test"
+                self.client_id = "weaviate"
+
+            def get_admin_token(self) -> Optional[str]:
+                token_url = f"{self.keycloak_url}/realms/master/protocol/openid-connect/token"
+                data = {
+                    "grant_type": "password",
+                    "client_id": "admin-cli",
+                    "username": self.admin_user,
+                    "password": self.admin_pass,
+                }
+                try:
+                    response = requests.post(token_url, data=data, timeout=10)
+                    if response.status_code == 200:
+                        self.admin_token = response.json()["access_token"]
+                        return self.admin_token
+                    return None
+                except Exception:
+                    return None
+
+            def make_admin_request(self, method: str, endpoint: str, json_data: Optional[Dict] = None) -> requests.Response:
+                headers = {
+                    "Authorization": f"Bearer {self.admin_token}",
+                    "Content-Type": "application/json",
+                }
+                url = f"{self.keycloak_url}/admin/realms{endpoint}"
+                if method.upper() == "GET":
+                    return requests.get(url, headers=headers, timeout=10)
+                elif method.upper() == "POST":
+                    return requests.post(url, headers=headers, json=json_data, timeout=10)
+                else:
+                    raise ValueError(f"Unsupported HTTP method: {method}")  # never return None silently
+
+            def create_realm(self) -> bool:
+                response = self.make_admin_request("GET", f"/{self.realm_name}")
+                if response.status_code == 200:
+                    return True
+                realm_config = {"realm": self.realm_name, "enabled": True}
+                response = self.make_admin_request("POST", "", realm_config)
+                return response.status_code == 201
+
+            def create_client(self) -> Optional[str]:
+                client_config = {
+                    "clientId": self.client_id,
+                    "enabled": True,
+                    "clientAuthenticatorType": "client-secret",
+                    "secret": "weaviate-client-secret-123",
+                    "redirectUris": ["*"],
+                    "standardFlowEnabled": True,
+                    "directAccessGrantsEnabled": True,
+                    "publicClient": False,
+                }
+                response = self.make_admin_request("POST", f"/{self.realm_name}/clients", client_config)
+                if response.status_code == 201:
+                    return "weaviate-client-secret-123"
+                return None
+
+            def setup_all(self) -> Optional[str]:
+                if not self.get_admin_token():
+                    return None
+                if not self.create_realm():
+                    return None
+                return self.create_client()
+
+        print("🤖 Running automated Keycloak setup...")
+        setup = KeycloakSetup(keycloak_url)
+        client_secret = setup.setup_all()
+
+        if client_secret:
+            print(f"\n🎉 Automated setup completed successfully!")
+            return client_secret
+        else:
+            print("❌ Automated setup failed")
+            return None
+    except Exception as e:
+        print(f"❌ Automated setup error: {e}")
+        return None
+
+def setup_and_validate_oidc() -> tuple[Optional[str], Optional[str]]:
+    """Interactively set up and validate OIDC; return (client_secret, keycloak_url), or (None, None) on failure."""
+    print("🧪 KEYCLOAK OIDC SETUP VALIDATOR")
+    print("=" * 50)
+
+    # Test Keycloak connection
+    print("Testing Keycloak connection...")
+    keycloak_url = test_keycloak_connection([8081])
+    if not keycloak_url:
+        print("❌ Keycloak not accessible!")
+        print("\nTroubleshooting:")
+        print("1. Add keycloak to /etc/hosts: echo '127.0.0.1 keycloak' | sudo tee -a /etc/hosts")
+        print("2. Check if docker-compose is running: docker-compose ps")
+        print("3. Check Keycloak logs: docker-compose logs keycloak")
+        return None, None
+
+    # Check if weaviate-test realm exists
+    try:
+        realm_response = requests.get(f"{keycloak_url}/realms/weaviate-test", timeout=5)
+        realm_exists = realm_response.status_code == 200
+    except requests.RequestException:  # a bare except would also swallow Ctrl-C / SystemExit
+        realm_exists = False
+
+    if not realm_exists:
+        print(f"\n⚠️ The 'weaviate-test' realm doesn't exist yet.")
+        print("Choose your setup method:")
+        print("1. 🤖 Automated setup (recommended)")
+        print("2. 📋 Manual setup instructions")
+
+        choice = input("\nEnter choice (1 or 2): ").strip()
+
+        if choice == "1":
+            client_secret = run_automated_setup(keycloak_url)
+            if not client_secret:
+                return None, None
+        else:
+            get_client_secret_instructions()
+            setup_choice = input("\nHave you completed the manual setup? (y/n): ").strip().lower()
+            if setup_choice != 'y':
+                print("Please complete the Keycloak setup first, then run this script again.")
+                return None, None
+            client_secret = input("Enter Keycloak client secret: ").strip()
+            if not client_secret:
+                print("❌ Client secret required.")
+                return None, None
+    else:
+        print(f"✅ weaviate-test realm accessible")
+        print("\n" + "-"*30)
+        client_secret = input("Enter Keycloak client secret (or 'auto' for automated setup): ").strip()
+
+        if client_secret.lower() == 'auto':
+            client_secret = run_automated_setup(keycloak_url)
+            if not client_secret:
+                return None, None
+        elif not client_secret:
+            print("❌ Client secret required.")
+            return None, None
+
+    # Test tokens with the keycloak_url (which should be http://keycloak:8081)
+    print(f"\n🔑 Testing OIDC tokens...")
+    admin_token = get_oidc_token(keycloak_url=keycloak_url, client_secret=client_secret, username="test-admin")
+
+    if not admin_token:
+        print("\n❌ Cannot get admin token. Please verify:")
+        print("- User 'test-admin' exists with password 'password123'")
+        print("- User is in groups like '/admin-group'")
+        print("- Client 'weaviate' has 'Direct access grants' enabled")
+        print("- Client secret is correct")
+        return None, None
+
+    show_token_info(admin_token, "test-admin")
+
+    viewer_token = get_oidc_token(keycloak_url=keycloak_url, client_secret=client_secret, username="test-viewer")
+    if viewer_token:
+        show_token_info(viewer_token, "test-viewer")
+    else:
+        print("⚠️ Viewer token failed, but continuing with admin token")
+
+    print("\n✅ OIDC setup validated successfully!")
+    return client_secret, keycloak_url
+
+def main():
+ """Main function to run OIDC group management tests"""
+
+ # Setup and validate OIDC first
+ client_secret, keycloak_url = setup_and_validate_oidc()
+ if not client_secret or not keycloak_url:
+ sys.exit(1)
+
+ print("\n" + "="*60)
+ print("🚀 STARTING OIDC GROUP MANAGEMENT TESTS")
+ print("="*60)
+
+ # START AdminClient
+ # Connect to Weaviate as root user (for admin operations)
+ admin_client = weaviate.connect_to_local(
+ port=8580,
+ grpc_port=50551,
+ auth_credentials=Auth.api_key("root-user-key"),
+ )
+ # END AdminClient
+
+ # Create test roles for group management
+ print("\n📋 Setting up test roles...")
+ permissions = [
+ Permissions.collections(
+ collection="TargetCollection*", read_config=True, create_collection=True
+ ),
+ Permissions.data(collection="TargetCollection*", read=True, create=True),
+ ]
+
+ admin_client.roles.delete(role_name="testRole")
+ admin_client.roles.create(role_name="testRole", permissions=permissions)
+
+ admin_client.roles.delete(role_name="groupViewerRole")
+ admin_client.roles.create(role_name="groupViewerRole", permissions=[
+ Permissions.data(collection="*", read=True)
+ ])
+
+ print("\n🔧 ADMIN OPERATIONS (Using API Key)")
+ print("-" * 40)
+
+ # START AssignOidcGroupRoles
+ admin_client.groups.oidc.assign_roles(group_id="/admin-group", role_names=["testRole", "viewer"])
+ admin_client.groups.oidc.assign_roles(group_id="/viewer-group", role_names=["viewer"])
+ admin_client.groups.oidc.assign_roles(group_id="/my-test-group", role_names=["groupViewerRole"])
+ print("✅ Assigned roles to OIDC groups")
+ # END AssignOidcGroupRoles
+
+ # START GetKnownOidcGroups
+ known_groups = admin_client.groups.oidc.get_known_group_names()
+ print(f"📁 Known OIDC groups ({len(known_groups)}):")
+ for group in known_groups:
+ print(f" - {group}")
+ # END GetKnownOidcGroups
+
+ # START GetGroupAssignments
+ group_assignments = admin_client.roles.get_group_assignments(role_name="testRole")
+ print(f"👥 Groups assigned to role 'testRole':")
+ for group in group_assignments:
+ print(f" - Group ID: {group.group_id}")
+ print(f" Group Type: {group.group_type}")
+ # END GetGroupAssignments
+
+ admin_client.close()
+
+ print(f"\n👤 OIDC USER OPERATIONS")
+ print("-" * 40)
+
+ # Get tokens for different users using keycloak_url
+ # START GetOidcToken
+ admin_token = get_oidc_token(keycloak_url=keycloak_url, client_secret=client_secret, username="test-admin")
+ viewer_token = get_oidc_token(keycloak_url=keycloak_url, client_secret=client_secret, username="test-viewer")
+ # END GetOidcToken
+
+ if admin_token:
+ # START OidcAdminClient
+ # Connect as OIDC admin user
+ oidc_admin_client = weaviate.connect_to_local(
+ port=8580,
+ grpc_port=50551,
+ auth_credentials=Auth.bearer_token(admin_token),
+ )
+ # END OidcAdminClient
+
+ try:
+ # START GetCurrentUserRoles
+ my_user = oidc_admin_client.users.get_my_user()
+ current_roles = my_user.roles if my_user else []
+ print(f"🔑 Admin user's current roles ({len(current_roles)}):")
+ for role in current_roles:
+ print(f" - {role}")
+ # END GetCurrentUserRoles
+
+ # Test admin can see group assignments (if they have permissions)
+ try:
+ # START GetOidcGroupRoles
+ group_roles = oidc_admin_client.groups.oidc.get_assigned_roles(
+ group_id="/admin-group",
+ include_permissions=True
+ )
+ print(f"👑 Admin group roles:")
+ for role_name, role_obj in group_roles.items():
+ print(f" - {role_name}")
+ # END GetOidcGroupRoles
+ except Exception as e:
+ print(f"⚠️ Admin user cannot access group details: {e}")
+
+ except Exception as e:
+ print(f"❌ Error with admin OIDC operations: {e}")
+ finally:
+ oidc_admin_client.close()
+
+ if viewer_token:
+ # START OidcViewerClient
+ # Connect as OIDC viewer user
+ oidc_viewer_client = weaviate.connect_to_local(
+ port=8580,
+ grpc_port=50551,
+ auth_credentials=Auth.bearer_token(viewer_token),
+ )
+ # END OidcViewerClient
+
+ try:
+ # START GetCurrentUserRolesViewer
+ my_user = oidc_viewer_client.users.get_my_user()
+ current_roles = my_user.roles if my_user else []
+ print(f"👀 Viewer user's current roles ({len(current_roles)}):")
+ for role in current_roles:
+ print(f" - {role}")
+ # END GetCurrentUserRolesViewer
+
+ # Viewer should have limited permissions
+ try:
+ viewer_groups = oidc_viewer_client.groups.oidc.get_known_group_names()
+ print(f"👀 Viewer can see groups: {viewer_groups}")
+ except Exception as e:
+ print(f"⚠️ Viewer user cannot access group operations: {e}")
+
+ except Exception as e:
+ print(f"❌ Error with viewer OIDC operations: {e}")
+ finally:
+ oidc_viewer_client.close()
+
+ print(f"\n🧹 CLEANUP (Admin operations)")
+ print("-" * 40)
+
+ # Reconnect as admin for cleanup
+ admin_client = weaviate.connect_to_local(
+ port=8580,
+ grpc_port=50551,
+ auth_credentials=Auth.api_key("root-user-key"),
+ )
+
+ # START RevokeOidcGroupRoles
+ admin_client.groups.oidc.revoke_roles(group_id="/admin-group", role_names=["testRole", "viewer"])
+ admin_client.groups.oidc.revoke_roles(group_id="/viewer-group", role_names=["viewer"])
+ admin_client.groups.oidc.revoke_roles(group_id="/my-test-group", role_names=["groupViewerRole"])
+ print("✅ Revoked all assigned group roles")
+ # END RevokeOidcGroupRoles
+
+ # Verify cleanup
+ final_groups = admin_client.groups.oidc.get_known_group_names()
+ print(f"📁 Remaining known groups after cleanup: {final_groups}")
+
+ admin_client.close()
+
+ print(f"\n🎉 OIDC GROUP MANAGEMENT TESTING COMPLETE!")
+ print("="*60)
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/_includes/code/python/keycloak_helper_script.py b/_includes/code/python/keycloak_helper_script.py
new file mode 100644
index 00000000..c9f0eda0
--- /dev/null
+++ b/_includes/code/python/keycloak_helper_script.py
@@ -0,0 +1,493 @@
+#!/usr/bin/env python3
+"""
+Automated Keycloak Setup Script
+Creates realm, client, users, groups, and configures everything needed for OIDC testing
+"""
+
+import requests
+import json
+import time
+from typing import Optional, Dict, Any
+
+
+class KeycloakSetup:
+ def __init__(
+ self,
+ keycloak_url: str = "http://localhost:8081",
+ admin_user: str = "admin",
+ admin_pass: str = "admin",
+ ):
+ self.keycloak_url = keycloak_url.rstrip("/")
+ self.admin_user = admin_user
+ self.admin_pass = admin_pass
+ self.admin_token = None
+ self.realm_name = "weaviate-test"
+ self.client_id = "weaviate"
+
+ def get_admin_token(self) -> Optional[str]:
+ """Get admin token from master realm"""
+ print("🔑 Getting admin token...")
+
+ token_url = f"{self.keycloak_url}/realms/master/protocol/openid-connect/token"
+ data = {
+ "grant_type": "password",
+ "client_id": "admin-cli",
+ "username": self.admin_user,
+ "password": self.admin_pass,
+ }
+
+ try:
+ response = requests.post(token_url, data=data, timeout=10)
+ if response.status_code == 200:
+ self.admin_token = response.json()["access_token"]
+ print("✅ Got admin token")
+ return self.admin_token
+ else:
+ print(f"❌ Failed to get admin token: {response.status_code}")
+ print(f"Response: {response.text}")
+ return None
+ except Exception as e:
+ print(f"❌ Error getting admin token: {e}")
+ return None
+
+ def make_admin_request(
+ self, method: str, endpoint: str, json_data: Dict = None
+ ) -> requests.Response:
+ """Make authenticated request to Keycloak Admin API"""
+ if not self.admin_token:
+ raise Exception("No admin token available")
+
+ headers = {
+ "Authorization": f"Bearer {self.admin_token}",
+ "Content-Type": "application/json",
+ }
+
+ url = f"{self.keycloak_url}/admin/realms{endpoint}"
+
+ if method.upper() == "GET":
+ return requests.get(url, headers=headers, timeout=10)
+ elif method.upper() == "POST":
+ return requests.post(url, headers=headers, json=json_data, timeout=10)
+ elif method.upper() == "PUT":
+ return requests.put(url, headers=headers, json=json_data, timeout=10)
+ elif method.upper() == "DELETE":
+ return requests.delete(url, headers=headers, timeout=10)
+ else:
+ raise ValueError(f"Unsupported HTTP method: {method}")
+
+ def create_realm(self) -> bool:
+ """Create the weaviate-test realm"""
+ print(f"🏛️ Creating realm '{self.realm_name}'...")
+
+ # Check if realm already exists
+ response = self.make_admin_request("GET", f"/{self.realm_name}")
+ if response.status_code == 200:
+ print(f"✅ Realm '{self.realm_name}' already exists")
+ return True
+
+ # Create realm
+ realm_config = {
+ "realm": self.realm_name,
+ "enabled": True,
+ "displayName": "Weaviate Test Realm",
+ "registrationAllowed": False,
+ "loginWithEmailAllowed": True,
+ "duplicateEmailsAllowed": False,
+ "rememberMe": True,
+ "verifyEmail": False,
+ "loginTheme": None,
+ "accountTheme": None,
+ "adminTheme": None,
+ "emailTheme": None,
+ }
+
+ response = self.make_admin_request("POST", "", realm_config)
+ if response.status_code == 201:
+ print(f"✅ Created realm '{self.realm_name}'")
+ return True
+ else:
+ print(f"❌ Failed to create realm: {response.status_code}")
+ print(f"Response: {response.text}")
+ return False
+
+ def create_client(self) -> Optional[str]:
+ """Create the weaviate client and return client secret"""
+ print(f"📱 Creating client '{self.client_id}'...")
+
+ # Check if client already exists
+ response = self.make_admin_request("GET", f"/{self.realm_name}/clients")
+ if response.status_code == 200:
+ clients = response.json()
+ for client in clients:
+ if client.get("clientId") == self.client_id:
+ print(f"✅ Client '{self.client_id}' already exists")
+ client_uuid = client["id"]
+ return self.get_client_secret(client_uuid)
+
+ # Create client
+ client_config = {
+ "clientId": self.client_id,
+ "name": "Weaviate OIDC Client",
+ "enabled": True,
+ "clientAuthenticatorType": "client-secret",
+ "secret": "weaviate-client-secret-123", # Fixed secret for easier testing
+ "redirectUris": ["*"],
+ "webOrigins": ["*"],
+ "standardFlowEnabled": True,
+ "directAccessGrantsEnabled": True, # Enable Direct Access Grants
+ "serviceAccountsEnabled": True,
+ "publicClient": False,
+ "protocol": "openid-connect",
+ "attributes": {
+ "saml.assertion.signature": "false",
+ "saml.force.post.binding": "false",
+ "saml.multivalued.roles": "false",
+ "saml.encrypt": "false",
+ "saml.server.signature": "false",
+ "saml.server.signature.keyinfo.ext": "false",
+ "exclude.session.state.from.auth.response": "false",
+ "saml_force_name_id_format": "false",
+ "saml.client.signature": "false",
+ "tls.client.certificate.bound.access.tokens": "false",
+ "saml.authnstatement": "false",
+ "display.on.consent.screen": "false",
+ "saml.onetimeuse.condition": "false",
+ },
+ }
+
+ response = self.make_admin_request(
+ "POST", f"/{self.realm_name}/clients", client_config
+ )
+ if response.status_code == 201:
+ print(f"✅ Created client '{self.client_id}'")
+
+ # Get the client UUID
+ response = self.make_admin_request("GET", f"/{self.realm_name}/clients")
+ clients = response.json()
+ for client in clients:
+ if client.get("clientId") == self.client_id:
+ client_uuid = client["id"]
+ return self.get_client_secret(client_uuid)
+ else:
+ print(f"❌ Failed to create client: {response.status_code}")
+ print(f"Response: {response.text}")
+ return None
+
+ def get_client_secret(self, client_uuid: str) -> Optional[str]:
+ """Get client secret"""
+ response = self.make_admin_request(
+ "GET", f"/{self.realm_name}/clients/{client_uuid}/client-secret"
+ )
+ if response.status_code == 200:
+ secret = response.json().get("value")
+ print(f"🔐 Client secret: {secret}")
+ return secret
+ return None
+
+    def create_groups(self) -> bool:
+        """Create the test groups that do not exist yet; return False as soon as one creation fails."""
+        print("👥 Creating groups...")
+
+        groups_to_create = [
+            {"name": "admin-group", "path": "/admin-group"},
+            {"name": "viewer-group", "path": "/viewer-group"},
+            {"name": "my-test-group", "path": "/my-test-group"},
+            {"name": "another-test-group", "path": "/another-test-group"},
+        ]
+
+        # Fetch the existing groups once instead of once per group: every path in
+        # groups_to_create is distinct, so the lookup result is loop-invariant.
+        response = self.make_admin_request("GET", f"/{self.realm_name}/groups")
+        existing_paths = set()
+        if response.status_code == 200:
+            existing_paths = {g.get("path") for g in response.json()}
+
+        for group_config in groups_to_create:
+            # Skip groups that are already present
+            if group_config["path"] in existing_paths:
+                print(f"✅ Group '{group_config['path']}' already exists")
+                continue
+
+            # Create group
+            response = self.make_admin_request(
+                "POST", f"/{self.realm_name}/groups", group_config
+            )
+            if response.status_code == 201:
+                print(f"✅ Created group '{group_config['path']}'")
+            else:
+                print(
+                    f"❌ Failed to create group '{group_config['path']}': {response.status_code}"
+                )
+                return False
+
+        return True
+
+ def create_users(self) -> bool:
+ """Create test users"""
+ print("👤 Creating users...")
+
+ users_to_create = [
+ {
+ "username": "test-admin",
+ "email": "admin@test.com",
+ "firstName": "Test",
+ "lastName": "Admin",
+ "password": "password123",
+ "groups": ["/admin-group", "/my-test-group"],
+ },
+ {
+ "username": "test-viewer",
+ "email": "viewer@test.com",
+ "firstName": "Test",
+ "lastName": "Viewer",
+ "password": "password123",
+ "groups": ["/viewer-group"],
+ },
+ ]
+
+ for user_config in users_to_create:
+ # Check if user exists
+ response = self.make_admin_request(
+ "GET", f"/{self.realm_name}/users?username={user_config['username']}"
+ )
+ if response.status_code == 200 and response.json():
+ print(f"✅ User '{user_config['username']}' already exists")
+ continue
+
+ # Create user
+ user_data = {
+ "username": user_config["username"],
+ "email": user_config["email"],
+ "firstName": user_config["firstName"],
+ "lastName": user_config["lastName"],
+ "enabled": True,
+ "emailVerified": True,
+ }
+
+ response = self.make_admin_request(
+ "POST", f"/{self.realm_name}/users", user_data
+ )
+ if response.status_code == 201:
+ print(f"✅ Created user '{user_config['username']}'")
+
+ # Get user ID from location header
+ user_id = response.headers["Location"].split("/")[-1]
+
+ # Set password
+ password_data = {
+ "type": "password",
+ "value": user_config["password"],
+ "temporary": False,
+ }
+
+ pwd_response = self.make_admin_request(
+ "PUT",
+ f"/{self.realm_name}/users/{user_id}/reset-password",
+ password_data,
+ )
+ if pwd_response.status_code == 204:
+ print(f"✅ Set password for '{user_config['username']}'")
+
+ # Add user to groups
+ self.add_user_to_groups(user_id, user_config["groups"])
+
+ else:
+ print(
+ f"❌ Failed to create user '{user_config['username']}': {response.status_code}"
+ )
+ return False
+
+ return True
+
+ def add_user_to_groups(self, user_id: str, group_paths: list):
+ """Add user to specified groups"""
+ # Get all groups
+ response = self.make_admin_request("GET", f"/{self.realm_name}/groups")
+ if response.status_code != 200:
+ print("❌ Failed to get groups")
+ return
+
+ all_groups = response.json()
+
+ for group_path in group_paths:
+ # Find group by path
+ group_id = None
+ for group in all_groups:
+ if group.get("path") == group_path:
+ group_id = group["id"]
+ break
+
+ if group_id:
+ # Add user to group
+ response = self.make_admin_request(
+ "PUT", f"/{self.realm_name}/users/{user_id}/groups/{group_id}", {}
+ )
+ if response.status_code == 204:
+ print(f"✅ Added user to group '{group_path}'")
+ else:
+ print(
+ f"❌ Failed to add user to group '{group_path}': {response.status_code}"
+ )
+ else:
+ print(f"❌ Group '{group_path}' not found")
+
+ def configure_group_mapper(self, client_uuid: str) -> bool:
+ """Configure group membership mapper directly on the client."""
+ print("⚙️ Configuring group mapper...")
+
+ mapper_config = {
+ "name": "groups",
+ "protocol": "openid-connect",
+ "protocolMapper": "oidc-group-membership-mapper",
+ "consentRequired": False,
+ "config": {
+ "full.path": "true", # Use true for full path like /admin-group
+ "id.token.claim": "true",
+ "access.token.claim": "true",
+ "claim.name": "groups",
+ "userinfo.token.claim": "true",
+ },
+ }
+
+ # The endpoint is now directly on the client
+ response = self.make_admin_request(
+ "POST",
+ f"/{self.realm_name}/clients/{client_uuid}/protocol-mappers/models",
+ mapper_config,
+ )
+
+ if response.status_code == 201:
+ print("✅ Configured group membership mapper")
+ return True
+ elif response.status_code == 409: # 409 Conflict means it already exists
+ print("✅ Group membership mapper already exists")
+ return True
+ else:
+ print(f"❌ Failed to configure group mapper: {response.status_code}")
+ print(f"Response: {response.text}")
+ return False
+
+ def configure_audience_mapper(self, client_uuid: str) -> bool:
+ """Configure audience mapper directly on the client."""
+ print("⚙️ Configuring audience mapper...")
+
+ mapper_config = {
+ "name": "weaviate-audience",
+ "protocol": "openid-connect",
+ "protocolMapper": "oidc-audience-mapper",
+ "config": {
+ "id.token.claim": "false",
+ "access.token.claim": "true",
+ "included.client.audience": self.client_id,
+ },
+ }
+
+ # The endpoint is now directly on the client
+ response = self.make_admin_request(
+ "POST",
+ f"/{self.realm_name}/clients/{client_uuid}/protocol-mappers/models",
+ mapper_config,
+ )
+
+ if response.status_code == 201:
+ print("✅ Configured audience mapper")
+ return True
+ elif response.status_code == 409: # 409 Conflict means it already exists
+ print("✅ Audience mapper already exists")
+ return True
+ else:
+ print(f"❌ Failed to configure audience mapper: {response.status_code}")
+ print(f"Response: {response.text}")
+ return False
+
+    def setup_all(self) -> Optional[str]:
+        """Run every setup step in order; return the client secret, or None as soon as a step fails."""
+        print("🚀 AUTOMATED KEYCLOAK SETUP")
+        print("=" * 50)
+
+        # Get admin token
+        if not self.get_admin_token():
+            return None
+
+        # Create realm
+        if not self.create_realm():
+            return None
+
+        # Create client and get secret
+        client_secret = self.create_client()
+        if not client_secret:
+            return None
+
+        # Create groups
+        if not self.create_groups():
+            return None
+
+        # Create users
+        if not self.create_users():
+            return None
+
+        # Get client UUID for mapper configuration
+        response = self.make_admin_request("GET", f"/{self.realm_name}/clients")
+        client_uuid = None
+        if response.status_code == 200:
+            clients = response.json()
+            for client in clients:
+                if client.get("clientId") == self.client_id:
+                    client_uuid = client["id"]
+                    break
+
+        # Do not report success if the client was not found or either mapper could not be configured
+        if not (client_uuid and self.configure_group_mapper(client_uuid) and self.configure_audience_mapper(client_uuid)):
+            return None
+
+        print("\n🎉 KEYCLOAK SETUP COMPLETE!")
+        print("=" * 50)
+        print(f"🌐 Keycloak URL: {self.keycloak_url}")
+        print(f"🏛️ Realm: {self.realm_name}")
+        print(f"📱 Client ID: {self.client_id}")
+        print(f"🔐 Client Secret: {client_secret}")
+        print("\n👤 Test Users:")
+        print(" - Username: test-admin, Password: password123")
+        print(" Groups: /admin-group, /my-test-group")
+        print(" - Username: test-viewer, Password: password123")
+        print(" Groups: /viewer-group")
+
+        return client_secret
+
+
+def main():
+ """Main function to run automated setup"""
+
+ # Test multiple ports
+ for port in [8081, 8082]:
+ keycloak_url = f"http://localhost:{port}"
+ print(f"Testing Keycloak at {keycloak_url}...")
+
+ try:
+ response = requests.get(keycloak_url, timeout=5)
+ if response.status_code == 200:
+ print(f"✅ Found Keycloak at {keycloak_url}")
+
+ # Run automated setup
+ setup = KeycloakSetup(keycloak_url)
+ client_secret = setup.setup_all()
+
+ if client_secret:
+ print(f"\n🔧 Your client secret for the Python script:")
+ print(f" {client_secret}")
+ print(f"\n▶️ Now run your OIDC Python script!")
+ return
+ else:
+ print("❌ Setup failed")
+ return
+
+ except Exception as e:
+ print(f"Cannot connect to {keycloak_url}: {e}")
+ continue
+
+ print("❌ Keycloak not found on ports 8081 or 8082")
+ print("Make sure docker-compose is running!")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/docker-compose-anon.yml b/tests/docker-compose-anon.yml
index 4052eda7..fae03eaa 100644
--- a/tests/docker-compose-anon.yml
+++ b/tests/docker-compose-anon.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.32.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-dev-4d2ab70.arm64
ports:
- 8080:8080
- 50051:50051
diff --git a/tests/docker-compose-rbac.yml b/tests/docker-compose-rbac.yml
index 712a09b8..bc029512 100644
--- a/tests/docker-compose-rbac.yml
+++ b/tests/docker-compose-rbac.yml
@@ -1,4 +1,3 @@
----
services:
weaviate_rbac:
command:
@@ -8,11 +7,13 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.32.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-dev-4d2ab70.arm64
ports:
- 8580:8080
- 50551:50051
restart: on-failure:0
+ depends_on:
+ - keycloak
environment:
QUERY_DEFAULTS_LIMIT: 25
PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
@@ -21,11 +22,44 @@ services:
ENABLE_API_BASED_MODULES: 'true'
BACKUP_FILESYSTEM_PATH: '/var/lib/weaviate/backups'
CLUSTER_HOSTNAME: 'node1'
- # AuthN and AuthZ settings
AUTHENTICATION_APIKEY_ENABLED: 'true'
AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'root-user-key'
AUTHENTICATION_APIKEY_USERS: 'root-user'
AUTHORIZATION_RBAC_ENABLED: 'true'
AUTHORIZATION_RBAC_ROOT_USERS: 'root-user'
AUTHENTICATION_DB_USERS_ENABLED: 'true'
-...
+ AUTHENTICATION_OIDC_ENABLED: 'true'
+ # Changed to use service name instead of localhost
+ #AUTHENTICATION_OIDC_ISSUER: 'http://localhost:8081/realms/weaviate-test'
+ AUTHENTICATION_OIDC_ISSUER: 'http://keycloak:8081/realms/weaviate-test'
+ AUTHENTICATION_OIDC_USERNAME_CLAIM: 'preferred_username'
+ AUTHENTICATION_OIDC_GROUPS_CLAIM: 'groups'
+ AUTHENTICATION_OIDC_CLIENT_ID: 'weaviate'
+
+ keycloak:
+ image: quay.io/keycloak/keycloak:24.0.3
+ ports:
+ - "8081:8081"
+ environment:
+ KEYCLOAK_ADMIN: admin
+ KEYCLOAK_ADMIN_PASSWORD: admin
+ KC_HTTP_ENABLED: 'true'
+ KC_HTTP_PORT: '8081'
+ KC_HOSTNAME_STRICT: 'false'
+ KC_HOSTNAME_STRICT_HTTPS: 'false'
+ # Use localhost so it works from both inside and outside Docker
+ #KC_HOSTNAME: localhost
+ KC_HOSTNAME_PORT: 8081
+ # This makes Keycloak use localhost URLs in OIDC discovery
+ KC_FRONTEND_URL: 'http://localhost:8081'
+ command:
+ - start-dev
+ volumes:
+ - keycloak_data:/opt/keycloak/data
+ # Remove health check for now since Keycloak is working
+ # healthcheck:
+ # test: ["CMD", "echo", "healthy"]
+ # interval: 10s
+
+volumes:
+ keycloak_data:
\ No newline at end of file
From a639a846189f4e51aa0b3f794bd6fe3de6951626 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Wed, 27 Aug 2025 12:50:07 +0200
Subject: [PATCH 07/44] Update docs
---
.../howto.configure.rbac.oidc.groups.py | 631 +++++++-----------
.../configuration/rbac/manage-groups.mdx | 252 +++++++
sidebars.js | 1 +
3 files changed, 508 insertions(+), 376 deletions(-)
create mode 100644 docs/weaviate/configuration/rbac/manage-groups.mdx
diff --git a/_includes/code/python/howto.configure.rbac.oidc.groups.py b/_includes/code/python/howto.configure.rbac.oidc.groups.py
index b1dea599..d93c8b72 100644
--- a/_includes/code/python/howto.configure.rbac.oidc.groups.py
+++ b/_includes/code/python/howto.configure.rbac.oidc.groups.py
@@ -5,68 +5,75 @@
"""
import requests
-import json
import sys
from typing import Dict, Any, Optional
import weaviate
from weaviate.classes.init import Auth
from weaviate.classes.rbac import Permissions
+
def test_keycloak_connection(keycloak_ports: list = [8081]) -> Optional[str]:
"""Test if Keycloak is accessible on common ports"""
# Try keycloak hostname first (requires /etc/hosts mapping), then localhost
keycloak_configs = [
("keycloak", 8081), # This should match Weaviate's expected issuer
- ("localhost", 8081) # Fallback for initial testing
+ ("localhost", 8081), # Fallback for initial testing
]
-
+
for host, port in keycloak_configs:
keycloak_url = f"http://{host}:{port}"
try:
# First check if Keycloak is responding at all
response = requests.get(f"{keycloak_url}", timeout=5)
if response.status_code == 200:
- print(f"✅ Keycloak server found at {keycloak_url}")
-
+ print(f"OK: Keycloak server found at {keycloak_url}")
+
# Check if master realm exists (always exists)
- master_response = requests.get(f"{keycloak_url}/realms/master", timeout=5)
+ master_response = requests.get(
+ f"{keycloak_url}/realms/master", timeout=5
+ )
if master_response.status_code == 200:
- print(f"✅ Keycloak realms accessible")
-
+ print(f"OK: Keycloak realms accessible")
+
# Check if our test realm exists
- test_response = requests.get(f"{keycloak_url}/realms/weaviate-test", timeout=5)
+ test_response = requests.get(
+ f"{keycloak_url}/realms/weaviate-test", timeout=5
+ )
if test_response.status_code == 200:
- print(f"✅ weaviate-test realm found")
- print(f"✅ weaviate-test realm accessible")
+ print(f"OK: weaviate-test realm found")
+ print(f"OK: weaviate-test realm accessible")
return keycloak_url
else:
- print(f"⚠️ weaviate-test realm not found - you'll need to create it")
+ print(
+ f"Warning: weaviate-test realm not found - you'll need to create it"
+ )
return keycloak_url
except Exception as e:
print(f"Testing {keycloak_url}: {e}")
continue
-
- print(f"❌ Cannot connect to Keycloak")
- print("💡 Make sure you have '127.0.0.1 keycloak' in /etc/hosts")
- print("💡 Run: echo '127.0.0.1 keycloak' | sudo tee -a /etc/hosts")
+
+ print(f"Error: Cannot connect to Keycloak")
+ print("Hint: Make sure you have '127.0.0.1 keycloak' in /etc/hosts")
+ print("Hint: Run: echo '127.0.0.1 keycloak' | sudo tee -a /etc/hosts")
return None
+
def get_client_secret_instructions():
"""Print instructions for getting client secret"""
- print("\n" + "="*60)
- print("📋 KEYCLOAK SETUP INSTRUCTIONS")
- print("="*60)
+ print("\n" + "=" * 60)
+ print("KEYCLOAK SETUP INSTRUCTIONS")
+ print("=" * 60)
print("Your Keycloak is running! Now you need to set it up:")
print("")
- print("1. 🌐 Go to http://keycloak:8081 (make sure you have keycloak in /etc/hosts)")
- print("2. 🔑 Login with username: admin, password: admin")
- print("3. 🏛️ CREATE REALM:")
+ print("1. Go to http://keycloak:8081 (make sure you have keycloak in /etc/hosts)")
+ print("2. Login with username: admin, password: admin")
+ print("3. CREATE REALM:")
print(" - Click dropdown next to 'master' (top-left)")
print(" - Click 'Create Realm'")
print(" - Realm name: weaviate-test")
print(" - Click 'Create'")
print("")
- print("4. 📱 CREATE CLIENT:")
+ print("4. CREATE CLIENT:")
print(" - Go to 'Clients' → 'Create client'")
print(" - Client ID: weaviate")
print(" - Client type: OpenID Connect")
@@ -75,426 +82,298 @@ def get_client_secret_instructions():
print(" - Enable 'Direct access grants'")
print(" - Click 'Save'")
print("")
- print("5. 🔐 GET CLIENT SECRET:")
+ print("5. GET CLIENT SECRET:")
print(" - Go to 'Credentials' tab")
print(" - Copy the 'Client secret' value")
print("")
- print("6. 👥 CREATE GROUPS:")
+ print("6. CREATE GROUPS:")
print(" - Go to 'Groups'")
print(" - Create: /admin-group, /viewer-group, /my-test-group")
print("")
- print("7. 👤 CREATE USERS:")
+ print("7. CREATE USERS:")
print(" - Go to 'Users' → 'Create new user'")
print(" - Username: test-admin, set password: password123")
print(" - Add to groups: /admin-group, /my-test-group")
print(" - Create test-viewer user, add to /viewer-group")
print("")
- print("8. ⚙️ CONFIGURE GROUP CLAIMS:")
- print(" - Go to 'Client scopes' → 'weaviate-dedicated'")
- print(" - Add mapper → Group Membership")
- print(" - Name: groups, Token Claim Name: groups")
- print(" - Add to ID token: ON, Add to access token: ON")
- print("="*60)
+ print("8. CONFIGURE GROUP & AUDIENCE CLAIMS:")
+ print(" - Go to the 'weaviate' client -> 'Client scopes' -> 'weaviate-dedicated'")
+ print(" - Add mapper -> Group Membership & Audience")
+ print("=" * 60)
+
def get_oidc_token(
- keycloak_url: str = "http://keycloak:8081", # Changed default to keycloak hostname
- realm: str = "weaviate-test",
+ keycloak_url: str,
+ client_secret: str,
+ username: str,
+ password: str = "password123",
+ realm: str = "weaviate-test",
client_id: str = "weaviate",
- client_secret: str = "",
- username: str = "test-admin",
- password: str = "password123"
) -> Optional[str]:
"""Get OIDC token from Keycloak for a user"""
token_url = f"{keycloak_url}/realms/{realm}/protocol/openid-connect/token"
-
+
data = {
"grant_type": "password",
"client_id": client_id,
"client_secret": client_secret,
"username": username,
- "password": password
+ "password": password,
}
-
+
try:
response = requests.post(token_url, data=data, timeout=10)
-
+
if response.status_code == 200:
token_data = response.json()
- print(f"✅ Successfully got token for user: {username}")
+ print(f"OK: Successfully got token for user: {username}")
return token_data["access_token"]
else:
- print(f"❌ Failed to get token for {username}: {response.status_code}")
+ print(f"Error: Failed to get token for {username}: {response.status_code}")
if response.status_code == 401:
print(" → Check username/password or client secret")
elif response.status_code == 400:
print(" → Check client configuration (Direct Access Grants enabled?)")
print(f" → Response: {response.text}")
return None
-
- except Exception as e:
- print(f"❌ Error getting token for {username}: {e}")
- return None
-def decode_token_groups(token: str) -> Dict[str, Any]:
- """Decode JWT token to see groups (for debugging)"""
- try:
- import jwt
- decoded = jwt.decode(token, options={"verify_signature": False})
- return decoded
- except ImportError:
- print("⚠️ Install PyJWT to see token details: pip install PyJWT")
- return {}
- except Exception as e:
- print(f"⚠️ Error decoding token: {e}")
- return {}
-
-def show_token_info(token: str, username: str):
- """Display token information"""
- decoded = decode_token_groups(token)
- if decoded:
- print(f" → Username: {decoded.get('preferred_username', 'N/A')}")
- print(f" → Groups: {decoded.get('groups', [])}")
- print(f" → Issuer: {decoded.get('iss', 'N/A')}")
- print(f" → Realm roles: {decoded.get('realm_access', {}).get('roles', [])}")
-
-def run_automated_setup(keycloak_url: str) -> Optional[str]:
- """Run automated Keycloak setup using Admin API"""
- try:
- # Import the automated setup class inline
- import requests
- from typing import Optional, Dict
-
- class KeycloakSetup:
- def __init__(self, keycloak_url: str = "http://keycloak:8081", admin_user: str = "admin", admin_pass: str = "admin"):
- self.keycloak_url = keycloak_url.rstrip("/")
- self.admin_user = admin_user
- self.admin_pass = admin_pass
- self.admin_token = None
- self.realm_name = "weaviate-test"
- self.client_id = "weaviate"
-
- def get_admin_token(self) -> Optional[str]:
- token_url = f"{self.keycloak_url}/realms/master/protocol/openid-connect/token"
- data = {
- "grant_type": "password",
- "client_id": "admin-cli",
- "username": self.admin_user,
- "password": self.admin_pass,
- }
- try:
- response = requests.post(token_url, data=data, timeout=10)
- if response.status_code == 200:
- self.admin_token = response.json()["access_token"]
- return self.admin_token
- return None
- except Exception:
- return None
-
- def make_admin_request(self, method: str, endpoint: str, json_data: Dict = None) -> requests.Response:
- headers = {
- "Authorization": f"Bearer {self.admin_token}",
- "Content-Type": "application/json",
- }
- url = f"{self.keycloak_url}/admin/realms{endpoint}"
- if method.upper() == "GET":
- return requests.get(url, headers=headers, timeout=10)
- elif method.upper() == "POST":
- return requests.post(url, headers=headers, json=json_data, timeout=10)
-
- def create_realm(self) -> bool:
- response = self.make_admin_request("GET", f"/{self.realm_name}")
- if response.status_code == 200:
- return True
- realm_config = {"realm": self.realm_name, "enabled": True}
- response = self.make_admin_request("POST", "", realm_config)
- return response.status_code == 201
-
- def create_client(self) -> Optional[str]:
- client_config = {
- "clientId": self.client_id,
- "enabled": True,
- "clientAuthenticatorType": "client-secret",
- "secret": "weaviate-client-secret-123",
- "redirectUris": ["*"],
- "standardFlowEnabled": True,
- "directAccessGrantsEnabled": True,
- "publicClient": False,
- }
- response = self.make_admin_request("POST", f"/{self.realm_name}/clients", client_config)
- if response.status_code == 201:
- return "weaviate-client-secret-123"
- return None
-
- def setup_all(self) -> Optional[str]:
- if not self.get_admin_token():
- return None
- if not self.create_realm():
- return None
- return self.create_client()
-
- print("🤖 Running automated Keycloak setup...")
- setup = KeycloakSetup(keycloak_url)
- client_secret = setup.setup_all()
-
- if client_secret:
- print(f"\n🎉 Automated setup completed successfully!")
- return client_secret
- else:
- print("❌ Automated setup failed")
- return None
except Exception as e:
- print(f"❌ Automated setup error: {e}")
+ print(f"Error: Error getting token for {username}: {e}")
return None
+
def setup_and_validate_oidc() -> tuple[Optional[str], Optional[str]]:
"""Setup and validate OIDC connection, return (client_secret, keycloak_url) if successful"""
- print("🧪 KEYCLOAK OIDC SETUP VALIDATOR")
+ print("KEYCLOAK OIDC SETUP VALIDATOR")
print("=" * 50)
-
+
# Test Keycloak connection
print("Testing Keycloak connection...")
keycloak_url = test_keycloak_connection([8081])
if not keycloak_url:
- print("❌ Keycloak not accessible!")
+ print("Error: Keycloak not accessible!")
print("\nTroubleshooting:")
- print("1. Add keycloak to /etc/hosts: echo '127.0.0.1 keycloak' | sudo tee -a /etc/hosts")
+ print(
+ "1. Add keycloak to /etc/hosts: echo '127.0.0.1 keycloak' | sudo tee -a /etc/hosts"
+ )
print("2. Check if docker-compose is running: docker-compose ps")
print("3. Check Keycloak logs: docker-compose logs keycloak")
return None, None
-
+
# Check if weaviate-test realm exists
try:
realm_response = requests.get(f"{keycloak_url}/realms/weaviate-test", timeout=5)
realm_exists = realm_response.status_code == 200
except:
realm_exists = False
-
+
if not realm_exists:
- print(f"\n⚠️ The 'weaviate-test' realm doesn't exist yet.")
- print("Choose your setup method:")
- print("1. 🤖 Automated setup (recommended)")
- print("2. 📋 Manual setup instructions")
-
- choice = input("\nEnter choice (1 or 2): ").strip()
-
- if choice == "1":
- client_secret = run_automated_setup(keycloak_url)
- if not client_secret:
- return None, None
- else:
- get_client_secret_instructions()
- setup_choice = input("\nHave you completed the manual setup? (y/n): ").strip().lower()
- if setup_choice != 'y':
- print("Please complete the Keycloak setup first, then run this script again.")
- return None, None
- client_secret = input("Enter Keycloak client secret: ").strip()
- if not client_secret:
- print("❌ Client secret required.")
- return None, None
- else:
- print(f"✅ weaviate-test realm accessible")
- print("\n" + "-"*30)
- client_secret = input("Enter Keycloak client secret (or 'auto' for automated setup): ").strip()
-
- if client_secret.lower() == 'auto':
- client_secret = run_automated_setup(keycloak_url)
- if not client_secret:
- return None, None
- elif not client_secret:
- print("❌ Client secret required.")
+ print(f"\nWarning: The 'weaviate-test' realm doesn't exist yet.")
+ print("Manual setup instructions")
+
+ get_client_secret_instructions()
+ setup_choice = (
+ input("\nHave you completed the manual setup? (y/n): ").strip().lower()
+ )
+ if setup_choice != "y":
+ print(
+ "Please complete the Keycloak setup first, then run this script again."
+ )
return None, None
-
+ client_secret = input("Enter Keycloak client secret: ").strip()
+ if not client_secret:
+ print("Error: Client secret required.")
+ return None, None
+ else:
+ print(f"OK: weaviate-test realm accessible")
+ print("\n" + "-" * 30)
+ # Using a fixed secret for automated testing
+ client_secret = "weaviate-client-secret-123"
+ print(f"Using client secret: {client_secret}")
+
# Test tokens with the keycloak_url (which should be http://keycloak:8081)
- print(f"\n🔑 Testing OIDC tokens...")
- admin_token = get_oidc_token(keycloak_url=keycloak_url, client_secret=client_secret, username="test-admin")
-
+ print(f"\nTesting OIDC tokens...")
+ admin_token = get_oidc_token(
+ keycloak_url=keycloak_url, client_secret=client_secret, username="test-admin"
+ )
+
if not admin_token:
- print("\n❌ Cannot get admin token. Please verify:")
+ print("\nError: Cannot get admin token. Please verify:")
print("- User 'test-admin' exists with password 'password123'")
print("- User is in groups like '/admin-group'")
print("- Client 'weaviate' has 'Direct access grants' enabled")
print("- Client secret is correct")
return None, None
-
- show_token_info(admin_token, "test-admin")
-
- viewer_token = get_oidc_token(keycloak_url=keycloak_url, client_secret=client_secret, username="test-viewer")
- if viewer_token:
- show_token_info(viewer_token, "test-viewer")
- else:
- print("⚠️ Viewer token failed, but continuing with admin token")
-
- print("\n✅ OIDC setup validated successfully!")
- return client_secret, keycloak_url
-def main():
- """Main function to run OIDC group management tests"""
-
- # Setup and validate OIDC first
- client_secret, keycloak_url = setup_and_validate_oidc()
- if not client_secret or not keycloak_url:
- sys.exit(1)
-
- print("\n" + "="*60)
- print("🚀 STARTING OIDC GROUP MANAGEMENT TESTS")
- print("="*60)
-
- # START AdminClient
- # Connect to Weaviate as root user (for admin operations)
- admin_client = weaviate.connect_to_local(
- port=8580,
- grpc_port=50551,
- auth_credentials=Auth.api_key("root-user-key"),
+ viewer_token = get_oidc_token(
+ keycloak_url=keycloak_url, client_secret=client_secret, username="test-viewer"
)
- # END AdminClient
-
- # Create test roles for group management
- print("\n📋 Setting up test roles...")
- permissions = [
- Permissions.collections(
- collection="TargetCollection*", read_config=True, create_collection=True
- ),
- Permissions.data(collection="TargetCollection*", read=True, create=True),
- ]
+ if not viewer_token:
+ print("Warning: Viewer token failed, but continuing with admin token")
- admin_client.roles.delete(role_name="testRole")
- admin_client.roles.create(role_name="testRole", permissions=permissions)
-
- admin_client.roles.delete(role_name="groupViewerRole")
- admin_client.roles.create(role_name="groupViewerRole", permissions=[
- Permissions.data(collection="*", read=True)
- ])
-
- print("\n🔧 ADMIN OPERATIONS (Using API Key)")
- print("-" * 40)
-
- # START AssignOidcGroupRoles
- admin_client.groups.oidc.assign_roles(group_id="/admin-group", role_names=["testRole", "viewer"])
- admin_client.groups.oidc.assign_roles(group_id="/viewer-group", role_names=["viewer"])
- admin_client.groups.oidc.assign_roles(group_id="/my-test-group", role_names=["groupViewerRole"])
- print("✅ Assigned roles to OIDC groups")
- # END AssignOidcGroupRoles
-
- # START GetKnownOidcGroups
- known_groups = admin_client.groups.oidc.get_known_group_names()
- print(f"📁 Known OIDC groups ({len(known_groups)}):")
- for group in known_groups:
- print(f" - {group}")
- # END GetKnownOidcGroups
-
- # START GetGroupAssignments
- group_assignments = admin_client.roles.get_group_assignments(role_name="testRole")
- print(f"👥 Groups assigned to role 'testRole':")
- for group in group_assignments:
- print(f" - Group ID: {group.group_id}")
- print(f" Group Type: {group.group_type}")
- # END GetGroupAssignments
-
- admin_client.close()
-
- print(f"\n👤 OIDC USER OPERATIONS")
- print("-" * 40)
-
- # Get tokens for different users using keycloak_url
- # START GetOidcToken
- admin_token = get_oidc_token(keycloak_url=keycloak_url, client_secret=client_secret, username="test-admin")
- viewer_token = get_oidc_token(keycloak_url=keycloak_url, client_secret=client_secret, username="test-viewer")
- # END GetOidcToken
-
- if admin_token:
- # START OidcAdminClient
- # Connect as OIDC admin user
- oidc_admin_client = weaviate.connect_to_local(
- port=8580,
- grpc_port=50551,
- auth_credentials=Auth.bearer_token(admin_token),
- )
- # END OidcAdminClient
-
- try:
- # START GetCurrentUserRoles
- my_user = oidc_admin_client.users.get_my_user()
- current_roles = my_user.roles if my_user else []
- print(f"🔑 Admin user's current roles ({len(current_roles)}):")
- for role in current_roles:
- print(f" - {role}")
- # END GetCurrentUserRoles
-
- # Test admin can see group assignments (if they have permissions)
- try:
- # START GetOidcGroupRoles
- group_roles = oidc_admin_client.groups.oidc.get_assigned_roles(
- group_id="/admin-group",
- include_permissions=True
- )
- print(f"👑 Admin group roles:")
- for role_name, role_obj in group_roles.items():
- print(f" - {role_name}")
- # END GetOidcGroupRoles
- except Exception as e:
- print(f"⚠️ Admin user cannot access group details: {e}")
-
- except Exception as e:
- print(f"❌ Error with admin OIDC operations: {e}")
- finally:
- oidc_admin_client.close()
-
- if viewer_token:
- # START OidcViewerClient
- # Connect as OIDC viewer user
- oidc_viewer_client = weaviate.connect_to_local(
- port=8580,
- grpc_port=50551,
- auth_credentials=Auth.bearer_token(viewer_token),
- )
- # END OidcViewerClient
-
- try:
- # START GetCurrentUserRolesViewer
- my_user = oidc_viewer_client.users.get_my_user()
- current_roles = my_user.roles if my_user else []
- print(f"👀 Viewer user's current roles ({len(current_roles)}):")
- for role in current_roles:
- print(f" - {role}")
- # END GetCurrentUserRolesViewer
-
- # Viewer should have limited permissions
- try:
- viewer_groups = oidc_viewer_client.groups.oidc.get_known_group_names()
- print(f"👀 Viewer can see groups: {viewer_groups}")
- except Exception as e:
- print(f"⚠️ Viewer user cannot access group operations: {e}")
-
- except Exception as e:
- print(f"❌ Error with viewer OIDC operations: {e}")
- finally:
- oidc_viewer_client.close()
-
- print(f"\n🧹 CLEANUP (Admin operations)")
- print("-" * 40)
-
- # Reconnect as admin for cleanup
- admin_client = weaviate.connect_to_local(
- port=8580,
- grpc_port=50551,
- auth_credentials=Auth.api_key("root-user-key"),
- )
-
- # START RevokeOidcGroupRoles
- admin_client.groups.oidc.revoke_roles(group_id="/admin-group", role_names=["testRole", "viewer"])
- admin_client.groups.oidc.revoke_roles(group_id="/viewer-group", role_names=["viewer"])
- admin_client.groups.oidc.revoke_roles(group_id="/my-test-group", role_names=["groupViewerRole"])
- print("✅ Revoked all assigned group roles")
- # END RevokeOidcGroupRoles
-
- # Verify cleanup
- final_groups = admin_client.groups.oidc.get_known_group_names()
- print(f"📁 Remaining known groups after cleanup: {final_groups}")
-
- admin_client.close()
+ print("\nOK: OIDC setup validated successfully!")
+ return client_secret, keycloak_url
- print(f"\n🎉 OIDC GROUP MANAGEMENT TESTING COMPLETE!")
- print("="*60)
-if __name__ == "__main__":
- main()
\ No newline at end of file
+# Setup and validate OIDC first
+client_secret, keycloak_url = setup_and_validate_oidc()
+if not client_secret or not keycloak_url:
+ sys.exit(1)
+
+print("\n" + "=" * 60)
+print("STARTING OIDC GROUP MANAGEMENT TESTS")
+print("=" * 60)
+
+# The admin_client is used for setup and cleanup that requires root privileges
+admin_client = None
+
+# START AdminClient
+# Connect to Weaviate as root user (for admin operations)
+admin_client = weaviate.connect_to_local(
+ port=8580,
+ grpc_port=50551,
+ auth_credentials=Auth.api_key("root-user-key"),
+)
+# END AdminClient
+
+# Create test roles for group management
+print("\nSetting up test roles...")
+permissions = [
+ Permissions.collections(
+ collection="TargetCollection*", read_config=True, create_collection=True
+ ),
+ Permissions.data(collection="TargetCollection*", read=True, create=True),
+]
+
+admin_client.roles.delete(role_name="testRole")
+admin_client.roles.create(role_name="testRole", permissions=permissions)
+
+admin_client.roles.delete(role_name="groupViewerRole")
+admin_client.roles.create(
+ role_name="groupViewerRole",
+ permissions=[Permissions.data(collection="*", read=True)],
+)
+
+print("\nADMIN OPERATIONS (Using API Key)")
+print("-" * 40)
+
+# START AssignOidcGroupRoles
+admin_client.groups.oidc.assign_roles(
+ group_id="/admin-group", role_names=["testRole", "viewer"]
+)
+# END AssignOidcGroupRoles
+admin_client.groups.oidc.assign_roles(group_id="/viewer-group", role_names=["viewer"])
+admin_client.groups.oidc.assign_roles(
+ group_id="/my-test-group", role_names=["groupViewerRole"]
+)
+
+# START GetKnownOidcGroups
+known_groups = admin_client.groups.oidc.get_known_group_names()
+print(f"Known OIDC groups ({len(known_groups)}): {known_groups}")
+# END GetKnownOidcGroups
+assert len(known_groups) == 3
+assert set(known_groups) == {"/admin-group", "/viewer-group", "/my-test-group"}
+
+# START GetGroupAssignments
+group_assignments = admin_client.roles.get_group_assignments(role_name="testRole")
+print(f"Groups assigned to role 'testRole':")
+for group in group_assignments:
+ print(f" - Group ID: {group.group_id}, Type: {group.group_type}")
+# END GetGroupAssignments
+assert len(group_assignments) == 1
+assert group_assignments[0].group_id == "/admin-group"
+
+print(f"\nOIDC USER OPERATIONS")
+print("-" * 40)
+
+# Get tokens for different users using keycloak_url
+# START GetOidcToken
+admin_token = get_oidc_token(
+ keycloak_url=keycloak_url, client_secret=client_secret, username="test-admin"
+)
+viewer_token = get_oidc_token(
+ keycloak_url=keycloak_url, client_secret=client_secret, username="test-viewer"
+)
+# END GetOidcToken
+assert admin_token is not None
+assert viewer_token is not None
+
+# --- Admin User Tests ---
+# START OidcAdminClient
+# Connect as OIDC admin user
+oidc_admin_client = weaviate.connect_to_local(
+ port=8580,
+ grpc_port=50551,
+ auth_credentials=Auth.bearer_token(admin_token),
+)
+# END OidcAdminClient
+
+# START GetCurrentUserRoles
+my_user = oidc_admin_client.users.get_my_user()
+current_roles_dict = my_user.roles if my_user else {}  # empty dict (not list) so .keys() works
+role_names = list(current_roles_dict.keys())
+print(f"Admin user's current roles ({len(role_names)}): {role_names}")
+# END GetCurrentUserRoles
+assert set(role_names) == {"viewer", "testRole", "groupViewerRole"}
+
+# START GetOidcGroupRoles
+group_roles = oidc_admin_client.groups.oidc.get_assigned_roles(
+ group_id="/admin-group", include_permissions=True
+)
+print(f"Roles assigned to '/admin-group': {list(group_roles.keys())}")
+# END GetOidcGroupRoles
+assert set(group_roles.keys()) == {"testRole", "viewer"}
+oidc_admin_client.close()
+
+# --- Viewer User Tests ---
+# START OidcViewerClient
+# Connect as OIDC viewer user
+oidc_viewer_client = weaviate.connect_to_local(
+ port=8580,
+ grpc_port=50551,
+ auth_credentials=Auth.bearer_token(viewer_token),
+)
+# END OidcViewerClient
+
+# START GetCurrentUserRolesViewer
+my_user = oidc_viewer_client.users.get_my_user()
+current_roles_dict = my_user.roles if my_user else {}
+role_names = list(current_roles_dict.keys())
+print(f"Viewer user's current roles ({len(role_names)}): {role_names}")
+# END GetCurrentUserRolesViewer
+assert role_names == ["viewer"]
+
+# Viewer should have limited permissions but can still see group names
+try:
+ viewer_groups = oidc_viewer_client.groups.oidc.get_known_group_names()
+ print(f"Viewer can see groups: {viewer_groups}")
+ assert set(viewer_groups) == {"/admin-group", "/viewer-group", "/my-test-group"}
+except Exception as e:
+ # This part should not be reached if permissions are set correctly
+ assert False, f"Viewer user failed to access group operations: {e}"
+
+oidc_viewer_client.close()
+
+print(f"\nCLEANUP (Admin operations)")
+print("-" * 40)
+
+# START RevokeOidcGroupRoles
+admin_client.groups.oidc.revoke_roles(
+ group_id="/admin-group", role_names=["testRole", "viewer"]
+)
+# END RevokeOidcGroupRoles
+admin_client.groups.oidc.revoke_roles(group_id="/viewer-group", role_names=["viewer"])
+admin_client.groups.oidc.revoke_roles(
+ group_id="/my-test-group", role_names=["groupViewerRole"]
+)
+
+# Verify cleanup
+final_groups = admin_client.groups.oidc.get_known_group_names()
+print(f"Remaining known groups after cleanup: {final_groups}")
+assert len(final_groups) == 0
+
+admin_client.close()
+
+print("\n" + "=" * 60)
+print("OIDC GROUP MANAGEMENT TESTING COMPLETE!")
+print("=" * 60)
diff --git a/docs/weaviate/configuration/rbac/manage-groups.mdx b/docs/weaviate/configuration/rbac/manage-groups.mdx
new file mode 100644
index 00000000..a1300638
--- /dev/null
+++ b/docs/weaviate/configuration/rbac/manage-groups.mdx
@@ -0,0 +1,252 @@
+---
+title: Manage groups
+sidebar_label: Manage groups
+image: og/docs/configuration.jpg
+# tags: ['rbac', 'groups', 'configuration', 'authorization', 'oidc']
+---
+
+import Link from "@docusaurus/Link";
+import Tabs from "@theme/Tabs";
+import TabItem from "@theme/TabItem";
+import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock";
+import OidcGroupPyCode from "!!raw-loader!/_includes/code/python/howto.configure.rbac.oidc.groups.py";
+
+
+:::info Added in `v1.33`
+:::
+
+When using [OIDC](/deploy/configuration/oidc.md) for authentication, you can leverage user groups defined in your identity provider (like Keycloak, Okta, or Auth0) to manage permissions in Weaviate. The user's group memberships are passed to Weaviate in the OIDC token.
+
+You can then assign Weaviate roles directly to these **OIDC groups**. Any user who is a member of that group will automatically inherit the permissions of the assigned roles. This is a powerful way to manage access for large teams without assigning roles to each user individually.
+
+On this page, you will find examples of how to programmatically **manage OIDC groups** and their associated roles.
+
+## Group management {#group-management}
+
+### Assign roles to an OIDC group
+
+You can assign one or more Weaviate roles to an OIDC group. Any user belonging to this group will inherit the roles' permissions.
+
+This example assigns the `testRole` and `viewer` roles to the `/admin-group`.
+
+
+
+
+
+
+
+```typescript
+// TypeScript/JavaScript support coming soon
+```
+
+
+
+
+```go
+// Go support coming soon
+```
+
+
+
+
+```java
+// Java support coming soon
+```
+
+
+
+
+### Revoke roles from an OIDC group
+
+You can revoke one or more roles from a specific OIDC group.
+
+This example removes the `testRole` and `viewer` roles from the `/admin-group`.
+
+
+
+
+
+
+
+```typescript
+// TypeScript/JavaScript support coming soon
+```
+
+
+
+
+```go
+// Go support coming soon
+```
+
+
+
+
+```java
+// Java support coming soon
+```
+
+
+
+
+### List roles assigned to an OIDC group
+
+Retrieve a list of all roles that have been assigned to a specific OIDC group.
+
+
+
+
+
+
+
+```typescript
+// TypeScript/JavaScript support coming soon
+```
+
+
+
+
+```go
+// Go support coming soon
+```
+
+
+
+
+```java
+// Java support coming soon
+```
+
+
+
+
+
+Example results
+
+```text
+Roles assigned to '/admin-group': ['testRole', 'viewer']
+```
+
+
+
+### List all known OIDC groups
+
+This example shows how to get a list of all OIDC groups that Weaviate is aware of. Weaviate learns about a group when a role is first assigned to it.
+
+
+
+
+
+
+
+```typescript
+// TypeScript/JavaScript support coming soon
+```
+
+
+
+
+```go
+// Go support coming soon
+```
+
+
+
+
+```java
+// Java support coming soon
+```
+
+
+
+
+
+Example results
+
+```text
+Known OIDC groups (3): ['/viewer-group', '/admin-group', '/my-test-group']
+```
+
+
+
+### List groups assigned to a role
+
+Retrieve a list of all groups that have been assigned a specific role.
+
+This example shows which groups have the `testRole` assigned to them.
+
+
+
+
+
+
+
+```typescript
+// TypeScript/JavaScript support coming soon
+```
+
+
+
+
+```go
+// Go support coming soon
+```
+
+
+
+
+```java
+// Java support coming soon
+```
+
+
+
+
+
+Example results
+
+```text
+Groups assigned to role 'testRole':
+ - Group ID: /admin-group, Type: oidc
+```
+
+
+
+## Further resources
+
+- [RBAC: Overview](./index.mdx)
+- [RBAC: Configuration](/deploy/configuration/configuring-rbac.md)
+- [RBAC: Manage roles](./manage-roles.mdx)
+- [RBAC: Manage users](./manage-users.mdx)
+
+## Questions and feedback
+
+import DocsFeedback from "/_includes/docs-feedback.mdx";
+
+
+<DocsFeedback />
diff --git a/sidebars.js b/sidebars.js
index 30203c35..9a5f7075 100644
--- a/sidebars.js
+++ b/sidebars.js
@@ -450,6 +450,7 @@ const sidebars = {
items: [
"weaviate/configuration/rbac/manage-roles",
"weaviate/configuration/rbac/manage-users",
+ "weaviate/configuration/rbac/manage-groups",
],
},
],
From b20fe4ecc30a92c308d50b6caf158a2c2ff3154d Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Wed, 27 Aug 2025 13:31:50 +0200
Subject: [PATCH 08/44] Update docs
---
.../howto.configure.rbac.oidc.groups.py | 64 ++-----------------
1 file changed, 5 insertions(+), 59 deletions(-)
diff --git a/_includes/code/python/howto.configure.rbac.oidc.groups.py b/_includes/code/python/howto.configure.rbac.oidc.groups.py
index d93c8b72..19d23433 100644
--- a/_includes/code/python/howto.configure.rbac.oidc.groups.py
+++ b/_includes/code/python/howto.configure.rbac.oidc.groups.py
@@ -6,7 +6,7 @@
import requests
import sys
-from typing import Dict, Any, Optional
+from typing import Optional
import weaviate
from weaviate.classes.init import Auth
from weaviate.classes.rbac import Permissions
@@ -58,50 +58,6 @@ def test_keycloak_connection(keycloak_ports: list = [8081]) -> Optional[str]:
return None
-def get_client_secret_instructions():
- """Print instructions for getting client secret"""
- print("\n" + "=" * 60)
- print("KEYCLOAK SETUP INSTRUCTIONS")
- print("=" * 60)
- print("Your Keycloak is running! Now you need to set it up:")
- print("")
- print("1. Go to http://keycloak:8081 (make sure you have keycloak in /etc/hosts)")
- print("2. Login with username: admin, password: admin")
- print("3. CREATE REALM:")
- print(" - Click dropdown next to 'master' (top-left)")
- print(" - Click 'Create Realm'")
- print(" - Realm name: weaviate-test")
- print(" - Click 'Create'")
- print("")
- print("4. CREATE CLIENT:")
- print(" - Go to 'Clients' → 'Create client'")
- print(" - Client ID: weaviate")
- print(" - Client type: OpenID Connect")
- print(" - Click 'Next'")
- print(" - Enable 'Client authentication'")
- print(" - Enable 'Direct access grants'")
- print(" - Click 'Save'")
- print("")
- print("5. GET CLIENT SECRET:")
- print(" - Go to 'Credentials' tab")
- print(" - Copy the 'Client secret' value")
- print("")
- print("6. CREATE GROUPS:")
- print(" - Go to 'Groups'")
- print(" - Create: /admin-group, /viewer-group, /my-test-group")
- print("")
- print("7. CREATE USERS:")
- print(" - Go to 'Users' → 'Create new user'")
- print(" - Username: test-admin, set password: password123")
- print(" - Add to groups: /admin-group, /my-test-group")
- print(" - Create test-viewer user, add to /viewer-group")
- print("")
- print("8. CONFIGURE GROUP & AUDIENCE CLAIMS:")
- print(" - Go to the 'weaviate' client -> 'Client scopes' -> 'weaviate-dedicated'")
- print(" - Add mapper -> Group Membership & Audience")
- print("=" * 60)
-
-
def get_oidc_token(
keycloak_url: str,
client_secret: str,
@@ -169,21 +125,11 @@ def setup_and_validate_oidc() -> tuple[Optional[str], Optional[str]]:
if not realm_exists:
print(f"\nWarning: The 'weaviate-test' realm doesn't exist yet.")
- print("Manual setup instructions")
-
- get_client_secret_instructions()
- setup_choice = (
- input("\nHave you completed the manual setup? (y/n): ").strip().lower()
+ print(
+ "Please complete the Keycloak setup first with keycloak_helper_script.py, then run this script again."
)
- if setup_choice != "y":
- print(
- "Please complete the Keycloak setup first, then run this script again."
- )
- return None, None
- client_secret = input("Enter Keycloak client secret: ").strip()
- if not client_secret:
- print("Error: Client secret required.")
- return None, None
+ return None, None
+
else:
print(f"OK: weaviate-test realm accessible")
print("\n" + "-" * 30)
From 59f6244f87b1f1c075784cca3ac6a42e50a9d406 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Wed, 27 Aug 2025 13:41:12 +0200
Subject: [PATCH 09/44] Update docs
---
_includes/starter-guides/compression-types.mdx | 2 +-
docs/weaviate/concepts/vector-quantization.md | 6 ++++--
docs/weaviate/configuration/compression/rq-compression.md | 6 ++++--
3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/_includes/starter-guides/compression-types.mdx b/_includes/starter-guides/compression-types.mdx
index 4e6571fe..be2878f3 100644
--- a/_includes/starter-guides/compression-types.mdx
+++ b/_includes/starter-guides/compression-types.mdx
@@ -4,6 +4,6 @@
You control the number of segments, segment granularity, and the size of the training set.
- [Scalar Quantization (SQ)](/weaviate/configuration/compression/sq-compression) SQ reduces the size of each vector dimension from 32 bits to 8 bits. SQ trains on your data to create custom buckets for each dimension. This training helps SQ to preserve data characteristics when it maps information from the 32 bit dimensions into 8 bit buckets.
-- [Rotational Quantization (RQ)](/weaviate/configuration/compression/rq-compression) RQ reduces the size of each vector dimension from 32 bits to 8 bits without requiring training. RQ first applies a fast pseudorandom rotation to the vector, then quantizes each dimension to 8 bits. The rotation spreads information evenly across dimensions, enabling up to 98-99% recall without any configuration or training phase.
+- [Rotational Quantization (RQ)](/weaviate/configuration/compression/rq-compression) RQ reduces the size of each vector dimension from 32 bits to 8 bits (or 1 bit) without requiring training. RQ first applies a fast pseudorandom rotation to the vector, then quantizes each dimension. The rotation spreads information evenly across dimensions, enabling up to 98-99% recall without any configuration or training phase.
- [Binary Quantization (BQ)](/weaviate/configuration/compression/bq-compression) BQ reduces the size of each vector dimension to a single bit. This compression algorithm works best for vectors with high dimensionality.
diff --git a/docs/weaviate/concepts/vector-quantization.md b/docs/weaviate/concepts/vector-quantization.md
index 439cd945..3e7924b1 100644
--- a/docs/weaviate/concepts/vector-quantization.md
+++ b/docs/weaviate/concepts/vector-quantization.md
@@ -116,7 +116,9 @@ When SQ is enabled, Weaviate boosts recall by over-fetching compressed results.
:::caution Technical preview
-Rotational quantization (RQ) was added in **`v1.32`** as a **technical preview**.
+**8-bit Rotational quantization (RQ)** was added in **`v1.32`** as a **technical preview**.
+**1-bit Rotational quantization (RQ)** was added in **`v1.33`** as a **technical preview**.
+
This means that the feature is still under development and may change in future releases, including potential breaking changes.
**We do not recommend using this feature in production environments at this time.**
@@ -142,7 +144,7 @@ This means that the feature is still under development and may change in future
- **Data vectors**: Quantized using 1 bit per dimension by storing only the sign of each entry
- **Query vectors**: Scalar quantized using 5 bits per dimension during search
-This asymmetric approach improves recall compared to symmetric 1-bit schemes (such as BQ) by using more precision for query vectors during distance calculation. On datasets well-suited for BQ (like OpenAI embeddings), 1-bit RQ essentially matches BQ recall. It also works well on datasets where BQ performs poorly (such as [SIFT](https://arxiv.org/abs/2504.09081))
+This asymmetric approach improves recall compared to symmetric 1-bit schemes (such as BQ) by using more precision for query vectors during distance calculation. On datasets well-suited for BQ (like OpenAI embeddings), 1-bit RQ essentially matches BQ recall. It also works well on datasets where BQ performs poorly (such as [SIFT](https://arxiv.org/abs/2504.09081)).
### RQ characteristics
diff --git a/docs/weaviate/configuration/compression/rq-compression.md b/docs/weaviate/configuration/compression/rq-compression.md
index fda60947..df90ce65 100644
--- a/docs/weaviate/configuration/compression/rq-compression.md
+++ b/docs/weaviate/configuration/compression/rq-compression.md
@@ -15,13 +15,15 @@ import JavaCode from '!!raw-loader!/\_includes/code/howto/java/src/test/java/io/
:::caution Technical preview
-Rotational quantization (RQ) was added in **`v1.32`** as a **technical preview**.
+**8-bit Rotational quantization (RQ)** was added in **`v1.32`** as a **technical preview**.
+**1-bit Rotational quantization (RQ)** was added in **`v1.33`** as a **technical preview**.
+
This means that the feature is still under development and may change in future releases, including potential breaking changes.
**We do not recommend using this feature in production environments at this time.**
:::
-[**Rotational quantization (RQ)**](../../concepts/vector-quantization.md#rotational-quantization) is a fast untrained vector compression technique:
+[**Rotational quantization (RQ)**](../../concepts/vector-quantization.md#rotational-quantization) is a fast untrained vector compression technique. Two RQ variants are available in Weaviate:
- **8-bit RQ**: Up to 4x compression while retaining almost perfect recall (98-99% on most datasets).
- **1-bit RQ**: Close to 32x compression as dimensionality increases with moderate recall across various datasets.
From b72f6f6502bd83ccfea7655b2f5fe8d9f53c7b1d Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Wed, 27 Aug 2025 14:50:50 +0200
Subject: [PATCH 10/44] Add new operators
---
_includes/code/howto/search.filters.py | 31 ++++++++++++++++
docs/weaviate/api/graphql/filters.md | 18 +++++++---
docs/weaviate/manage-objects/delete.mdx | 4 +--
docs/weaviate/search/filters.md | 47 +++++++++++++++++++++++--
4 files changed, 91 insertions(+), 9 deletions(-)
diff --git a/_includes/code/howto/search.filters.py b/_includes/code/howto/search.filters.py
index 34744326..84c2d43a 100644
--- a/_includes/code/howto/search.filters.py
+++ b/_includes/code/howto/search.filters.py
@@ -136,6 +136,37 @@
# End test
+# ==========================================
+# ===== ContainsNoneFilter =====
+# ==========================================
+
+# ContainsNoneFilter
+from weaviate.classes.query import Filter
+
+jeopardy = client.collections.get("JeopardyQuestion")
+
+# highlight-start
+token_list = ["blue", "red"]
+# highlight-end
+
+response = jeopardy.query.fetch_objects(
+ # highlight-start
+ # Find objects where the `question` property contains none of the strings in `token_list`
+ filters=Filter.by_property("question").contains_none(token_list),
+ # highlight-end
+ limit=3
+)
+
+for o in response.objects:
+ print(o.properties)
+# END ContainsNoneFilter
+
+# Test results
+assert response.objects[0].collection == "JeopardyQuestion"
+assert (token_list[0] not in response.objects[0].properties["question"].lower() and token_list[1] not in response.objects[0].properties["question"].lower())
+# End test
+
+
# ==========================================
# ===== Partial Match Filter =====
# ==========================================
diff --git a/docs/weaviate/api/graphql/filters.md b/docs/weaviate/api/graphql/filters.md
index 220b1138..2da35f1a 100644
--- a/docs/weaviate/api/graphql/filters.md
+++ b/docs/weaviate/api/graphql/filters.md
@@ -59,6 +59,7 @@ The `where` filter is an [algebraic object](https://en.wikipedia.org/wiki/Algebr
- `Operator` (which takes one of the following values)
- `And`
- `Or`
+ - `Not`
- `Equal`
- `NotEqual`
- `GreaterThan`
@@ -70,6 +71,7 @@ The `where` filter is an [algebraic object](https://en.wikipedia.org/wiki/Algebr
- `IsNull`
- `ContainsAny` (*Only for array and text properties)
- `ContainsAll` (*Only for array and text properties)
+ - `ContainsNone` (*Only for array and text properties)
- `Path`: Is a list of strings in [XPath](https://en.wikipedia.org/wiki/XPath#Abbreviated_syntax) style, indicating the property name of the collection.
- If the property is a cross-reference, the path should be followed as a list of strings. For a `inPublication` reference property that refers to `Publication` collection, the path selector for `name` will be `["inPublication", "Publication", "name"]`.
- `valueType`
@@ -239,15 +241,15 @@ Each `Like` filter iterates over the entire inverted index for that property. Th
Currently, the `Like` filter is not able to match wildcard characters (`?` and `*`) as literal characters. For example, it is currently not possible to only match the string `car*` and not `car`, `care` or `carpet`. This is a known limitation and may be addressed in future versions of Weaviate.
-### `ContainsAny` / `ContainsAll`
+### `ContainsAny` / `ContainsAll` / `ContainsNone`
-The `ContainsAny` and `ContainsAll` operators filter objects using values of an array as criteria.
+The `ContainsAny`, `ContainsAll` and `ContainsNone` operators filter objects using values of an array as criteria.
Both operators expect an array of values and return objects that match based on the input values.
-:::note `ContainsAny` and `ContainsAll` notes:
-- The `ContainsAny` and `ContainsAll` operators treat texts as an array. The text is split into an array of tokens based on the chosen tokenization scheme, and the search is performed on that array.
-- When using `ContainsAny` or `ContainsAll` with the REST api for [batch deletion](../../manage-objects/delete.mdx#delete-multiple-objects), the text array must be specified with the `valueTextArray` argument. This is different from the usage in search, where the `valueText` argument that can be used.
+:::note `ContainsAny`/`ContainsAll`/`ContainsNone` notes:
+- The `ContainsAny`, `ContainsAll` and `ContainsNone` operators treat texts as an array. The text is split into an array of tokens based on the chosen tokenization scheme, and the search is performed on that array.
+- When using `ContainsAny`, `ContainsAll` or `ContainsNone` with the REST API for [batch deletion](../../manage-objects/delete.mdx#delete-multiple-objects), the text array must be specified with the `valueTextArray` argument. This is different from the usage in search, where the `valueText` argument can be used.
:::
@@ -265,6 +267,12 @@ A `ContainsAny` query on a path of `["languages_spoken"]` with a value of `["Chi
Using the same dataset of `Person` objects as above, a `ContainsAll` query on a path of `["languages_spoken"]` with a value of `["Chinese", "French", "English"]` will return objects where all three of those languages are present in the `languages_spoken` array.
+#### `ContainsNone`
+
+`ContainsNone` returns objects where none of the values from the input array are present.
+
+Using the same dataset of `Person` objects as above, a `ContainsNone` query on a path of `["languages_spoken"]` with a value of `["Chinese", "French", "English"]` will return objects where **none** of those languages are present in the `languages_spoken` array. For example, a person who speaks only Spanish would be returned, but a person who speaks English would be excluded.
+
## Filter performance
import RangeFilterPerformanceNote from '/_includes/range-filter-performance-note.mdx';
diff --git a/docs/weaviate/manage-objects/delete.mdx b/docs/weaviate/manage-objects/delete.mdx
index bc947eef..031949d6 100644
--- a/docs/weaviate/manage-objects/delete.mdx
+++ b/docs/weaviate/manage-objects/delete.mdx
@@ -192,9 +192,9 @@ To delete objects that match a set of criteria, specify the collection and a [`w
-### ContainsAny / ContainsAll
+### ContainsAny / ContainsAll / ContainsNone
-Use `ContainsAny` / `ContainsAll` filters to delete of objects by a set of criteria.
+Use `ContainsAny` / `ContainsAll` / `ContainsNone` filters to delete objects by a set of criteria.
diff --git a/docs/weaviate/search/filters.md b/docs/weaviate/search/filters.md
index b89deb2c..41450ec8 100644
--- a/docs/weaviate/search/filters.md
+++ b/docs/weaviate/search/filters.md
@@ -515,9 +515,52 @@ The output is like this:
-## `ContainsAny` and `ContainsAll` with batch delete
+## `ContainsNone` Filter
-If you want to do a batch delete, see [Delete objects](../manage-objects/delete.mdx#containsany--containsall).
+The `ContainsNone` operator works on text properties and takes an array of values as input. It matches objects where the property **contains none** of the values in the array.
+
+
+
+
+
+
+
+```typescript
+// TypeScript/JavaScript support coming soon
+```
+
+
+
+
+```java
+// Java support coming soon
+```
+
+
+
+
+```go
+// Go support coming soon
+```
+
+
+
+
+
+ Example response
+
+The output is like this:
+
+
+
+## `ContainsAny`, `ContainsAll` and `ContainsNone` with batch delete
+
+If you want to do a batch delete, see [Delete objects](../manage-objects/delete.mdx#containsany--containsall--containsnone).
## Filter text on partial matches
From 13e0745c55a3963dbb0a6c4bcdb7b9d13b67821c Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Wed, 27 Aug 2025 17:08:40 +0200
Subject: [PATCH 11/44] Update docs
---
docs/weaviate/manage-objects/import.mdx | 2 +-
tests/docker-compose-anon.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/weaviate/manage-objects/import.mdx b/docs/weaviate/manage-objects/import.mdx
index b0961a76..edcdf003 100644
--- a/docs/weaviate/manage-objects/import.mdx
+++ b/docs/weaviate/manage-objects/import.mdx
@@ -106,7 +106,7 @@ This means that the feature is still under development and may change in future
:::
-Here's how to import objects into a collection named `MyCollection`. The client will handle sending the data in optimal chunks.
+Here's how to import objects into a collection named `MyCollection` using [server-side batch imports](../concepts/data-import.mdx#server-side-batching). The client will handle sending the data in optimal chunks.
diff --git a/tests/docker-compose-anon.yml b/tests/docker-compose-anon.yml
index 4052eda7..ba8a7116 100644
--- a/tests/docker-compose-anon.yml
+++ b/tests/docker-compose-anon.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.32.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.0-7f4e699.arm64@sha256:35def4b6e3a80f1b77c7a69715a09de47d2c17dd7c698da94b1d4835fd6b8561
ports:
- 8080:8080
- 50051:50051
From 7c0a09f2c3ba8f039a1dadb8cd2158e6662ab19b Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Thu, 28 Aug 2025 08:38:57 +0200
Subject: [PATCH 12/44] Update docs
---
.../python/howto.configure.rbac.oidc.users.py | 8 ++--
.../code/python/howto.configure.rbac.roles.py | 30 ++++++++++--
docs/weaviate/configuration/rbac/index.mdx | 25 ++++++++++
.../configuration/rbac/manage-roles.mdx | 46 ++++++++++++++++++-
tests/docker-compose-rbac.yml | 2 +-
5 files changed, 100 insertions(+), 11 deletions(-)
diff --git a/_includes/code/python/howto.configure.rbac.oidc.users.py b/_includes/code/python/howto.configure.rbac.oidc.users.py
index 03fcf75a..67499586 100644
--- a/_includes/code/python/howto.configure.rbac.oidc.users.py
+++ b/_includes/code/python/howto.configure.rbac.oidc.users.py
@@ -31,11 +31,11 @@
# START AssignOidcUserRole
client.users.oidc.assign_roles(user_id="custom-user", role_names=["testRole", "viewer"])
# END AssignOidcUserRole
-assert "testRole" in client.users.oidc.get_assigned_roles("custom-user")
-assert "viewer" in client.users.oidc.get_assigned_roles("custom-user")
+assert "testRole" in client.users.oidc.get_assigned_roles(user_id="custom-user")
+assert "viewer" in client.users.oidc.get_assigned_roles(user_id="custom-user")
# START ListOidcUserRoles
-user_roles = client.users.oidc.get_assigned_roles("custom-user")
+user_roles = client.users.oidc.get_assigned_roles(user_id="custom-user")
for role in user_roles:
print(role)
@@ -46,6 +46,6 @@
# START RevokeOidcUserRoles
client.users.oidc.revoke_roles(user_id="custom-user", role_names="testRole")
# END RevokeOidcUserRoles
-assert "testRole" not in client.users.oidc.get_assigned_roles("custom-user")
+assert "testRole" not in client.users.oidc.get_assigned_roles(user_id="custom-user")
client.close()
diff --git a/_includes/code/python/howto.configure.rbac.roles.py b/_includes/code/python/howto.configure.rbac.roles.py
index 9b28ebe8..13c0fa59 100644
--- a/_includes/code/python/howto.configure.rbac.roles.py
+++ b/_includes/code/python/howto.configure.rbac.roles.py
@@ -17,7 +17,7 @@
all_roles = client.roles.list_all()
for role_name, _ in all_roles.items():
- if role_name not in ["viewer", "root", "admin"]:
+ if role_name not in ["viewer", "root", "admin", "read-only"]:
client.roles.delete(role_name=role_name)
# # START CreateRole
@@ -222,8 +222,7 @@
permissions = client.roles.get(role_name="testRole")
assert any(
- permission.alias == "TargetAlias*"
- for permission in permissions.alias_permissions
+ permission.alias == "TargetAlias*" for permission in permissions.alias_permissions
)
client.roles.delete("testRole")
@@ -247,13 +246,34 @@
permissions = client.roles.get(role_name="testRole")
assert any(
- permission.collection == "TargetCollection*" and
- permission.shard == "TargetShard*"
+ permission.collection == "TargetCollection*" and permission.shard == "TargetShard*"
for permission in permissions.replicate_permissions
)
client.roles.delete("testRole")
+# START AddGroupsPermission
+from weaviate.classes.rbac import Permissions
+
+permissions = [
+ Permissions.Groups.oidc(
+ group="TargetGroup*", # Applies to all groups starting with "TargetGroup"
+ read=True, # Allow reading group information
+ assign_and_revoke=True, # Allow assigning and revoking group memberships
+ ),
+]
+
+client.roles.create(role_name="testRole", permissions=permissions)
+# END AddGroupsPermission
+
+permissions = client.roles.get(role_name="testRole")
+assert any(
+ permission.group == "TargetGroup*"
+ for permission in permissions.groups_permissions
+)
+
+client.roles.delete("testRole")
+
permissions = [
Permissions.collections(
collection="TargetCollection*",
diff --git a/docs/weaviate/configuration/rbac/index.mdx b/docs/weaviate/configuration/rbac/index.mdx
index 45dfc78d..2ddbf9d9 100644
--- a/docs/weaviate/configuration/rbac/index.mdx
+++ b/docs/weaviate/configuration/rbac/index.mdx
@@ -400,6 +400,31 @@ Permissions can be defined with the following resources, access levels and optio
+
+
+
+ Groups
+
+
+
+
Read groups
+
Assign and revoke group membership
+
+
+
Group name filter:
+
+
+ string or regex: specifies which groups can be managed
+
+
+
Group type filter:
+
+
+ oidc (only OIDC user groups are supported at the moment)
+
+
+
+
diff --git a/docs/weaviate/configuration/rbac/manage-roles.mdx b/docs/weaviate/configuration/rbac/manage-roles.mdx
index 455d8f9d..3b50c6a6 100644
--- a/docs/weaviate/configuration/rbac/manage-roles.mdx
+++ b/docs/weaviate/configuration/rbac/manage-roles.mdx
@@ -89,6 +89,12 @@ Permissions for these resource types can be assigned to roles:
1. [**Node Data Access**](#nodes-permissions)
+1. [**Collection alias**](#aliases-permissions)
+
+1. [**Replications**](#replications-permissions)
+
+1. [**Groups**](#groups-permissions)
+
#### Create a role with `Role Management` permissions {#role-management-permissions}
This example creates a role called `testRole` with permissions to:
@@ -404,7 +410,7 @@ This example creates a role called `testRole` with permissions to:
-#### Create a role with `Collection Aliases` permissions {#aliases-permissions}
+#### Create a role with `Collection Alias` permissions {#aliases-permissions}
This example creates a role called `testRole` with permissions to:
@@ -480,6 +486,44 @@ This example creates a role called `testRole` with permissions to:
+#### Create a role with `Groups` permissions {#groups-permissions}
+
+This example creates a role called `testRole` with permissions to:
+
+- Read information about and assign/revoke group membership for OIDC groups starting with `TargetGroup`.
+
+
+
+
+
+
+
+```typescript
+// TypeScript/JavaScript support coming soon
+```
+
+
+
+
+```go
+// Go support coming soon
+```
+
+
+
+
+```java
+// Java support coming soon
+```
+
+
+
+
### Grant additional permissions
Additional permissions can be granted to a role at any time. The role must already exist.
diff --git a/tests/docker-compose-rbac.yml b/tests/docker-compose-rbac.yml
index bc029512..acca6207 100644
--- a/tests/docker-compose-rbac.yml
+++ b/tests/docker-compose-rbac.yml
@@ -7,7 +7,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-dev-4d2ab70.arm64
+ image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.0
ports:
- 8580:8080
- 50551:50051
From e2d146f2664696539d8bcf496584500013bfce08 Mon Sep 17 00:00:00 2001
From: Ivan Despot <66276597+g-despot@users.noreply.github.com>
Date: Thu, 28 Aug 2025 14:59:30 +0200
Subject: [PATCH 13/44] Update docs
---
.../howto/go/docs/manage-data.aliases_test.go | 59 ++-
.../docs/manage-data.collection-aliases.java | 74 ++--
_includes/code/howto/manage-data.aliases.py | 55 ++-
_includes/code/howto/manage-data.aliases.ts | 137 +++---
docs/weaviate/config-refs/collections.mdx | 7 +-
.../manage-collections/collection-aliases.mdx | 9 +-
.../managing-collections/index.mdx | 60 +--
.../_includes/collection_alias_tutorial.png | Bin 0 -> 222210 bytes
docs/weaviate/tutorials/aliases.mdx | 405 ++++++++++++++++++
docs/weaviate/tutorials/index.mdx | 7 +
sidebars.js | 1 +
11 files changed, 615 insertions(+), 199 deletions(-)
create mode 100644 docs/weaviate/tutorials/_includes/collection_alias_tutorial.png
create mode 100644 docs/weaviate/tutorials/aliases.mdx
diff --git a/_includes/code/howto/go/docs/manage-data.aliases_test.go b/_includes/code/howto/go/docs/manage-data.aliases_test.go
index 0d7a9b2c..3ff4ef9b 100644
--- a/_includes/code/howto/go/docs/manage-data.aliases_test.go
+++ b/_includes/code/howto/go/docs/manage-data.aliases_test.go
@@ -19,12 +19,15 @@ func Test_ManageAliases(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
+ // START ConnectToWeaviate
+ // Connect to local Weaviate instance
config := weaviate.Config{
Scheme: "http",
Host: "localhost:8080",
}
client, err := weaviate.NewClient(config)
require.NoError(t, err)
+ // END ConnectToWeaviate
// Check if Weaviate is ready
ready, err := client.Misc().ReadyChecker().Do(ctx)
@@ -40,7 +43,7 @@ func Test_ManageAliases(t *testing.T) {
}
// Clean up collections
- collectionsToDelete := []string{"Articles", "ArticlesV2", "Products_v1", "Products_v2"}
+ collectionsToDelete := []string{"Articles", "ArticlesV2", "Products_v1", "Products_v2", "MyArticles"}
for _, className := range collectionsToDelete {
_ = client.Schema().ClassDeleter().WithClassName(className).Do(ctx)
}
@@ -215,16 +218,6 @@ func Test_ManageAliases(t *testing.T) {
fmt.Printf("Found: %v\n", title)
}
}
-
- // Add a new property using the alias
- property := &models.Property{
- Name: "author",
- DataType: schema.DataTypeText.PropString(),
- }
- err = client.Schema().PropertyCreator().
- WithClassName("MyArticles").
- WithProperty(property).
- Do(ctx)
// UseAlias END
require.NoError(t, err)
@@ -233,8 +226,8 @@ func Test_ManageAliases(t *testing.T) {
})
t.Run("migration example", func(t *testing.T) {
- // MigrationExample START
- // Step 1: Create original collection with data
+ // START Step1CreateOriginal
+ // Create original collection with data
err := client.Schema().ClassCreator().WithClass(&models.Class{
Class: "Products_v1",
Vectorizer: "none",
@@ -265,16 +258,38 @@ func Test_ManageAliases(t *testing.T) {
Do(ctx)
require.NoError(t, err)
+ // END Step1CreateOriginal
- // Step 2: Create alias pointing to current collection
+ // START Step2CreateAlias
+ // Create alias pointing to current collection
err = client.Alias().AliasCreator().WithAlias(&alias.Alias{
Alias: "Products",
Class: "Products_v1",
}).Do(ctx)
require.NoError(t, err)
+ // END Step2CreateAlias
+
+ // START MigrationUseAlias
+ // Your application always uses the alias name "Products"
+ // Insert data through the alias
+ _, err = client.Data().Creator().WithClassName("Products").WithProperties(map[string]interface{}{
+ "name": "Product C",
+ "price": 300,
+ }).Do(ctx)
+ require.NoError(t, err)
+
+ // Query through the alias
+ resp, err := client.Data().ObjectsGetter().WithClassName("Products").WithLimit(5).Do(ctx)
+ require.NoError(t, err)
+ for _, obj := range resp {
+ props := obj.Properties.(map[string]interface{})
+ t.Logf("Product: %v, Price: $%v", props["name"], props["price"])
+ }
+ // END MigrationUseAlias
- // Step 3: Create new collection with updated schema
+ // START Step3NewCollection
+ // Create new collection with updated schema
err = client.Schema().ClassCreator().WithClass(&models.Class{
Class: "Products_v2",
Vectorizer: "none",
@@ -286,8 +301,10 @@ func Test_ManageAliases(t *testing.T) {
}).Do(ctx)
require.NoError(t, err)
+ // END Step3NewCollection
- // Step 4: Migrate data to new collection
+ // START Step4MigrateData
+ // Migrate data to new collection
oldData, err := client.Data().ObjectsGetter().
WithClassName("Products_v1").
Do(ctx)
@@ -306,8 +323,10 @@ func Test_ManageAliases(t *testing.T) {
require.NoError(t, err)
}
+ // END Step4MigrateData
- // Step 5: Switch alias to new collection (instant switch!)
+ // START Step5UpdateAlias
+ // Switch alias to new collection (instant switch!)
err = client.Alias().AliasUpdater().WithAlias(&alias.Alias{
Alias: "Products",
Class: "Products_v2",
@@ -326,10 +345,12 @@ func Test_ManageAliases(t *testing.T) {
if len(result) > 0 {
fmt.Printf("%v\n", result[0].Properties) // Will include the new "category" field
}
+ // END Step5UpdateAlias
- // Step 6: Clean up old collection after verification
+ // START Step6Cleanup
+ // Clean up old collection after verification
err = client.Schema().ClassDeleter().WithClassName("Products_v1").Do(ctx)
- // MigrationExample END
+ // END Step6Cleanup
// Error is expected if collection has data or other dependencies
// In production, you'd want to ensure the collection is empty first
diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java
index 20ebb606..33449886 100644
--- a/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java
+++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java
@@ -30,12 +30,15 @@ class AliasesTest {
@BeforeAll
public static void beforeAll() {
+ // START ConnectToWeaviate
+ // Connect to local Weaviate instance
String scheme = "http";
String host = "localhost";
String port = "8080";
Config config = new Config(scheme, host + ":" + port);
client = new WeaviateClient(config);
+ // END ConnectToWeaviate
}
@BeforeEach
@@ -95,14 +98,10 @@ public void shouldListAllAliases() {
Result