diff --git a/.gitignore b/.gitignore index 8a71d990..2e4793da 100644 --- a/.gitignore +++ b/.gitignore @@ -259,5 +259,6 @@ __marimo__/ # Streamlit .streamlit/secrets.toml -_includes/code/csharp/bin/ -_includes/code/csharp/obj/ \ No newline at end of file +# C# code +_includes/code/csharp/bin +_includes/code/csharp/obj diff --git a/_includes/code/automated-testing.py b/_includes/code/automated-testing.py index ca2bf3db..d29bc22a 100644 --- a/_includes/code/automated-testing.py +++ b/_includes/code/automated-testing.py @@ -17,6 +17,11 @@ headers=headers, ) +# Instantiate the v4 Weaviate client using the local helper. +# client = weaviate.connect_to_local( +# headers=headers, +# ) + client.collections.delete("JeopardyQuestion") client.collections.delete("JeopardyCategory") diff --git a/_includes/code/howto/configure-rq/rq-compression-v3.ts b/_includes/code/howto/configure-rq/rq-compression-v3.ts index 973e46fb..59a623c1 100644 --- a/_includes/code/howto/configure-rq/rq-compression-v3.ts +++ b/_includes/code/howto/configure-rq/rq-compression-v3.ts @@ -11,7 +11,7 @@ import { configure } from 'weaviate-client'; const client = await weaviate.connectToLocal({ headers: { - "X-OpenAI-Api-Key": process.env.OPENAI_API_KEY as string, // Replace with your OpenAI API key + "X-OpenAI-Api-Key": process.env.OPENAI_API_KEY as string, // Replace with your OpenAI API key } }) @@ -27,7 +27,7 @@ await client.collections.delete("MyCollection") await client.collections.create({ name: "MyCollection", - vectorizers : configure.vectors.text2VecOpenAI({ + vectorizers: configure.vectors.text2VecOpenAI({ // highlight-start quantizer: configure.vectorIndex.quantizer.rq() // highlight-end @@ -38,6 +38,29 @@ await client.collections.create({ }) // END EnableRQ +// ============================== +// ===== EnableRQ 1-BIT ======== +// ============================== + +await client.collections.delete("MyCollection") + +// START 1BitEnableRQ + +await client.collections.create({ + name: "MyCollection", + 
vectorizers: configure.vectors.text2VecOpenAI({ + // highlight-start + quantizer: configure.vectorIndex.quantizer.rq({ + bits: 1, + }) + // highlight-end + }), + properties: [ + { name: "title", dataType: weaviate.configure.dataType.TEXT } + ] +}) +// END 1BitEnableRQ + // ============================== // ===== EnableRQ with Options ===== // ============================== @@ -51,7 +74,7 @@ await client.collections.create({ vectorizers: configure.vectors.text2VecOpenAI({ // highlight-start quantizer: configure.vectorIndex.quantizer.rq({ - bits: 8, // Number of bits, only 8 is supported for now + bits: 8, // Number of bits }), // highlight-end }), diff --git a/_includes/code/howto/configure-rq/rq-compression-v4.py b/_includes/code/howto/configure-rq/rq-compression-v4.py index 29568301..d69628f1 100644 --- a/_includes/code/howto/configure-rq/rq-compression-v4.py +++ b/_includes/code/howto/configure-rq/rq-compression-v4.py @@ -41,6 +41,50 @@ ) # END EnableRQ +# ============================== +# ===== EnableRQ 1-BIT ======== +# ============================== + +client.collections.delete("MyCollection") + +# START 1BitEnableRQ +from weaviate.classes.config import Configure, Property, DataType + +client.collections.create( + name="MyCollection", + vector_config=Configure.Vectors.text2vec_openai( + # highlight-start + quantizer=Configure.VectorIndex.Quantizer.rq(bits=1) + # highlight-end + ), + properties=[ + Property(name="title", data_type=DataType.TEXT), + ], +) +# END 1BitEnableRQ + +# ========================= +# ===== Uncompressed ===== +# ========================= + +client.collections.delete("MyCollection") + +# START Uncompressed +from weaviate.classes.config import Configure, Property, DataType + +client.collections.create( + name="MyCollection", + vector_config=Configure.Vectors.text2vec_openai( + # highlight-start + quantizer=Configure.VectorIndex.Quantizer.none() + # highlight-end + ), + properties=[ + Property(name="title", data_type=DataType.TEXT), + ], +) 
+# END Uncompressed + # ============================== # ===== EnableRQ with Options ===== # ============================== @@ -55,7 +99,7 @@ vector_config=Configure.Vectors.text2vec_openai( # highlight-start quantizer=Configure.VectorIndex.Quantizer.rq( - bits=8, # Optional: Number of bits, only 8 is supported for now + bits=8, # Optional: Number of bits rescore_limit=20, # Optional: Number of candidates to fetch before rescoring ), # highlight-end @@ -70,6 +114,19 @@ # ===== UPDATE SCHEMA ===== # ============================== +client.collections.delete("MyCollection") +client.collections.create( + name="MyCollection", + vector_config=Configure.Vectors.text2vec_openai( + # highlight-start + quantizer=Configure.VectorIndex.Quantizer.none(), + # highlight-end + ), + properties=[ + Property(name="title", data_type=DataType.TEXT), + ], +) + # START UpdateSchema from weaviate.classes.config import Reconfigure @@ -78,14 +135,43 @@ vector_config=Reconfigure.Vectors.update( name="default", vector_index_config=Reconfigure.VectorIndex.hnsw( - quantizer=Reconfigure.VectorIndex.Quantizer.rq( - rescore_limit=20, # Optional: Number of candidates to fetch before rescoring - ), + quantizer=Reconfigure.VectorIndex.Quantizer.rq(), ), ) ) # END UpdateSchema +# ================================ +# ===== UPDATE SCHEMA 1-BIT ===== +# ================================ + +client.collections.delete("MyCollection") +client.collections.create( + name="MyCollection", + vector_config=Configure.Vectors.text2vec_openai( + # highlight-start + quantizer=Configure.VectorIndex.Quantizer.none(), + # highlight-end + ), + properties=[ + Property(name="title", data_type=DataType.TEXT), + ], +) + +# START 1BitUpdateSchema +from weaviate.classes.config import Reconfigure + +collection = client.collections.use("MyCollection") +collection.config.update( + vector_config=Reconfigure.Vectors.update( + name="default", + vector_index_config=Reconfigure.VectorIndex.hnsw( + 
quantizer=Reconfigure.VectorIndex.Quantizer.rq(bits=1), + ), + ) +) +# END 1BitUpdateSchema + from weaviate.collections.classes.config import _RQConfig config = client.collections.use("MyCollection").config.get() diff --git a/_includes/code/howto/go/docs/configure/compression.rq_test.go b/_includes/code/howto/go/docs/configure/compression.rq_test.go index 72b844f8..cc9836b1 100644 --- a/_includes/code/howto/go/docs/configure/compression.rq_test.go +++ b/_includes/code/howto/go/docs/configure/compression.rq_test.go @@ -89,6 +89,56 @@ func TestRQConfiguration(t *testing.T) { assert.Equal(t, true, rqConfig["enabled"]) }) + t.Run("Enable 1-bit RQ", func(t *testing.T) { + className := "MyCollectionRQDefault" + // Delete the collection if it already exists to ensure a clean start + err := client.Schema().ClassDeleter().WithClassName(className).Do(context.Background()) + if err != nil { + // This is not a fatal error, the collection might not exist + log.Printf("Could not delete collection '%s', it might not exist: %v\n", className, err) + } + + // START 1BitEnableRQ + // Define the configuration for RQ: 'enabled' turns RQ on and 'bits' set to 1 selects 1-bit RQ + // highlight-start + rq_config := map[string]interface{}{ + "enabled": true, + "bits": 1, + } + // highlight-end + + // Define the class schema + class := &models.Class{ + Class: className, + Vectorizer: "text2vec-openai", + // highlight-start + // Assign the RQ configuration to the vector index config + VectorIndexConfig: map[string]interface{}{ + "rq": rq_config, + }, + // highlight-end + } + + // Create the collection in Weaviate + err = client.Schema().ClassCreator(). + WithClass(class). 
+ Do(context.Background()) + // END 1BitEnableRQ + require.NoError(t, err) + + // Assertions to verify the configuration + classInfo, err := client.Schema().ClassGetter().WithClassName(className).Do(ctx) + require.NoError(t, err) + require.NotNil(t, classInfo) + + vic, ok := classInfo.VectorIndexConfig.(map[string]interface{}) + require.True(t, ok) + rqConfig, ok := vic["rq"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, true, rqConfig["enabled"]) + assert.Equal(t, float64(1), rqConfig["bits"]) + }) + t.Run("Enable RQ with Options", func(t *testing.T) { className := "MyCollectionRQWithOptions" // Delete the collection to recreate it with new options @@ -102,7 +152,7 @@ func TestRQConfiguration(t *testing.T) { // highlight-start rq_with_options_config := map[string]interface{}{ "enabled": true, - "bits": 8, // Optional: Number of bits, only 8 is supported for now + "bits": 8, // Optional: Number of bits "rescoreLimit": 20, // Optional: Number of candidates to fetch before rescoring } // highlight-end @@ -176,10 +226,9 @@ func TestRQConfiguration(t *testing.T) { // Get the current vector index configuration cfg := class.VectorIndexConfig.(map[string]interface{}) - // Add RQ configuration to enable scalar quantization + // Add RQ configuration to enable quantization cfg["rq"] = map[string]interface{}{ - "enabled": true, - "rescoreLimit": 20, // Optional: Number of candidates to fetch before rescoring + "enabled": true, } // Update the class configuration @@ -207,4 +256,71 @@ func TestRQConfiguration(t *testing.T) { assert.Equal(t, true, rqConfig["enabled"]) assert.Equal(t, float64(20), rqConfig["rescoreLimit"]) }) + + t.Run("Enable 1-bit RQ on Existing Collection", func(t *testing.T) { + className := "MyExistingCollection" + + // First, create a collection without RQ + err := client.Schema().ClassDeleter().WithClassName(className).Do(context.Background()) + if err != nil { + log.Printf("Could not delete collection '%s', it might not exist: %v\n", 
className, err) + } + + // Create initial collection without RQ + initialClass := &models.Class{ + Class: className, + Vectorizer: "text2vec-openai", + VectorIndexConfig: map[string]interface{}{ + "distance": "cosine", + }, + } + + err = client.Schema().ClassCreator(). + WithClass(initialClass). + Do(context.Background()) + require.NoError(t, err) + + // START 1BitUpdateSchema + // Get the existing collection configuration + class, err := client.Schema().ClassGetter(). + WithClassName(className).Do(context.Background()) + + if err != nil { + log.Fatalf("get class for vec idx cfg update: %v", err) + } + + // Get the current vector index configuration + cfg := class.VectorIndexConfig.(map[string]interface{}) + + // Add RQ configuration to enable 1-bit quantization + cfg["rq"] = map[string]interface{}{ + "enabled": true, + "bits": 1, + } + + // Update the class configuration + class.VectorIndexConfig = cfg + + // Apply the updated configuration to the collection + err = client.Schema().ClassUpdater(). + WithClass(class).Do(context.Background()) + + if err != nil { + log.Fatalf("update class to use rq: %v", err) + } + // END 1BitUpdateSchema + + // Verify the RQ configuration was applied + updatedClass, err := client.Schema().ClassGetter(). 
+ WithClassName(className).Do(context.Background()) + require.NoError(t, err) + + vic, ok := updatedClass.VectorIndexConfig.(map[string]interface{}) + require.True(t, ok) + + rqConfig, ok := vic["rq"].(map[string]interface{}) + require.True(t, ok) + assert.Equal(t, true, rqConfig["enabled"]) + assert.Equal(t, float64(1), rqConfig["bits"]) + }) } diff --git a/_includes/code/howto/go/docs/configure/rbac.oidc.users_test.go b/_includes/code/howto/go/docs/configure/rbac.oidc.users_test.go new file mode 100644 index 00000000..72ed275c --- /dev/null +++ b/_includes/code/howto/go/docs/configure/rbac.oidc.users_test.go @@ -0,0 +1,157 @@ +package docs + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/weaviate/weaviate-go-client/v5/weaviate" + "github.com/weaviate/weaviate-go-client/v5/weaviate/auth" + "github.com/weaviate/weaviate-go-client/v5/weaviate/rbac" + "github.com/weaviate/weaviate/entities/models" +) + +func TestOIDCUserManagement(t *testing.T) { + // TODO[g-despot]: OIDC testing not yet implemented + // This test requires OIDC provider configuration + // Uncomment and configure when OIDC is available + // t.Skip("OIDC testing not yet implemented") + + ctx := context.Background() + + // ============================== + // ===== CONNECT ===== + // ============================== + + // START AdminClient + cfg := weaviate.Config{ + Host: "localhost:8580", + Scheme: "http", + AuthConfig: auth.ApiKey{Value: "root-user-key"}, + } + + // Connect to Weaviate as root user + client, err := weaviate.NewClient(cfg) + // END AdminClient + require.NoError(t, err) + + // Verify connection + ready, err := client.Misc().ReadyChecker().Do(ctx) + require.NoError(t, err) + require.True(t, ready) + + // Clean up any existing test role + client.Roles().Deleter().WithName("testRole").Do(ctx) + + // Create test role for OIDC user assignment + permissions := []rbac.Permission{ + 
rbac.CollectionsPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadCollections, + models.PermissionActionCreateCollections, + }, + }, + rbac.DataPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadData, + models.PermissionActionCreateData, + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + require.NoError(t, err) + + t.Cleanup(func() { + client.Roles().Deleter().WithName("testRole").Do(ctx) + }) + + t.Run("AssignOidcUserRole", func(t *testing.T) { + // START AssignOidcUserRole + err = client.Users().OIDC().RolesAssigner(). + WithUserID("custom-user"). + WithRoles("testRole", "viewer"). + Do(ctx) + // END AssignOidcUserRole + + require.NoError(t, err) + + // Verify roles were assigned + userRoles, err := client.Users().OIDC().RolesGetter(). + WithUserID("custom-user"). + WithIncludeFullRoles(true). + Do(ctx) + require.NoError(t, err) + + roleNames := make([]string, len(userRoles)) + for i, role := range userRoles { + roleNames[i] = role.Name + } + assert.Contains(t, roleNames, "testRole") + assert.Contains(t, roleNames, "viewer") + }) + + t.Run("ListOidcUserRoles", func(t *testing.T) { + // Ensure roles are assigned + err = client.Users().OIDC().RolesAssigner(). + WithUserID("custom-user"). + WithRoles("testRole", "viewer"). + Do(ctx) + require.NoError(t, err) + + // START ListOidcUserRoles + userRoles, err := client.Users().OIDC().RolesGetter(). + WithUserID("custom-user"). + WithIncludeFullRoles(true). 
+ Do(ctx) + + for _, role := range userRoles { + fmt.Println(role) + } + // END ListOidcUserRoles + + require.NoError(t, err) + + roleNames := make([]string, len(userRoles)) + for i, role := range userRoles { + roleNames[i] = role.Name + } + assert.Contains(t, roleNames, "testRole") + assert.Contains(t, roleNames, "viewer") + }) + + t.Run("RevokeOidcUserRoles", func(t *testing.T) { + // Ensure roles are assigned first + err = client.Users().OIDC().RolesAssigner(). + WithUserID("custom-user"). + WithRoles("testRole", "viewer"). + Do(ctx) + require.NoError(t, err) + + // START RevokeOidcUserRoles + err = client.Users().OIDC().RolesRevoker(). + WithUserID("custom-user"). + WithRoles("testRole"). + Do(ctx) + // END RevokeOidcUserRoles + + require.NoError(t, err) + + // Verify role was revoked + userRoles, err := client.Users().OIDC().RolesGetter(). + WithUserID("custom-user"). + WithIncludeFullRoles(true). + Do(ctx) + require.NoError(t, err) + + roleNames := make([]string, len(userRoles)) + for i, role := range userRoles { + roleNames[i] = role.Name + } + assert.NotContains(t, roleNames, "testRole", "testRole should be revoked") + }) +} diff --git a/_includes/code/howto/go/docs/configure/rbac.roles_test.go b/_includes/code/howto/go/docs/configure/rbac.roles_test.go new file mode 100644 index 00000000..d1050be1 --- /dev/null +++ b/_includes/code/howto/go/docs/configure/rbac.roles_test.go @@ -0,0 +1,647 @@ +package docs + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/weaviate/weaviate-go-client/v5/weaviate" + "github.com/weaviate/weaviate-go-client/v5/weaviate/auth" + "github.com/weaviate/weaviate-go-client/v5/weaviate/rbac" + "github.com/weaviate/weaviate/entities/models" +) + +func TestRoleManagement(t *testing.T) { + ctx := context.Background() + + // ============================== + // ===== CONNECT ===== + // ============================== + + cfg := weaviate.Config{ + Host: 
"localhost:8580", + Scheme: "http", + AuthConfig: auth.ApiKey{Value: "root-user-key"}, + } + + client, err := weaviate.NewClient(cfg) + require.NoError(t, err) + + // Verify connection + ready, err := client.Misc().ReadyChecker().Do(ctx) + require.NoError(t, err) + require.True(t, ready) + + // Clean up any existing test roles before tests + allRoles, err := client.Roles().AllGetter().Do(ctx) + if err == nil { + for _, role := range allRoles { + if role.Name != "viewer" && role.Name != "root" && role.Name != "admin" && role.Name != "read-only" { + client.Roles().Deleter().WithName(role.Name).Do(ctx) + } + } + } + + // Clean up helper function for deferred cleanup + cleanup := func(roleName string) { + client.Roles().Deleter().WithName(roleName).Do(ctx) + } + + t.Run("AdminClient", func(t *testing.T) { + // START AdminClient + cfg := weaviate.Config{ + Host: "localhost:8580", + Scheme: "http", + AuthConfig: auth.ApiKey{Value: "root-user-key"}, + } + + // Connect to Weaviate as root user + client, err := weaviate.NewClient(cfg) + // END AdminClient + require.NoError(t, err) + require.NotNil(t, client) + }) + + t.Run("CreateRole", func(t *testing.T) { + defer cleanup("testRole") + + // START CreateRole + role := rbac.NewRole("testRole", rbac.DataPermission{ + Actions: []string{models.PermissionActionReadData}, + Collection: "*", + }) + err = client.Roles().Creator().WithRole(role).Do(ctx) + require.NoError(t, err) + // END CreateRole + + // START CheckRoleExists + exists, err := client.Roles().Exists().WithName("testRole").Do(ctx) + fmt.Println(exists) // Returns true or false + // END CheckRoleExists + + require.NoError(t, err) + assert.True(t, exists) + }) + + t.Run("AddManageRolesPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddManageRolesPermission + permissions := []rbac.Permission{ + rbac.RolesPermission{ + Role: "testRole*", // Applies to all roles starting with "testRole" + Scope: "match", // Only allow role management with the 
current user's permission level, can also be "all" + // Scope: rbac.RoleScopeAll, // Allow role management with all permissions + Actions: []string{ + models.PermissionActionCreateRoles, // Allow creating roles + models.PermissionActionReadRoles, // Allow reading roles + models.PermissionActionUpdateRoles, // Allow updating roles + models.PermissionActionDeleteRoles, // Allow deleting roles + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddManageRolesPermission + require.NoError(t, err) + + exists, err := client.Roles().Exists().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.True(t, exists) + }) + + t.Run("AddManageUsersPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddManageUsersPermission + permissions := []rbac.Permission{ + rbac.UsersPermission{ + Actions: []string{ + models.PermissionActionCreateUsers, // Allow creating users + models.PermissionActionReadUsers, // Allow reading user info + models.PermissionActionUpdateUsers, // Allow rotating user API key + models.PermissionActionDeleteUsers, // Allow deleting users + models.PermissionActionAssignAndRevokeUsers, // Allow assigning and revoking roles to and from users + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddManageUsersPermission + require.NoError(t, err) + + exists, err := client.Roles().Exists().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.True(t, exists) + }) + + t.Run("AddCollectionsPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddCollectionsPermission + permissions := []rbac.Permission{ + rbac.CollectionsPermission{ + Collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + Actions: []string{ + models.PermissionActionCreateCollections, // Allow creating new collections + models.PermissionActionReadCollections, // Allow reading 
collection info/metadata + models.PermissionActionUpdateCollections, // Allow updating collection configuration, i.e. update schema properties, when inserting data with new properties + models.PermissionActionDeleteCollections, // Allow deleting collections + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddCollectionsPermission + require.NoError(t, err) + + role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Collections) + assert.Greater(t, len(role.Collections), 0) + found := false + for _, perm := range role.Collections { + if perm.Collection == "TargetCollection*" { + found = true + break + } + } + assert.True(t, found) + }) + + t.Run("AddTenantPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddTenantPermission + permissions := []rbac.Permission{ + rbac.TenantsPermission{ + Actions: []string{ + models.PermissionActionCreateTenants, // Allow creating new tenants + models.PermissionActionReadTenants, // Allow reading tenant info/metadata + models.PermissionActionUpdateTenants, // Allow updating tenant states + models.PermissionActionDeleteTenants, // Allow deleting tenants + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddTenantPermission + require.NoError(t, err) + + role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Tenants) + assert.Greater(t, len(role.Tenants), 0) + }) + + t.Run("AddDataObjectPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddDataObjectPermission + permissions := []rbac.Permission{ + rbac.DataPermission{ + Collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + Actions: []string{ + models.PermissionActionCreateData, // Allow data inserts + models.PermissionActionReadData, // Allow 
query and fetch operations + models.PermissionActionUpdateData, // Allow data updates + // models.PermissionActionDeleteData, // Allow data deletes - set to false by not including + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddDataObjectPermission + require.NoError(t, err) + + role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Data) + assert.Greater(t, len(role.Data), 0) + found := false + for _, perm := range role.Data { + if perm.Collection == "TargetCollection*" { + found = true + break + } + } + assert.True(t, found) + }) + + t.Run("AddBackupPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddBackupPermission + permissions := []rbac.Permission{ + rbac.BackupsPermission{ + Collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + Actions: []string{ + models.PermissionActionManageBackups, // Allow managing backups + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddBackupPermission + require.NoError(t, err) + + role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Backups) + assert.Greater(t, len(role.Backups), 0) + found := false + for _, perm := range role.Backups { + if perm.Collection == "TargetCollection*" { + found = true + break + } + } + assert.True(t, found) + }) + + t.Run("AddClusterPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddClusterPermission + permissions := []rbac.Permission{ + rbac.ClusterPermission{ + Actions: []string{ + models.PermissionActionReadCluster, // Allow reading cluster data + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddClusterPermission + require.NoError(t, err) + + role, err := 
client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Cluster) + }) + + t.Run("AddNodesPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddNodesPermission + verbosePermissions := []rbac.Permission{ + rbac.NodesPermission{ + Verbosity: "verbose", + Collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + Actions: []string{ + models.PermissionActionReadNodes, // Allow reading node metadata + }, + }, + } + + // The `minimal` verbosity level applies to all collections unlike + // the `verbose` level where you specify the collection name filter + err = client.Roles().Creator().WithRole( + rbac.NewRole("testRole", verbosePermissions...), + ).Do(ctx) + // END AddNodesPermission + require.NoError(t, err) + + role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Nodes) + assert.Greater(t, len(role.Nodes), 0) + found := false + for _, perm := range role.Nodes { + if perm.Collection == "TargetCollection*" { + found = true + break + } + } + assert.True(t, found) + }) + + t.Run("AddAliasPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddAliasPermission + permissions := []rbac.Permission{ + rbac.AliasPermission{ + Alias: "TargetAlias*", // Applies to all aliases starting with "TargetAlias" + Collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + Actions: []string{ + models.PermissionActionCreateAliases, // Allow alias creation + models.PermissionActionReadAliases, // Allow listing aliases + models.PermissionActionUpdateAliases, // Allow updating aliases + // models.PermissionActionDeleteAliases, // Allow deleting aliases - set to false by not including + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddAliasPermission + require.NoError(t, err) + + role, err := 
client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Alias) + assert.Greater(t, len(role.Alias), 0) + found := false + for _, perm := range role.Alias { + if perm.Alias == "TargetAlias*" { + found = true + break + } + } + assert.True(t, found) + }) + + t.Run("AddReplicationsPermission", func(t *testing.T) { + defer cleanup("testRole") + + // START AddReplicationsPermission + permissions := []rbac.Permission{ + rbac.ReplicatePermission{ + Collection: "TargetCollection*", // Applies to all collections starting with "TargetCollection" + Shard: "TargetShard*", // Applies to all shards starting with "TargetShard" + Actions: []string{ + models.PermissionActionCreateReplicate, // Allow replica movement operations + models.PermissionActionReadReplicate, // Allow retrieving replication status + models.PermissionActionUpdateReplicate, // Allow cancelling replication operations + // models.PermissionActionDeleteReplicate, // Allow deleting replication operations - set to false by not including + }, + }, + } + + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // END AddReplicationsPermission + require.NoError(t, err) + + role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Replicate) + assert.Greater(t, len(role.Replicate), 0) + found := false + for _, perm := range role.Replicate { + if perm.Collection == "TargetCollection*" && perm.Shard == "TargetShard*" { + found = true + break + } + } + assert.True(t, found) + }) + + // t.Run("AddGroupsPermission", func(t *testing.T) { + // defer cleanup("testRole") + + // // START AddGroupsPermission + // Coming soon + // permissions := []rbac.Permission{ + // rbac.GroupsPermission{ + // Group: "TargetGroup*", // Applies to all groups starting with "TargetGroup" + // Actions: []string{ + // models.PermissionActionReadGroups, // Allow reading group information + // 
models.PermissionActionAssignAndRevokeGroups, // Allow assigning and revoking group memberships + // }, + // }, + // } + + // err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + // // END AddGroupsPermission + // require.NoError(t, err) + + // role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + // require.NoError(t, err) + // assert.NotNil(t, role.Groups) + // assert.Greater(t, len(role.Groups), 0) + // found := false + // for _, perm := range role.Groups { + // if perm.Group == "TargetGroup*" { + // found = true + // break + // } + // } + // assert.True(t, found) + // }) + + t.Run("AddRoles", func(t *testing.T) { + defer cleanup("testRole") + + // Create initial role + initialPermissions := []rbac.Permission{ + rbac.CollectionsPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadCollections, + }, + }, + } + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", initialPermissions...)).Do(ctx) + require.NoError(t, err) + + // START AddRoles + permissions := []rbac.Permission{ + rbac.DataPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionCreateData, + }, + }, + } + + err = client.Roles().PermissionAdder(). + WithRole("testRole"). + WithPermissions(permissions...). 
+ Do(ctx) + // END AddRoles + require.NoError(t, err) + + role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + assert.NotNil(t, role.Data) + }) + + t.Run("InspectRole", func(t *testing.T) { + defer cleanup("testRole") + + // Create test role with permissions + permissions := []rbac.Permission{ + rbac.CollectionsPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadCollections, + }, + }, + rbac.DataPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionCreateData, + }, + }, + } + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + require.NoError(t, err) + + // START InspectRole + testRole, err := client.Roles().Getter().WithName("testRole").Do(ctx) + + fmt.Println(testRole) + fmt.Println(testRole.Collections) + fmt.Println(testRole.Data) + // END InspectRole + + require.NoError(t, err) + assert.NotNil(t, testRole) + assert.NotNil(t, testRole.Collections) + assert.NotNil(t, testRole.Data) + }) + + t.Run("AssignedUsers", func(t *testing.T) { + defer cleanup("testRole") + + // Create test role + role := rbac.NewRole("testRole", rbac.DataPermission{ + Actions: []string{models.PermissionActionReadData}, + Collection: "*", + }) + err = client.Roles().Creator().WithRole(role).Do(ctx) + require.NoError(t, err) + + // Create and assign user + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + _, err = client.Users().DB().Creator().WithUserID("custom-user").Do(ctx) + require.NoError(t, err) + err = client.Users().DB().RolesAssigner(). + WithUserID("custom-user"). + WithRoles("testRole"). + Do(ctx) + require.NoError(t, err) + + // START AssignedUsers + assignedUsers, err := client.Roles().UserAssignmentGetter(). + WithRole("testRole"). 
+ Do(ctx) + + for _, user := range assignedUsers { + fmt.Println(user) + } + // END AssignedUsers + + require.NoError(t, err) + found := false + for _, u := range assignedUsers { + if u.UserID == "custom-user" { + found = true + break + } + } + assert.True(t, found) + + // Clean up + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + }) + + t.Run("ListAllRoles", func(t *testing.T) { + defer cleanup("testRole") + + // Create test role + role := rbac.NewRole("testRole", rbac.DataPermission{ + Actions: []string{models.PermissionActionReadData}, + Collection: "*", + }) + err = client.Roles().Creator().WithRole(role).Do(ctx) + require.NoError(t, err) + + // START ListAllRoles + allRoles, err := client.Roles().AllGetter().Do(ctx) + + for _, role := range allRoles { + fmt.Println(role.Name, role) + } + // END ListAllRoles + + require.NoError(t, err) + found := false + for _, role := range allRoles { + if role.Name == "testRole" { + found = true + break + } + } + assert.True(t, found) + }) + + t.Run("RemovePermissions", func(t *testing.T) { + defer cleanup("testRole") + + // Create role with permissions + permissions := []rbac.Permission{ + rbac.CollectionsPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadCollections, + models.PermissionActionCreateCollections, + models.PermissionActionDeleteCollections, + }, + }, + rbac.DataPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadData, + models.PermissionActionCreateData, + }, + }, + } + err = client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + require.NoError(t, err) + + // START RemovePermissions + permissions = []rbac.Permission{ + rbac.CollectionsPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadCollections, + models.PermissionActionCreateCollections, + models.PermissionActionDeleteCollections, + }, + }, + rbac.DataPermission{ + 
Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadData, + // models.PermissionActionCreateData is intentionally not listed, so it is not removed + }, + }, + } + + err = client.Roles().PermissionRemover(). + WithRole("testRole"). + WithPermissions(permissions...). + Do(ctx) + // END RemovePermissions + + require.NoError(t, err) + + role, err := client.Roles().Getter().WithName("testRole").Do(ctx) + require.NoError(t, err) + // The role itself must still be retrievable after the removal + assert.NotNil(t, role) + }) + + t.Run("DeleteRole", func(t *testing.T) { + // Create test role + role := rbac.NewRole("testRole", rbac.DataPermission{ + Actions: []string{models.PermissionActionReadData}, + Collection: "*", + }) + err = client.Roles().Creator().WithRole(role).Do(ctx) + require.NoError(t, err) + + // START DeleteRole + err = client.Roles().Deleter().WithName("testRole").Do(ctx) + // END DeleteRole + + require.NoError(t, err) + + exists, err := client.Roles().Exists().WithName("testRole").Do(ctx) + require.Error(t, err) + assert.False(t, exists) + }) +} diff --git a/_includes/code/howto/go/docs/configure/rbac.users_test.go b/_includes/code/howto/go/docs/configure/rbac.users_test.go new file mode 100644 index 00000000..2530e619 --- /dev/null +++ b/_includes/code/howto/go/docs/configure/rbac.users_test.go @@ -0,0 +1,299 @@ +package docs + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/weaviate/weaviate-go-client/v5/weaviate" + "github.com/weaviate/weaviate-go-client/v5/weaviate/auth" + "github.com/weaviate/weaviate-go-client/v5/weaviate/rbac" + "github.com/weaviate/weaviate/entities/models" +) + +func TestUserManagement(t *testing.T) { + ctx := context.Background() + + // ============================== + // ===== CONNECT ===== + // ============================== + + // START AdminClient + cfg := weaviate.Config{ + Host: "localhost:8580", + Scheme: "http", + AuthConfig: auth.ApiKey{Value: "root-user-key"}, + } 
+ + // Connect to Weaviate as root user + client, err := weaviate.NewClient(cfg) + // END AdminClient + require.NoError(t, err) + + // Verify connection + ready, err := client.Misc().ReadyChecker().Do(ctx) + require.NoError(t, err) + require.True(t, ready) + + // Clean up any existing test user + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + + // Clean up any existing test role + client.Roles().Deleter().WithName("testRole").Do(ctx) + + t.Run("CreateUser", func(t *testing.T) { + // START CreateUser + userApiKey, err := client.Users().DB().Creator().WithUserID("custom-user").Do(ctx) + fmt.Println(userApiKey) + // END CreateUser + + require.NoError(t, err) + assert.Greater(t, len(userApiKey), 0) + + // Delete the user when this subtest ends so later subtests can create it again + t.Cleanup(func() { + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + }) + }) + + t.Run("RotateApiKey", func(t *testing.T) { + // Ensure user exists + userApiKey, err := client.Users().DB().Creator().WithUserID("custom-user").Do(ctx) + require.NoError(t, err) + + // START RotateApiKey + newApiKey, err := client.Users().DB().KeyRotator().WithUserID("custom-user").Do(ctx) + fmt.Println(newApiKey) + // END RotateApiKey + + require.NoError(t, err) + assert.Greater(t, len(newApiKey), 0) + assert.NotEqual(t, newApiKey, userApiKey) + + t.Cleanup(func() { + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + }) + }) + + t.Run("AssignRole", func(t *testing.T) { + // Create test role first + permissions := []rbac.Permission{ + rbac.CollectionsPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadCollections, + models.PermissionActionCreateCollections, + }, + }, + rbac.DataPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadData, + models.PermissionActionCreateData, + }, + }, + } + + err := client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + require.NoError(t, err) + + // Ensure user 
exists + _, err = client.Users().DB().Creator().WithUserID("custom-user").Do(ctx) + require.NoError(t, err) + + // START AssignRole + err = client.Users().DB().RolesAssigner(). + WithUserID("custom-user"). + WithRoles("testRole", "viewer"). + Do(ctx) + // END AssignRole + + require.NoError(t, err) + + // Verify roles were assigned + userRoles, err := client.Users().DB().RolesGetter(). + WithUserID("custom-user"). + WithIncludeFullRoles(true). + Do(ctx) + require.NoError(t, err) + + roleNames := make([]string, len(userRoles)) + for i, role := range userRoles { + roleNames[i] = role.Name + } + assert.Contains(t, roleNames, "testRole") + assert.Contains(t, roleNames, "viewer") + + t.Cleanup(func() { + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + client.Roles().Deleter().WithName("testRole").Do(ctx) + }) + }) + + t.Run("ListAllUsers", func(t *testing.T) { + // Ensure user exists + _, err := client.Users().DB().Creator().WithUserID("custom-user").Do(ctx) + require.NoError(t, err) + + // START ListAllUsers + users, err := client.Users().DB().Lister().Do(ctx) + fmt.Println(users) + // END ListAllUsers + + require.NoError(t, err) + + // Verify custom-user is in the list + userFound := false + for _, user := range users { + if user.UserID == "custom-user" { + userFound = true + break + } + } + assert.True(t, userFound, "custom-user should be in the list") + + t.Cleanup(func() { + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + }) + }) + + t.Run("ListUserRoles", func(t *testing.T) { + // Create test role + permissions := []rbac.Permission{ + rbac.CollectionsPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadCollections, + models.PermissionActionCreateCollections, + }, + }, + rbac.DataPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadData, + models.PermissionActionCreateData, + }, + }, + } + + err := 
client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + require.NoError(t, err) + + // Create user and assign roles + _, err = client.Users().DB().Creator().WithUserID("custom-user").Do(ctx) + require.NoError(t, err) + + err = client.Users().DB().RolesAssigner(). + WithUserID("custom-user"). + WithRoles("testRole", "viewer"). + Do(ctx) + require.NoError(t, err) + + // START ListUserRoles + userRoles, err := client.Users().DB().RolesGetter(). + WithUserID("custom-user"). + WithIncludeFullRoles(true). + Do(ctx) + + for _, role := range userRoles { + fmt.Println(role) + } + // END ListUserRoles + + require.NoError(t, err) + + roleNames := make([]string, len(userRoles)) + for i, role := range userRoles { + roleNames[i] = role.Name + } + assert.Contains(t, roleNames, "testRole") + assert.Contains(t, roleNames, "viewer") + + t.Cleanup(func() { + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + client.Roles().Deleter().WithName("testRole").Do(ctx) + }) + }) + + t.Run("RevokeRoles", func(t *testing.T) { + // Create test role + permissions := []rbac.Permission{ + rbac.CollectionsPermission{ + Collection: "TargetCollection*", + Actions: []string{ + models.PermissionActionReadCollections, + models.PermissionActionCreateCollections, + }, + }, + } + + err := client.Roles().Creator().WithRole(rbac.NewRole("testRole", permissions...)).Do(ctx) + require.NoError(t, err) + + // Create user and assign roles + _, err = client.Users().DB().Creator().WithUserID("custom-user").Do(ctx) + require.NoError(t, err) + + err = client.Users().DB().RolesAssigner(). + WithUserID("custom-user"). + WithRoles("testRole", "viewer"). + Do(ctx) + require.NoError(t, err) + + // START RevokeRoles + err = client.Users().DB().RolesRevoker(). + WithUserID("custom-user"). + WithRoles("testRole"). + Do(ctx) + // END RevokeRoles + + require.NoError(t, err) + + // Verify role was revoked + userRoles, err := client.Users().DB().RolesGetter(). 
+ WithUserID("custom-user"). + WithIncludeFullRoles(true). + Do(ctx) + require.NoError(t, err) + + roleNames := make([]string, len(userRoles)) + for i, role := range userRoles { + roleNames[i] = role.Name + } + assert.NotContains(t, roleNames, "testRole", "testRole should be revoked") + assert.Contains(t, roleNames, "viewer", "viewer role should still be assigned") + + t.Cleanup(func() { + client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + client.Roles().Deleter().WithName("testRole").Do(ctx) + }) + }) + + t.Run("DeleteUser", func(t *testing.T) { + // Create user + _, err := client.Users().DB().Creator().WithUserID("custom-user").Do(ctx) + require.NoError(t, err) + + // START DeleteUser + deleted, err := client.Users().DB().Deleter().WithUserID("custom-user").Do(ctx) + // END DeleteUser + + require.NoError(t, err) + require.True(t, deleted) + + // Verify user was deleted + users, err := client.Users().DB().Lister().Do(ctx) + require.NoError(t, err) + + userFound := false + for _, user := range users { + if user.UserID == "custom-user" { + userFound = true + break + } + } + assert.False(t, userFound, "custom-user should not be in the list after deletion") + }) +} diff --git a/_includes/code/howto/go/docs/mainpkg/search-filters_test.go b/_includes/code/howto/go/docs/mainpkg/search-filters_test.go index d9d751e5..a6e0ded6 100644 --- a/_includes/code/howto/go/docs/mainpkg/search-filters_test.go +++ b/_includes/code/howto/go/docs/mainpkg/search-filters_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "log" + "strconv" "strings" "testing" "time" @@ -12,16 +13,10 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - //"github.com/weaviate/weaviate-go-client/v5/weaviate" - //"github.com/weaviate/weaviate-go-client/v5/weaviate/auth" "github.com/weaviate/weaviate-go-client/v5/weaviate/filters" "github.com/weaviate/weaviate-go-client/v5/weaviate/graphql" ) -// ================================ -// ===== 
INSTANTIATION-COMMON ===== -// ================================ - // ========================================== // ===== Single Filter ===== // ========================================== @@ -171,6 +166,59 @@ func TestContainsAllFilter(t *testing.T) { } } +// ========================================== +// ===== ContainsNone Filter ===== +// ========================================== + +func TestContainsNoneFilter(t *testing.T) { + client := setupClient() + ctx := context.Background() + + // START ContainsNoneFilter Go + // highlight-start + tokenList := []string{"bird", "animal"} + // highlight-end + + response, err := client.GraphQL().Get(). + WithClassName("JeopardyQuestion"). + WithFields( + graphql.Field{Name: "question"}, + graphql.Field{Name: "answer"}, + graphql.Field{ + Name: "hasCategory", + Fields: []graphql.Field{ + {Name: "... on JeopardyCategory", Fields: []graphql.Field{{Name: "title"}}}, + }, + }, + ). + // highlight-start + WithWhere(filters.Where(). + WithPath([]string{"answer"}). + WithOperator(filters.ContainsNone). + WithValueText(tokenList...)). + // highlight-end + WithLimit(3). 
+ Do(ctx) + // END ContainsNoneFilter Go + + require.NoError(t, err) + outBytes, err := json.Marshal(response) + require.NoError(t, err) + fmt.Printf("%s\n", string(outBytes)) + + objects := response.Data["Get"].(map[string]interface{})["JeopardyQuestion"].([]interface{}) + require.NotEmpty(t, objects) + + for _, obj := range objects { + question := obj.(map[string]interface{}) + answer := strings.ToLower(question["answer"].(string)) + // Assert that the answer does not contain any of the excluded tokens + for _, token := range tokenList { + assert.NotContains(t, answer, token) + } + } +} + // ========================================== // ===== Partial Match Filter ===== // ========================================== @@ -206,7 +254,7 @@ func TestLikeFilter(t *testing.T) { } // ========================================== -// ===== Multiple Filters with And ===== +// ===== Multiple Filters with And & Not ===== // ========================================== func TestMultipleFiltersAnd(t *testing.T) { @@ -216,15 +264,26 @@ func TestMultipleFiltersAnd(t *testing.T) { // START MultipleFiltersAnd Go response, err := client.GraphQL().Get(). WithClassName("JeopardyQuestion"). - WithFields(graphql.Field{Name: "question"}, graphql.Field{Name: "answer"}, graphql.Field{Name: "round"}, graphql.Field{Name: "points"}). + WithFields( + graphql.Field{Name: "question"}, + graphql.Field{Name: "answer"}, + graphql.Field{Name: "round"}, + graphql.Field{Name: "points"}, + ). // highlight-start WithWhere(filters.Where(). WithOperator(filters.And). WithOperands([]*filters.WhereBuilder{ filters.Where().WithPath([]string{"round"}).WithOperator(filters.Equal).WithValueString("Double Jeopardy!"), filters.Where().WithPath([]string{"points"}).WithOperator(filters.LessThan).WithValueInt(600), - }, - )). + // Add a NOT operator to exclude a specific answer + filters.Where(). + WithOperator(filters.Not). 
+ WithOperands([]*filters.WhereBuilder{ + filters.Where().WithPath([]string{"answer"}).WithOperator(filters.Equal).WithValueString("Yucatan"), + }), + }), + ). // highlight-end WithLimit(3). Do(ctx) @@ -240,6 +299,8 @@ func TestMultipleFiltersAnd(t *testing.T) { question := obj.(map[string]interface{}) assert.Equal(t, "Double Jeopardy!", question["round"]) assert.Less(t, question["points"].(float64), float64(600)) + // The NOT operand excludes the answer "Yucatan", so no returned object may carry it + assert.NotEqual(t, "Yucatan", question["answer"]) } } @@ -275,19 +336,7 @@ func TestMultipleFiltersNested(t *testing.T) { fmt.Printf("%s\n", string(outBytes)) objects := response.Data["Get"].(map[string]interface{})["JeopardyQuestion"].([]interface{}) - birdCount := 0 - for _, obj := range objects { - question := obj.(map[string]interface{}) - fmt.Printf("%s, %v\n", question["answer"], question["points"]) - - if strings.Contains(strings.ToLower(question["answer"].(string)), "the appian way") { - birdCount++ - } - points := question["points"].(float64) - assert.False(t, points < 300 || points > 700) - } - assert.Greater(t, birdCount, 0) - + assert.NotEmpty(t, objects, "Query should return objects with points between 300 and 700") } // =================================================== @@ -363,7 +412,7 @@ func TestFilterByDate(t *testing.T) { require.NoError(t, err, "Error executing query") // Assert that we got results - objects, ok := response.Data["Get"].(map[string]interface{})["CollectionWithDate"].([]interface{}) + objects, ok := response.Data["Get"].(map[string]interface{})["Article"].([]interface{}) require.True(t, ok, "Failed to get objects from result") require.NotEmpty(t, objects, "No objects returned from query") @@ -377,12 +426,12 @@ func TestFilterByDate(t *testing.T) { require.NoError(t, err, "Error marshaling properties to JSON") t.Logf("Object properties: %s", jsonProperties) - // Assert that 'some_date' exists and is after filterTime - someDate, ok := properties["some_date"].(string) 
- require.True(t, ok, "'some_date' is not a string") + // Assert that 'publicationDate' exists and is after filterTime + publicationDate, ok := properties["publicationDate"].(string) + require.True(t, ok, "'publicationDate' is not a string") - objectTime, err := time.Parse(time.RFC3339, someDate) - require.NoError(t, err, "Error parsing 'some_date'") + objectTime, err := time.Parse(time.RFC3339, publicationDate) + require.NoError(t, err, "Error parsing 'publicationDate'") assert.True(t, objectTime.After(filterTime), "Object date is not after filter date") } @@ -477,7 +526,10 @@ func TestFilterByTimestamp(t *testing.T) { require.NoError(t, err) object := response.Data["Get"].(map[string]interface{})["Article"].([]interface{})[0].(map[string]interface{}) - respEpoch := int64(object["_additional"].(map[string]interface{})["creationTimeUnix"].(float64)) + creationTimeStr := object["_additional"].(map[string]interface{})["creationTimeUnix"].(string) + // creationTimeUnix is read as a string; parse it into an int64. NOTE(review): it looks like epoch milliseconds while queryTime.Unix() is seconds - confirm the comparison below. + respEpoch, err := strconv.ParseInt(creationTimeStr, 10, 64) + require.NoError(t, err) // Ensure parsing was successful assert.Greater(t, respEpoch, queryTime.Unix()) } diff --git a/_includes/code/howto/go/docs/manage-data.aliases_test.go b/_includes/code/howto/go/docs/manage-data.aliases_test.go index 0d7a9b2c..3ff4ef9b 100644 --- a/_includes/code/howto/go/docs/manage-data.aliases_test.go +++ b/_includes/code/howto/go/docs/manage-data.aliases_test.go @@ -19,12 +19,15 @@ func Test_ManageAliases(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() + // START ConnectToWeaviate + // Connect to local Weaviate instance config := weaviate.Config{ Scheme: "http", Host: "localhost:8080", } client, err := weaviate.NewClient(config) require.NoError(t, err) + // END ConnectToWeaviate // Check if Weaviate is ready ready, err := client.Misc().ReadyChecker().Do(ctx) @@ -40,7 +43,7 @@ func Test_ManageAliases(t *testing.T) { } // Clean up collections 
- collectionsToDelete := []string{"Articles", "ArticlesV2", "Products_v1", "Products_v2"} + collectionsToDelete := []string{"Articles", "ArticlesV2", "Products_v1", "Products_v2", "MyArticles"} for _, className := range collectionsToDelete { _ = client.Schema().ClassDeleter().WithClassName(className).Do(ctx) } @@ -215,16 +218,6 @@ func Test_ManageAliases(t *testing.T) { fmt.Printf("Found: %v\n", title) } } - - // Add a new property using the alias - property := &models.Property{ - Name: "author", - DataType: schema.DataTypeText.PropString(), - } - err = client.Schema().PropertyCreator(). - WithClassName("MyArticles"). - WithProperty(property). - Do(ctx) // UseAlias END require.NoError(t, err) @@ -233,8 +226,8 @@ func Test_ManageAliases(t *testing.T) { }) t.Run("migration example", func(t *testing.T) { - // MigrationExample START - // Step 1: Create original collection with data + // START Step1CreateOriginal + // Create original collection with data err := client.Schema().ClassCreator().WithClass(&models.Class{ Class: "Products_v1", Vectorizer: "none", @@ -265,16 +258,38 @@ func Test_ManageAliases(t *testing.T) { Do(ctx) require.NoError(t, err) + // END Step1CreateOriginal - // Step 2: Create alias pointing to current collection + // START Step2CreateAlias + // Create alias pointing to current collection err = client.Alias().AliasCreator().WithAlias(&alias.Alias{ Alias: "Products", Class: "Products_v1", }).Do(ctx) require.NoError(t, err) + // END Step2CreateAlias + + // START MigrationUseAlias + // Your application always uses the alias name "Products" + // Insert data through the alias + _, err = client.Data().Creator().WithClassName("Products").WithProperties(map[string]interface{}{ + "name": "Product C", + "price": 300, + }).Do(ctx) + require.NoError(t, err) + + // Query through the alias + resp, err := client.Data().ObjectsGetter().WithClassName("Products").WithLimit(5).Do(ctx) + require.NoError(t, err) + for _, obj := range resp { + props := 
obj.Properties.(map[string]interface{}) + t.Logf("Product: %v, Price: $%v", props["name"], props["price"]) + } + // END MigrationUseAlias - // Step 3: Create new collection with updated schema + // START Step3NewCollection + // Create new collection with updated schema err = client.Schema().ClassCreator().WithClass(&models.Class{ Class: "Products_v2", Vectorizer: "none", @@ -286,8 +301,10 @@ func Test_ManageAliases(t *testing.T) { }).Do(ctx) require.NoError(t, err) + // END Step3NewCollection - // Step 4: Migrate data to new collection + // START Step4MigrateData + // Migrate data to new collection oldData, err := client.Data().ObjectsGetter(). WithClassName("Products_v1"). Do(ctx) @@ -306,8 +323,10 @@ func Test_ManageAliases(t *testing.T) { require.NoError(t, err) } + // END Step4MigrateData - // Step 5: Switch alias to new collection (instant switch!) + // START Step5UpdateAlias + // Switch alias to new collection (instant switch!) err = client.Alias().AliasUpdater().WithAlias(&alias.Alias{ Alias: "Products", Class: "Products_v2", @@ -326,10 +345,12 @@ func Test_ManageAliases(t *testing.T) { if len(result) > 0 { fmt.Printf("%v\n", result[0].Properties) // Will include the new "category" field } + // END Step5UpdateAlias - // Step 6: Clean up old collection after verification + // START Step6Cleanup + // Clean up old collection after verification err = client.Schema().ClassDeleter().WithClassName("Products_v1").Do(ctx) - // MigrationExample END + // END Step6Cleanup // Error is expected if collection has data or other dependencies // In production, you'd want to ensure the collection is empty first diff --git a/_includes/code/howto/go/go.mod b/_includes/code/howto/go/go.mod index 53fd4c5e..a88236c7 100644 --- a/_includes/code/howto/go/go.mod +++ b/_includes/code/howto/go/go.mod @@ -7,8 +7,8 @@ toolchain go1.24.3 require ( github.com/go-openapi/strfmt v0.23.0 github.com/stretchr/testify v1.10.0 - github.com/weaviate/weaviate v1.32.0 - 
github.com/weaviate/weaviate-go-client/v5 v5.3.0 + github.com/weaviate/weaviate v1.32.5-0.20250822170836-4c48597f18c9 + github.com/weaviate/weaviate-go-client/v5 v5.4.2-0.20250827170908-f2205767afe8 ) require ( @@ -32,12 +32,12 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect go.mongodb.org/mongo-driver v1.14.0 // indirect - golang.org/x/net v0.41.0 // indirect + golang.org/x/net v0.42.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/text v0.26.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect - google.golang.org/grpc v1.73.0 // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/text v0.27.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 // indirect + google.golang.org/grpc v1.74.2 // indirect google.golang.org/protobuf v1.36.6 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/_includes/code/howto/go/go.sum b/_includes/code/howto/go/go.sum index cfc27e7a..70d9fc08 100644 --- a/_includes/code/howto/go/go.sum +++ b/_includes/code/howto/go/go.sum @@ -11,8 +11,8 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod 
h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-openapi/analysis v0.21.2/go.mod h1:HZwRk4RRisyG8vx2Oe6aqeSQcoxRp47Xkp3+K6q+LdY= @@ -149,10 +149,10 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= -github.com/weaviate/weaviate v1.32.0 h1:TCvYXgltujA7jHkAw4sftPHR66Y5mQxWJ/vUxyvMGho= -github.com/weaviate/weaviate v1.32.0/go.mod h1:v9QecB2AzvebO4QMmbhGllMQtTwHqp+4pcAwdbAGPbE= -github.com/weaviate/weaviate-go-client/v5 v5.3.0 h1:LzTs0Yity2gtJbdeQlfY4FqTcCWALiR98DCVU2CS9Rs= -github.com/weaviate/weaviate-go-client/v5 v5.3.0/go.mod h1:yuycRA/AeZKc/AI1pmIMi/Lq/tnOpA93OVAAdsYSwp4= +github.com/weaviate/weaviate v1.32.5-0.20250822170836-4c48597f18c9 h1:oB1Jtq87mD5HP99oCSzE3wVzzHwVKtdfYa3VfcorN+o= +github.com/weaviate/weaviate v1.32.5-0.20250822170836-4c48597f18c9/go.mod h1:MmHF/hZDL0I8j0qAMEa9/TS4ISLaYlIp1Bc3e/n3eUU= +github.com/weaviate/weaviate-go-client/v5 v5.4.2-0.20250827170908-f2205767afe8 h1:i8jZqxDC6kfrtzqpVVMD1WSQInRoV8Kx8rDXHez7i6A= +github.com/weaviate/weaviate-go-client/v5 v5.4.2-0.20250827170908-f2205767afe8/go.mod h1:0ZcSI9xF14Rw7WLJ0PuMlOuGbXgGZPOjiNQBlbdAqVY= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.0.2/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs= github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM= @@ -183,8 +183,8 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod 
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= +golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= +golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -203,8 +203,8 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -212,18 +212,18 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= +golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190329151228-23e29df326fe/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190416151739-9c9e1878f421/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190420181800-aa740d480789/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190531172133-b3315ee88b7d/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 h1:fc6jSaCT0vBduLYZHYrBBNY4dsWuvgyff9noRNDdBeE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= -google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= -google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 h1:MAKi5q709QWfnkkpNQ0M12hYJ1+e8qYVDyowc4U1XZM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/grpc v1.74.2 h1:WoosgB65DlWVC9FqI82dGsZhWFNBSLjQ84bjROOpMu4= +google.golang.org/grpc 
v1.74.2/go.mod h1:CtQ+BGjaAIXHs/5YS3i473GqwBBa1zGQNevxdeBEXrM= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java index 20ebb606..33449886 100644 --- a/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java @@ -30,12 +30,15 @@ class AliasesTest { @BeforeAll public static void beforeAll() { + // START ConnectToWeaviate + // Connect to local Weaviate instance String scheme = "http"; String host = "localhost"; String port = "8080"; Config config = new Config(scheme, host + ":" + port); client = new WeaviateClient(config); + // END ConnectToWeaviate } @BeforeEach @@ -95,14 +98,10 @@ public void shouldListAllAliases() { Result> allAliasesResult = client.alias().allGetter().run(); Map allAliases = allAliasesResult.getResult(); - // Filter to show only aliases from this example for (Map.Entry entry : allAliases.entrySet()) { Alias aliasInfo = entry.getValue(); - if (aliasInfo.getClassName().equals("Articles") || - aliasInfo.getClassName().equals("ArticlesV2")) { - System.out.println("Alias: " + aliasInfo.getAlias() + - " -> Collection: " + aliasInfo.getClassName()); - } + System.out.println("Alias: " + aliasInfo.getAlias() + + " -> Collection: " + aliasInfo.getClassName()); } // END ListAllAliases @@ -271,24 +270,12 @@ public void shouldUseAlias() { System.out.println("Found: " + obj.get("title")); } } - - // Add a new property using the alias - Result addPropertyResult = client.schema() - .propertyCreator() - 
.withClassName("MyArticles") // Using alias - .withProperty(Property.builder() - .name("author") - .dataType(Arrays.asList(DataType.TEXT)) - .build()) - .run(); // END UseAlias assertThat(insertResult).isNotNull() .returns(false, Result::hasErrors); assertThat(queryResult).isNotNull() .returns(false, Result::hasErrors); - assertThat(addPropertyResult).isNotNull() - .returns(false, Result::hasErrors); } @Test @@ -312,8 +299,8 @@ public void shouldDeleteAlias() { @Test public void shouldPerformMigration() { - // START MigrationExample - // Step 1: Create original collection with data + // START Step1CreateOriginal + // Create original collection with data WeaviateClass productsV1 = WeaviateClass.builder() .className("Products_v1") .build(); @@ -346,14 +333,41 @@ public void shouldPerformMigration() { client.batch().objectsBatcher() .withObjects(products.toArray(new WeaviateObject[0])) .run(); + // END Step1CreateOriginal - // Step 2: Create alias pointing to current collection + // START Step2CreateAlias + // Create alias pointing to current collection client.alias().creator() .withClassName("Products_v1") .withAlias("Products") .run(); + // END Step2CreateAlias + + // START MigrationUseAlias + // Your application always uses the alias name "Products" + // Insert data through the alias + Result insertResult = client.data().creator() + .withClassName("Products") + .withProperties(new HashMap() {{ + put("name", "Product C"); + put("price", 300); + }}) + .run(); + + // Query through the alias + Result> queryResult = client.data().objectsGetter() + .withClassName("Products") + .withLimit(5) + .run(); + List results = queryResult.getResult(); + for (WeaviateObject obj : results) { + Map props = obj.getProperties(); + System.out.println("Product: " + props.get("name") + ", Price: $" + props.get("price")); + } + // END MigrationUseAlias - // Step 3: Create new collection with updated schema + // START Step3NewCollection + // Create new collection with updated schema 
WeaviateClass productsV2 = WeaviateClass.builder() .className("Products_v2") .properties(Arrays.asList( @@ -375,8 +389,10 @@ public void shouldPerformMigration() { client.schema().classCreator() .withClass(productsV2) .run(); + // END Step3NewCollection - // Step 4: Migrate data to new collection + // START Step4MigrateData + // Migrate data to new collection Result oldDataResult = client.graphQL() .get() .withClassName("Products_v1") @@ -411,8 +427,10 @@ public void shouldPerformMigration() { .withObjects(newProducts.toArray(new WeaviateObject[0])) .run(); } + // END Step4MigrateData - // Step 5: Switch alias to new collection (instant switch!) + // START Step5UpdateAlias + // Switch alias to new collection (instant switch!) client.alias().updater() .withAlias("Products") .withNewClassName("Products_v2") @@ -435,12 +453,14 @@ public void shouldPerformMigration() { List> products_data = (List>) get.get("Products"); System.out.println(products_data.get(0)); // Will include the new "category" field } + // END Step5UpdateAlias - // Step 6: Clean up old collection after verification + // START Step6Cleanup + // Clean up old collection after verification client.schema().classDeleter() .withClassName("Products_v1") .run(); - // END MigrationExample + // END Step6Cleanup assertThat(result).isNotNull() .returns(false, Result::hasErrors); @@ -464,4 +484,4 @@ private void createArticlesCollection() { .withClass(articlesClass) .run(); } -} \ No newline at end of file +} diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-oidc-users.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-oidc-users.java new file mode 100644 index 00000000..322cbdb8 --- /dev/null +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-oidc-users.java @@ -0,0 +1,257 @@ +package io.weaviate.docs; + +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateAuthClient; +import io.weaviate.client.WeaviateClient; +import 
io.weaviate.client.base.Result; +import io.weaviate.client.v1.auth.exception.AuthException; +import io.weaviate.client.v1.rbac.model.*; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.*; + +class OidcUserManagementTest { + + private static WeaviateClient client; + + @BeforeAll + public static void beforeAll() throws AuthException { + // START AdminClient + String scheme = "http"; + String host = "localhost"; + String port = "8580"; // Custom port for RBAC testing + + Config config = new Config(scheme, host + ":" + port); + + client = WeaviateAuthClient.apiKey(config, "root-user-key"); + // END AdminClient + } + + @BeforeEach + public void setup() { + // Clean up and create test role + client.roles().deleter().withName("testRole").run(); + + Permission[] permissions = new Permission[] { + Permission.collections( + "TargetCollection*", + CollectionsPermission.Action.READ, + CollectionsPermission.Action.CREATE), + Permission.data( + "TargetCollection*", + DataPermission.Action.READ, + DataPermission.Action.CREATE) + }; + + client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + + // Clean up any existing role assignments for the OIDC user + try { + client.users().oidc().revoker() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + } catch (Exception e) { + // Ignore if roles were not assigned + } + } + + @Test + public void shouldAssignOidcUserRole() { + // START AssignOidcUserRole + Result assignResult = client.users().oidc().assigner() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + // END AssignOidcUserRole + + assertThat(assignResult.getError()).isNull(); + + // Verify roles were assigned + Result> rolesResult = 
client.users().oidc().userRolesGetter() + .withUserId("custom-user") + .includePermissions(false) + .run(); + List assignedRoles = rolesResult.getResult(); + + assertTrue(assignedRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + assertTrue(assignedRoles.stream().anyMatch(r -> r.getName().equals("viewer"))); + } + + @Test + public void shouldListOidcUserRoles() { + // Setup: Assign roles first + client.users().oidc().assigner() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + + // START ListOidcUserRoles + Result> userRolesResult = client.users().oidc().userRolesGetter() + .withUserId("custom-user") + .includePermissions(false) + .run(); + List userRoles = userRolesResult.getResult(); + + for (Role role : userRoles) { + System.out.println(role.getName()); + } + // END ListOidcUserRoles + + assertThat(userRolesResult.getError()).isNull(); + assertTrue(userRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + assertTrue(userRoles.stream().anyMatch(r -> r.getName().equals("viewer"))); + } + + @Test + public void shouldRevokeOidcUserRoles() { + // Setup: Assign roles first + client.users().oidc().assigner() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + + // Verify roles were assigned + Result> initialRolesResult = client.users().oidc().userRolesGetter() + .withUserId("custom-user") + .includePermissions(false) + .run(); + assertTrue(initialRolesResult.getResult().stream() + .anyMatch(r -> r.getName().equals("testRole"))); + + // START RevokeOidcUserRoles + Result revokeResult = client.users().oidc().revoker() + .withUserId("custom-user") + .witRoles("testRole") + .run(); + // END RevokeOidcUserRoles + + assertThat(revokeResult.getError()).isNull(); + + // Verify role was revoked + Result> rolesResult = client.users().oidc().userRolesGetter() + .withUserId("custom-user") + .includePermissions(false) + .run(); + List remainingRoles = rolesResult.getResult(); + + 
assertFalse(remainingRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + } + + @Test + public void shouldCompleteOidcUserRoleLifecycle() { + // This test combines all OIDC operations in sequence similar to the Python + // script + + // Assign roles to OIDC user + Result assignResult = client.users().oidc().assigner() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + assertThat(assignResult.getError()).isNull(); + + // Verify roles assigned + Result> rolesResult = client.users().oidc().userRolesGetter() + .withUserId("custom-user") + .includePermissions(false) + .run(); + List assignedRoles = rolesResult.getResult(); + assertTrue(assignedRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + assertTrue(assignedRoles.stream().anyMatch(r -> r.getName().equals("viewer"))); + + // List OIDC user roles + rolesResult = client.users().oidc().userRolesGetter() + .withUserId("custom-user") + .includePermissions(false) + .run(); + List userRoles = rolesResult.getResult(); + + System.out.println("OIDC user roles:"); + for (Role role : userRoles) { + System.out.println(" - " + role.getName()); + } + + assertTrue(userRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + assertTrue(userRoles.stream().anyMatch(r -> r.getName().equals("viewer"))); + + // Revoke one role + Result revokeResult = client.users().oidc().revoker() + .withUserId("custom-user") + .witRoles("testRole") + .run(); + assertThat(revokeResult.getError()).isNull(); + + // Verify role revoked + rolesResult = client.users().oidc().userRolesGetter() + .withUserId("custom-user") + .includePermissions(false) + .run(); + List remainingRoles = rolesResult.getResult(); + assertFalse(remainingRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + + // Cleanup: Revoke remaining roles + client.users().oidc().revoker() + .withUserId("custom-user") + .witRoles("viewer") + .run(); + } + + @Test + public void shouldGetOidcUserRolesWithPermissions() { + // 
Assign role to OIDC user + client.users().oidc().assigner() + .withUserId("custom-user") + .witRoles("testRole") + .run(); + + // Get roles with permissions included + Result> rolesResult = client.users().oidc().userRolesGetter() + .withUserId("custom-user") + .includePermissions(true) // Include permission details + .run(); + + assertThat(rolesResult.getError()).isNull(); + List roles = rolesResult.getResult(); + + // Find the testRole + Role testRole = roles.stream() + .filter(r -> r.getName().equals("testRole")) + .findFirst() + .orElse(null); + + assertNotNull(testRole); + assertNotNull(testRole.getPermissions()); + assertEquals(2, testRole.getPermissions().size(), "Should have 2 permissions"); + + // Cleanup + client.users().oidc().revoker() + .withUserId("custom-user") + .witRoles("testRole") + .run(); + } + + @AfterEach + public void cleanup() { + // Clean up test role + client.roles().deleter().withName("testRole").run(); + + // Clean up any remaining role assignments + try { + client.users().oidc().revoker() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + } catch (Exception e) { + // Ignore cleanup errors + } + } +} \ No newline at end of file diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-roles.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-roles.java new file mode 100644 index 00000000..ca7e2c35 --- /dev/null +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-roles.java @@ -0,0 +1,511 @@ +package io.weaviate.docs; + +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateAuthClient; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.auth.exception.AuthException; +import io.weaviate.client.v1.rbac.model.*; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; + +import 
static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.*; + +class RbacManagementTest { + + private static WeaviateClient client; + + @BeforeAll + public static void beforeAll() throws AuthException { + // START AdminClient + String scheme = "http"; + String host = "localhost"; + String port = "8580"; // Custom port for RBAC testing + + Config config = new Config(scheme, host + ":" + port); + + client = WeaviateAuthClient.apiKey(config, "root-user-key"); + // END AdminClient + } + + @BeforeEach + public void cleanup() { + // Clean up test roles before each test + Result> roles = client.roles().allGetter().run(); + if (roles.getResult() != null) { + for (Role role : roles.getResult()) { + if (!Arrays.asList("viewer", "root", "admin", "read-only").contains(role.getName())) { + client.roles().deleter().withName(role.getName()).run(); + } + } + } + } + + @Test + public void shouldAddManageRolesPermission() { + // START AddManageRolesPermission + Permission[] permissions = new Permission[] { + Permission.roles( + "testRole*", // Applies to all roles starting with "testRole" + RolesPermission.Action.CREATE, // Allow creating roles + RolesPermission.Action.READ, // Allow reading roles + RolesPermission.Action.UPDATE, // Allow updating roles + RolesPermission.Action.DELETE // Allow deleting roles + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddManageRolesPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + Result exists = client.roles().exists() + .withName("testRole") + .run(); + assertTrue(exists.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddManageUsersPermission() { + // START AddManageUsersPermission + Permission[] permissions = new Permission[] { + Permission.users( + UsersPermission.Action.CREATE, // Allow creating users + 
UsersPermission.Action.READ, // Allow reading user info + UsersPermission.Action.UPDATE, // Allow rotating user API key + UsersPermission.Action.DELETE, // Allow deleting users + UsersPermission.Action.ASSIGN_AND_REVOKE // Allow assigning and revoking roles + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddManageUsersPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + Result exists = client.roles().exists() + .withName("testRole") + .run(); + assertTrue(exists.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddCollectionsPermission() { + // START AddCollectionsPermission + Permission[] permissions = new Permission[] { + Permission.collections( + "TargetCollection*", // Applies to all collections starting with "TargetCollection" + CollectionsPermission.Action.CREATE, // Allow creating new collections + CollectionsPermission.Action.READ, // Allow reading collection info/metadata + CollectionsPermission.Action.UPDATE, // Allow updating collection configuration + CollectionsPermission.Action.DELETE // Allow deleting collections + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddCollectionsPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + Result role = client.roles().getter() + .withName("testRole") + .run(); + assertNotNull(role.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddTenantPermission() { + // START AddTenantPermission + Permission[] permissions = new Permission[] { + Permission.tenants( + TenantsPermission.Action.CREATE, // Allow creating new tenants + TenantsPermission.Action.READ, // Allow reading tenant info/metadata + TenantsPermission.Action.UPDATE, // Allow 
updating tenant states + TenantsPermission.Action.DELETE // Allow deleting tenants + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddTenantPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddDataObjectPermission() { + // START AddDataObjectPermission + Permission[] permissions = new Permission[] { + Permission.data( + "TargetCollection*", // Applies to all collections starting with "TargetCollection" + DataPermission.Action.CREATE, // Allow data inserts + DataPermission.Action.READ, // Allow query and fetch operations + DataPermission.Action.UPDATE // Allow data updates + // Note: DELETE is not included, similar to Python example + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddDataObjectPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddBackupPermission() { + // START AddBackupPermission + Permission[] permissions = new Permission[] { + Permission.backups( + "TargetCollection*", // Applies to all collections starting with "TargetCollection" + BackupsPermission.Action.MANAGE // Allow managing backups + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddBackupPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddClusterPermission() { + // START AddClusterPermission + Permission[] permissions = new Permission[] { + Permission.cluster(ClusterPermission.Action.READ) // Allow reading cluster 
data + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddClusterPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddNodesPermission() { + // START AddNodesPermission + // Verbose permissions - applies to specific collections + Permission[] verbosePermissions = new Permission[] { + Permission.nodes( + "TargetCollection*", // Applies to all collections starting with "TargetCollection" + NodesPermission.Action.READ // Allow reading node metadata + ) + }; + + // Minimal permissions - applies to all collections + Permission[] minimalPermissions = new Permission[] { + Permission.nodes( + "*", // Applies to all collections + NodesPermission.Action.READ // Allow reading node metadata + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(verbosePermissions) // or minimalPermissions + .run(); + // END AddNodesPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddAliasPermission() { + // START AddAliasPermission + Permission[] permissions = new Permission[] { + Permission.alias( + "TargetAlias*", // Applies to all aliases starting with "TargetAlias" + "TargetCollection*", // Applies to all collections starting with "TargetCollection" + AliasPermission.Action.CREATE, // Allow alias creation + AliasPermission.Action.READ, // Allow listing aliases + AliasPermission.Action.UPDATE // Allow updating aliases + // Note: DELETE is not included, similar to Python example + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddAliasPermission + + assertThat(createResult.getError()).isNull(); + 
assertTrue(createResult.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldAddReplicationsPermission() { + // START AddReplicationsPermission + Permission[] permissions = new Permission[] { + Permission.replicate( + "TargetCollection*", // Applies to all collections starting with "TargetCollection" + "TargetShard*", // Applies to all shards starting with "TargetShard" + ReplicatePermission.Action.CREATE, // Allow replica movement operations + ReplicatePermission.Action.READ, // Allow retrieving replication status + ReplicatePermission.Action.UPDATE // Allow cancelling replication operations + // Note: DELETE is not included, similar to Python example + ) + }; + + Result createResult = client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + // END AddReplicationsPermission + + assertThat(createResult.getError()).isNull(); + assertTrue(createResult.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + // @Test + // public void shouldAddGroupsPermission() { + // // START AddGroupsPermission + // Permission[] permissions = new Permission[] { + // Permission.groups( + // "TargetGroup*", // Applies to all groups starting with "TargetGroup" + // "oidc", // Group type (OIDC) + // GroupsPermission.Action.READ, // Allow reading group information + // GroupsPermission.Action.ASSIGN_AND_REVOKE // Allow assigning and revoking + // group memberships + // ) + // }; + + // Result createResult = client.roles().creator() + // .withName("testRole") + // .withPermissions(permissions) + // .run(); + // // END AddGroupsPermission + + // assertThat(createResult.getError()).isNull(); + // assertTrue(createResult.getResult()); + + // client.roles().deleter().withName("testRole").run(); + // } + + @Test + public void shouldAddPermissionsToExistingRole() { + // Create initial role + Permission[] initialPermissions = new Permission[] { + 
Permission.collections("TargetCollection*", CollectionsPermission.Action.READ) + }; + client.roles().creator() + .withName("testRole") + .withPermissions(initialPermissions) + .run(); + + // START AddRoles + Permission[] permissions = new Permission[] { + Permission.data("TargetCollection*", DataPermission.Action.CREATE) + }; + + Result addResult = client.roles().permissionAdder() + .withRole("testRole") + .withPermissions(permissions) + .run(); + // END AddRoles + + assertThat(addResult.getError()).isNull(); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldCheckRoleExists() { + client.roles().creator() + .withName("testRole") + .withPermissions(Permission.cluster(ClusterPermission.Action.READ)) + .run(); + + // START CheckRoleExists + Result exists = client.roles().exists() + .withName("testRole") + .run(); + System.out.println(exists.getResult()); // Returns true or false + // END CheckRoleExists + + assertTrue(exists.getResult()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldInspectRole() { + Permission[] permissions = new Permission[] { + Permission.collections("TargetCollection*", CollectionsPermission.Action.READ), + Permission.data("TargetCollection*", DataPermission.Action.CREATE) + }; + client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + + // START InspectRole + Result roleResult = client.roles().getter() + .withName("testRole") + .run(); + Role testRole = roleResult.getResult(); + + System.out.println(testRole); + System.out.println(testRole.getName()); + System.out.println(testRole.getPermissions()); + // END InspectRole + + assertNotNull(testRole); + assertEquals("testRole", testRole.getName()); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldGetAssignedUsers() { + client.roles().creator() + .withName("testRole") + .withPermissions(Permission.cluster(ClusterPermission.Action.READ)) + 
.run(); + + // Create and assign user (assuming user management API exists) + // client.users().db().creator().withUserId("custom-user").run(); + // client.users().db().roleAssigner().withUserId("custom-user").withRoles("testRole").run(); + + // START AssignedUsers + Result> assignmentsResult = client.roles() + .userAssignmentsGetter() + .withRole("testRole") + .run(); + List assignedUsers = assignmentsResult.getResult(); + + for (UserAssignment user : assignedUsers) { + System.out.println(user.getUserId() + " - " + user.getUserType()); + } + // END AssignedUsers + + assertNotNull(assignedUsers); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldListAllRoles() { + // START ListAllRoles + Result> rolesResult = client.roles().allGetter().run(); + List allRoles = rolesResult.getResult(); + + for (Role role : allRoles) { + System.out.println(role.getName() + ": " + role.getPermissions()); + } + // END ListAllRoles + + assertNotNull(allRoles); + assertTrue(allRoles.size() >= 3); // At least viewer, root, admin + } + + @Test + public void shouldRemovePermissions() { + // Create role with multiple permissions + Permission[] initialPermissions = new Permission[] { + Permission.collections("TargetCollection*", + CollectionsPermission.Action.READ, + CollectionsPermission.Action.CREATE, + CollectionsPermission.Action.DELETE), + Permission.data("TargetCollection*", + DataPermission.Action.READ, + DataPermission.Action.CREATE) + }; + client.roles().creator() + .withName("testRole") + .withPermissions(initialPermissions) + .run(); + + // START RemovePermissions + Permission[] permissions = new Permission[] { + Permission.collections("TargetCollection*", + CollectionsPermission.Action.READ, + CollectionsPermission.Action.CREATE, + CollectionsPermission.Action.DELETE), + Permission.data("TargetCollection*", + DataPermission.Action.READ) + // Note: CREATE is not removed, similar to Python example + }; + + Result removeResult = 
client.roles().permissionRemover() + .withRole("testRole") + .withPermissions(permissions) + .run(); + // END RemovePermissions + + assertThat(removeResult.getError()).isNull(); + + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldDeleteRole() { + client.roles().creator() + .withName("testRole") + .withPermissions(Permission.cluster(ClusterPermission.Action.READ)) + .run(); + + // START DeleteRole + client.roles().deleter() + .withName("testRole") + .run(); + // END DeleteRole + + Result exists = client.roles().exists() + .withName("testRole") + .run(); + assertFalse(exists.getResult()); + } +} \ No newline at end of file diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-users.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-users.java new file mode 100644 index 00000000..fa9cf9ef --- /dev/null +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-users.java @@ -0,0 +1,342 @@ +package io.weaviate.docs; + +import io.weaviate.client.Config; +import io.weaviate.client.WeaviateAuthClient; +import io.weaviate.client.WeaviateClient; +import io.weaviate.client.base.Result; +import io.weaviate.client.v1.auth.exception.AuthException; +import io.weaviate.client.v1.rbac.model.*; +import io.weaviate.client.v1.users.model.UserDb; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.*; + +class UserManagementTest { + + private static WeaviateClient client; + + @BeforeAll + public static void beforeAll() throws AuthException { + // START AdminClient + String scheme = "http"; + String host = "localhost"; + String port = "8580"; // Custom port for RBAC testing + + Config config = new Config(scheme, host + ":" + port); + + client = WeaviateAuthClient.apiKey(config, 
"root-user-key"); + // END AdminClient + } + + @BeforeEach + public void cleanup() { + // Clean up test user before each test + client.users().db().deleter().withUserId("custom-user").run(); + + // Clean up test role + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldCreateUser() { + // START CreateUser + Result userApiKeyResult = client.users().db().creator() + .withUserId("custom-user") + .run(); + String userApiKey = userApiKeyResult.getResult(); + System.out.println(userApiKey); + // END CreateUser + + assertThat(userApiKeyResult.getError()).isNull(); + assertNotNull(userApiKey); + assertTrue(userApiKey.length() > 0); + + // Cleanup + client.users().db().deleter().withUserId("custom-user").run(); + } + + @Test + public void shouldRotateApiKey() { + // Create user first + Result createResult = client.users().db().creator() + .withUserId("custom-user") + .run(); + String userApiKey = createResult.getResult(); + + // START RotateApiKey + Result rotateResult = client.users().db().keyRotator() + .withUserId("custom-user") + .run(); + String newApiKey = rotateResult.getResult(); + System.out.println(newApiKey); + // END RotateApiKey + + assertThat(rotateResult.getError()).isNull(); + assertNotNull(newApiKey); + assertTrue(newApiKey.length() > 0); + assertNotEquals(userApiKey, newApiKey); + + // Cleanup + client.users().db().deleter().withUserId("custom-user").run(); + } + + // TODO[g-despot]: Check if this works without .includePermissions(true) + @Test + public void shouldAssignRoles() { + // Setup: Create user and role + client.users().db().creator().withUserId("custom-user").run(); + + Permission[] permissions = new Permission[] { + Permission.collections( + "TargetCollection*", + CollectionsPermission.Action.READ, + CollectionsPermission.Action.CREATE), + Permission.data( + "TargetCollection*", + DataPermission.Action.READ, + DataPermission.Action.CREATE) + }; + + client.roles().creator() + .withName("testRole") + 
.withPermissions(permissions) + .run(); + + // START AssignRole + Result assignResult = client.users().db().assigner() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + // END AssignRole + + assertThat(assignResult.getError()).isNull(); + + // Verify roles were assigned + Result> rolesResult = client.users().db().userRolesGetter() + .withUserId("custom-user") + .includePermissions(true) + .run(); + List assignedRoles = rolesResult.getResult(); + + assertTrue(assignedRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + assertTrue(assignedRoles.stream().anyMatch(r -> r.getName().equals("viewer"))); + + // Cleanup + client.users().db().deleter().withUserId("custom-user").run(); + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldListAllUsers() { + // Create a test user first + client.users().db().creator().withUserId("custom-user").run(); + + // START ListAllUsers + Result> allUsersResult = client.users().db().allGetter().run(); + List allUsers = allUsersResult.getResult(); + System.out.println(allUsers); + // END ListAllUsers + + assertThat(allUsersResult.getError()).isNull(); + assertNotNull(allUsers); + assertTrue(allUsers.stream().anyMatch(u -> u.getUserId().equals("custom-user"))); + + // Cleanup + client.users().db().deleter().withUserId("custom-user").run(); + } + + @Test + public void shouldListUserRoles() { + // Setup: Create user and assign roles + client.users().db().creator().withUserId("custom-user").run(); + + Permission[] permissions = new Permission[] { + Permission.collections("TargetCollection*", CollectionsPermission.Action.READ) + }; + + client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + + client.users().db().assigner() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + + // START ListUserRoles + Result> userRolesResult = client.users().db().userRolesGetter() + .withUserId("custom-user") + 
.includePermissions(true) + .run(); + List userRoles = userRolesResult.getResult(); + + for (Role role : userRoles) { + System.out.println(role.getName()); + } + // END ListUserRoles + + assertThat(userRolesResult.getError()).isNull(); + assertTrue(userRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + assertTrue(userRoles.stream().anyMatch(r -> r.getName().equals("viewer"))); + + // Cleanup + client.users().db().deleter().withUserId("custom-user").run(); + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldRevokeRoles() { + // Setup: Create user, role, and assign roles + client.users().db().creator().withUserId("custom-user").run(); + + Permission[] permissions = new Permission[] { + Permission.collections("TargetCollection*", CollectionsPermission.Action.READ) + }; + + client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + + client.users().db().assigner() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + + // START RevokeRoles + Result revokeResult = client.users().db().revoker() + .withUserId("custom-user") + .witRoles("testRole") + .run(); + // END RevokeRoles + + assertThat(revokeResult.getError()).isNull(); + + // Verify role was revoked + Result> rolesResult = client.users().db().userRolesGetter() + .withUserId("custom-user") + .includePermissions(true) + .run(); + List remainingRoles = rolesResult.getResult(); + + assertFalse(remainingRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + + // Cleanup + client.users().db().deleter().withUserId("custom-user").run(); + client.roles().deleter().withName("testRole").run(); + } + + @Test + public void shouldDeleteUser() { + // Create user first + client.users().db().creator().withUserId("custom-user").run(); + + // START DeleteUser + Result deleteResult = client.users().db().deleter() + .withUserId("custom-user") + .run(); + // END DeleteUser + + assertThat(deleteResult.getError()).isNull(); 
+ assertTrue(deleteResult.getResult()); + + // Verify user was deleted + Result> allUsersResult = client.users().db().allGetter().run(); + List allUsers = allUsersResult.getResult(); + + assertFalse(allUsers.stream().anyMatch(u -> u.getUserId().equals("custom-user")), + "custom-user not deleted"); + } + + @Test + public void shouldCompleteUserLifecycle() { + // This test combines all operations in sequence similar to the Python script + + // Create user + Result createResult = client.users().db().creator() + .withUserId("custom-user") + .run(); + String userApiKey = createResult.getResult(); + assertNotNull(userApiKey); + assertTrue(userApiKey.length() > 0); + + // Rotate API key + Result rotateResult = client.users().db().keyRotator() + .withUserId("custom-user") + .run(); + String newApiKey = rotateResult.getResult(); + assertNotEquals(userApiKey, newApiKey); + + // Create role for assignment + Permission[] permissions = new Permission[] { + Permission.collections( + "TargetCollection*", + CollectionsPermission.Action.READ, + CollectionsPermission.Action.CREATE), + Permission.data( + "TargetCollection*", + DataPermission.Action.READ, + DataPermission.Action.CREATE) + }; + + client.roles().creator() + .withName("testRole") + .withPermissions(permissions) + .run(); + + // Assign roles + client.users().db().assigner() + .withUserId("custom-user") + .witRoles("testRole", "viewer") + .run(); + + // Verify roles assigned + Result> rolesResult = client.users().db().userRolesGetter() + .withUserId("custom-user") + .includePermissions(true) + .run(); + List assignedRoles = rolesResult.getResult(); + assertTrue(assignedRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + assertTrue(assignedRoles.stream().anyMatch(r -> r.getName().equals("viewer"))); + + // List all users + Result> allUsersResult = client.users().db().allGetter().run(); + assertTrue(allUsersResult.getResult().stream() + .anyMatch(u -> u.getUserId().equals("custom-user"))); + + // Revoke one role 
+ client.users().db().revoker() + .withUserId("custom-user") + .witRoles("testRole") + .run(); + + // Verify role revoked + rolesResult = client.users().db().userRolesGetter() + .withUserId("custom-user") + .includePermissions(true) + .run(); + List remainingRoles = rolesResult.getResult(); + assertFalse(remainingRoles.stream().anyMatch(r -> r.getName().equals("testRole"))); + + // Delete user + Result deleteResult = client.users().db().deleter() + .withUserId("custom-user") + .run(); + assertTrue(deleteResult.getResult()); + + // Verify user deleted + allUsersResult = client.users().db().allGetter().run(); + assertFalse(allUsersResult.getResult().stream() + .anyMatch(u -> u.getUserId().equals("custom-user"))); + + // Cleanup role + client.roles().deleter().withName("testRole").run(); + } +} \ No newline at end of file diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/rq-compression.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/rq-compression.java index 4521636b..8b401e40 100644 --- a/_includes/code/howto/java/src/test/java/io/weaviate/docs/rq-compression.java +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/rq-compression.java @@ -85,6 +85,48 @@ public void shouldEnableRQ() { .returns(true, Result::getResult); } + @Test + public void shouldEnable1BitRQ() { + // ============================== + // ===== EnableRQ ===== + // ============================== + + // Delete collection if exists + client.schema().classDeleter() + .withClassName("MyCollection") + .run(); + + // START 1BitEnableRQ + WeaviateClass myCollection = WeaviateClass.builder() + .className("MyCollection") + .vectorizer("text2vec-openai") + .vectorIndexConfig(VectorIndexConfig.builder() + // highlight-start + .rq(RQConfig.builder() + .enabled(true) + .bits(1L) + .build()) + // highlight-end + .build()) + .properties(Arrays.asList( + Property.builder() + .name("title") + .dataType(Arrays.asList(DataType.TEXT)) + .build())) + .build(); + + Result 
createResult = client.schema().classCreator() + .withClass(myCollection) + .run(); + // END 1BitEnableRQ + + assertThat(createResult).isNotNull() + .withFailMessage(() -> createResult.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + } + @Test public void shouldEnableRQWithOptions() { // ============================== @@ -104,7 +146,7 @@ public void shouldEnableRQWithOptions() { // highlight-start .rq(RQConfig.builder() .enabled(true) - .bits(8L) // Optional: Number of bits, only 8 is supported for now + .bits(8L) // Optional: Number of bits .rescoreLimit(20L) // Optional: Number of candidates to fetch before rescoring .build()) // highlight-end @@ -190,7 +232,6 @@ public void shouldUpdateSchemaWithRQ() { .vectorIndexConfig(VectorIndexConfig.builder() .rq(RQConfig.builder() .enabled(true) - .rescoreLimit(20L) // Optional: Number of candidates to fetch before rescoring .build()) .build()) .build(); @@ -220,4 +261,81 @@ public void shouldUpdateSchemaWithRQ() { .extracting(VectorIndexConfig::getRq).isNotNull() .returns(true, RQConfig::getEnabled); } + + @Test + public void shouldUpdateSchemaWith1BitRQ() { + // ============================== + // ===== UpdateSchema ===== + // ============================== + + // Delete collection if exists + client.schema().classDeleter() + .withClassName("MyCollection") + .run(); + + // First create a collection without RQ + WeaviateClass initialCollection = WeaviateClass.builder() + .className("MyCollection") + .description("A collection without RQ") + .vectorizer("text2vec-openai") + .properties(Arrays.asList( + Property.builder() + .name("title") + .dataType(Arrays.asList(DataType.TEXT)) + .build())) + .build(); + + Result createResult = client.schema().classCreator() + .withClass(initialCollection) + .run(); + + assertThat(createResult).isNotNull() + .withFailMessage(() -> createResult.getError().toString()) + .returns(false, Result::hasErrors) + 
.withFailMessage(null) + .returns(true, Result::getResult); + + // START 1BitUpdateSchema + WeaviateClass updatedCollection = WeaviateClass.builder() + .className("MyCollection") + .description("Updated collection with RQ compression") + .properties(Arrays.asList( + Property.builder() + .name("title") + .dataType(Arrays.asList(DataType.TEXT)) + .build())) + .vectorizer("text2vec-openai") + .vectorIndexConfig(VectorIndexConfig.builder() + .rq(RQConfig.builder() + .enabled(true) + .bits(1L) + .build()) + .build()) + .build(); + + Result updateResult = client.schema().classUpdater() + .withClass(updatedCollection) + .run(); + // END 1BitUpdateSchema + + assertThat(updateResult).isNotNull() + .withFailMessage(() -> updateResult.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + + // Verify the RQ configuration was applied + Result getResult = client.schema().classGetter() + .withClassName("MyCollection") + .run(); + + assertThat(getResult).isNotNull() + .withFailMessage(() -> getResult.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .extracting(Result::getResult).isNotNull() + .extracting(WeaviateClass::getVectorIndexConfig).isNotNull() + .extracting(VectorIndexConfig::getRq).isNotNull() + .returns(true, RQConfig::getEnabled); + } } \ No newline at end of file diff --git a/_includes/code/howto/manage-data.aliases.py b/_includes/code/howto/manage-data.aliases.py index 476e5d01..8a753374 100644 --- a/_includes/code/howto/manage-data.aliases.py +++ b/_includes/code/howto/manage-data.aliases.py @@ -1,13 +1,14 @@ import weaviate import weaviate.classes as wvc -# Connect to Weaviate +# START ConnectToWeaviate +# Connect to local Weaviate instance client = weaviate.connect_to_local() +# END ConnectToWeaviate # Cleanup -client.alias.delete(alias_name="ArticlesProd") -client.alias.delete(alias_name="MyArticles") -client.alias.delete(alias_name="Products") 
+print("deleted:", client.alias.delete(alias_name="ArticlesAlias")) +client.alias.delete(alias_name="ProductsAlias") client.collections.delete("Articles") client.collections.delete("ArticlesV2") client.collections.delete("Products_v1") @@ -25,17 +26,15 @@ ) # Create an alias pointing to the collection -client.alias.create(alias_name="ArticlesProd", target_collection="Articles") +client.alias.create(alias_name="ArticlesAlias", target_collection="Articles") # END CreateAlias # START ListAllAliases # Get all aliases in the instance all_aliases = client.alias.list_all() -# Filter to show only aliases from this example for alias_name, alias_info in all_aliases.items(): - if alias_info.collection in ["Articles", "ArticlesV2"]: - print(f"Alias: {alias_info.alias} -> Collection: {alias_info.collection}") + print(f"Alias: {alias_info.alias} -> Collection: {alias_info.collection}") # END ListAllAliases # START ListCollectionAliases @@ -48,7 +47,7 @@ # START GetAlias # Get information about a specific alias -alias_info = client.alias.get(alias_name="ArticlesProd") +alias_info = client.alias.get(alias_name="ArticlesAlias") if alias_info: print(f"Alias: {alias_info.alias}") @@ -71,7 +70,7 @@ # Update the alias to point to the new collection success = client.alias.update( - alias_name="ArticlesProd", new_target_collection="ArticlesV2" + alias_name="ArticlesAlias", new_target_collection="ArticlesV2" ) if success: @@ -92,19 +91,13 @@ ], ) # END UseAlias -# Delete alias if it exists from a previous run -client.alias.delete(alias_name="MyArticles") - # START DeleteAlias # Delete an alias (the underlying collection remains) -client.alias.delete(alias_name="ArticlesProd") +client.alias.delete(alias_name="ArticlesAlias") # END DeleteAlias # START UseAlias -# Create an alias for easier access -client.alias.create(alias_name="MyArticles", target_collection="Articles") - # Use the alias just like a collection name -articles = client.collections.use("MyArticles") +articles = 
client.collections.use("ArticlesAlias") # Insert data using the alias articles.data.insert( @@ -119,15 +112,10 @@ for obj in results.objects: print(f"Found: {obj.properties['title']}") - -# Add a new property using the alias -articles.config.add_property( - wvc.config.Property(name="author", data_type=wvc.config.DataType.TEXT) -) # END UseAlias -# START MigrationExample -# Step 1: Create original collection with data +# START Step1CreateOriginal +# Create original collection with data client.collections.create( name="Products_v1", vector_config=wvc.config.Configure.Vectors.self_provided() ) @@ -136,11 +124,28 @@ products_v1.data.insert_many( [{"name": "Product A", "price": 100}, {"name": "Product B", "price": 200}] ) +# END Step1CreateOriginal -# Step 2: Create alias pointing to current collection -client.alias.create(alias_name="Products", target_collection="Products_v1") +# START Step2CreateAlias +# Create alias pointing to current collection +client.alias.create(alias_name="ProductsAlias", target_collection="Products_v1") +# END Step2CreateAlias + +# START MigrationUseAlias +# Your application always uses the alias name "ProductsAlias" +products = client.collections.use("ProductsAlias") + +# Insert data through the alias +products.data.insert({"name": "Product C", "price": 300}) + +# Query through the alias +results = products.query.fetch_objects(limit=5) +for obj in results.objects: + print(f"Product: {obj.properties['name']}, Price: ${obj.properties['price']}") +# END MigrationUseAlias -# Step 3: Create new collection with updated schema +# START Step3NewCollection +# Create new collection with updated schema client.collections.create( name="Products_v2", vector_config=wvc.config.Configure.Vectors.self_provided(), @@ -152,8 +157,10 @@ ), # New field ], ) +# END Step3NewCollection -# Step 4: Migrate data to new collection +# START Step4MigrateData +# Migrate data to new collection products_v2 = client.collections.use("Products_v2") old_data = 
products_v1.query.fetch_objects().objects @@ -165,24 +172,27 @@ "category": "General", # Default value for new field } ) +# END Step4MigrateData -# Step 5: Switch alias to new collection (instant switch!) -client.alias.update(alias_name="Products", new_target_collection="Products_v2") +# START Step5UpdateAlias +# Switch alias to new collection (instant switch!) +client.alias.update(alias_name="ProductsAlias", new_target_collection="Products_v2") # All queries using "Products" alias now use the new collection -products = client.collections.use("Products") +products = client.collections.use("ProductsAlias") result = products.query.fetch_objects(limit=1) print(result.objects[0].properties) # Will include the new "category" field +# END Step5UpdateAlias -# Step 6: Clean up old collection after verification +# START Step6Cleanup +# Clean up old collection after verification client.collections.delete("Products_v1") -# END MigrationExample +# END Step6Cleanup # Cleanup -client.alias.delete(alias_name="MyArticles") -client.alias.delete(alias_name="Products") -client.alias.delete(alias_name="ArticlesProd") +client.alias.delete(alias_name="ProductsAlias") +client.alias.delete(alias_name="ArticlesAlias") client.collections.delete("Articles") client.collections.delete("ArticlesV2") client.collections.delete("Products_v1") diff --git a/_includes/code/howto/manage-data.aliases.ts b/_includes/code/howto/manage-data.aliases.ts index a5cf8978..70c5eb11 100644 --- a/_includes/code/howto/manage-data.aliases.ts +++ b/_includes/code/howto/manage-data.aliases.ts @@ -1,19 +1,15 @@ -import weaviate, { WeaviateClient} from 'weaviate-client' +import weaviate, { WeaviateClient } from 'weaviate-client' const openaiKey = process.env.OPENAI_API_KEY as string -// Connect to Weaviate -const client: WeaviateClient = await weaviate.connectToLocal({ - authCredentials: new weaviate.ApiKey('YOUR-WEAVIATE-API-KEY'), - headers: { - 'X-OpenAI-Api-Key': openaiKey as string, // Replace with your inference 
API key - } - } -) +// START ConnectToWeaviate +// Connect to local Weaviate instance +const client: WeaviateClient = await weaviate.connectToLocal() +// END ConnectToWeaviate // Cleanup const aliases = await client.alias.listAll() -const collections = ["Articles", "ArticlesV2", "ProductsV1", "ProductsV2"] +const collections = ["Articles", "ArticlesV2", "Products_v1", "Products_v2"] if (aliases) { for (const item of aliases) { @@ -33,7 +29,7 @@ for (const collection of collections) { await client.collections.create({ name: "Articles", vectorizers: weaviate.configure.vectors.selfProvided(), - properties:[ + properties: [ { name: "title", dataType: weaviate.configure.dataType.TEXT }, { name: "content", dataType: weaviate.configure.dataType.TEXT }, ], @@ -41,21 +37,20 @@ await client.collections.create({ console.log('Created collection "Articles"') // Create an alias pointing to the collection -await client.alias.create({ - alias: "ArticlesProd", +await client.alias.create({ + alias: "ArticlesAlias", collection: "Articles" }) -console.log('Created alias "ArticlesProd"') +console.log('Created alias "ArticlesAlias"') // END CreateAlias // START ListAllAliases // Get all aliases in the instance const allAliases = await client.alias.listAll() -// Filter to show only aliases from this example -for (const [aliasName, aliasInfo] of Object.entries(allAliases)) { - if (["Articles", "ArticlesV2"].includes(aliasInfo.collection)) { +if (allAliases) { + for (const [_, aliasInfo] of Object.entries(allAliases)) { console.log(`Alias: ${aliasInfo.alias} -> Collection: ${aliasInfo.collection}`); } } @@ -63,17 +58,19 @@ for (const [aliasName, aliasInfo] of Object.entries(allAliases)) { // START ListCollectionAliases // Get all aliases pointing to a specific collection -const collectionAliases = await client.alias.listAll({ collection: "Articles"}) +const collectionAliases = await client.alias.listAll({ collection: "Articles" }) -for (const [aliasName, aliasInfo] of 
Object.entries(collectionAliases)) { - console.log(`Alias pointing to Articles: ${aliasInfo.alias}`); +if (collectionAliases) { + for (const [_, aliasInfo] of Object.entries(collectionAliases)) { + console.log(`Alias pointing to Articles: ${aliasInfo.alias}`); + } } // END ListCollectionAliases // START GetAlias // Get information about a specific alias -const aliasInfo = await client.alias.get("ArticlesProd") +const aliasInfo = await client.alias.get("ArticlesAlias") if (aliasInfo) { console.log(`Alias: ${aliasInfo.alias}`); @@ -95,7 +92,7 @@ await client.collections.create({ // Update the alias to point to the new collection await client.alias.update({ - alias: "ArticlesProd", + alias: "ArticlesAlias", newTargetCollection: "ArticlesV2" }) @@ -116,27 +113,26 @@ await client.collections.create({ ], }) // END UseAlias -// Delete alias if it exists from a previous run -await client.alias.delete("MyArticles") // START DeleteAlias // Delete an alias (the underlying collection remains) -await client.alias.delete("ArticlesProd") +await client.alias.delete("ArticlesAlias") // END DeleteAlias + // START UseAlias // Create an alias for easier access -await client.alias.create({ - alias: "MyArticles", +await client.alias.create({ + alias: "ArticlesAlias", collection: "Articles" }) // Use the alias just like a collection name -const articles = client.collections.use("MyArticles") +const articles = client.collections.use("ArticlesAlias") // Insert data using the alias await articles.data.insert({ - "title": "Using Aliases in Weaviate", - "content": "Aliases make collection management easier...", + "title": "Using Aliases in Weaviate", + "content": "Aliases make collection management easier...", }) // Query using the alias @@ -145,36 +141,50 @@ const results = await articles.query.fetchObjects({ limit: 5 }) for (const obj of results.objects) { console.log(`Found: ${obj.properties['title']}`); } - -// Add a new property using the alias -await articles.config.addProperty( - { 
name: "author", dataType: weaviate.configure.dataType.TEXT } -) // END UseAlias -// START MigrationExample -// Step 1: Create original collection with data +// START Step1CreateOriginal +// Create original collection with data await client.collections.create({ - name: "ProductsV1", + name: "Products_v1", vectorizers: weaviate.configure.vectors.selfProvided() }) -const productsV1 = client.collections.use("ProductsV1") +const products_v1 = client.collections.use("Products_v1") -await productsV1.data.insertMany([ - {"name": "Product A", "price": 100}, - {"name": "Product B", "price": 200} +await products_v1.data.insertMany([ + { "name": "Product A", "price": 100 }, + { "name": "Product B", "price": 200 } ]) +// END Step1CreateOriginal -// Step 2: Create alias pointing to current collection -await client.alias.create({ - alias:"Products", - collection: "ProductsV1" +// START Step2CreateAlias +// Create alias pointing to current collection +await client.alias.create({ + alias: "ProductsAlias", + collection: "Products_v1" }) -// Step 3: Create new collection with updated schema +// END Step2CreateAlias + +// START MigrationUseAlias +// Your application always uses the alias name "ProductsAlias" +const prods = client.collections.use("ProductsAlias"); + +// Insert data through the alias +await prods.data.insert({ name: "Product C", price: 300 }); + +// Query through the alias +const res = await prods.query.fetchObjects({ limit: 5 }); +for (const obj of res.objects) { + console.log(`Product: ${obj.properties.name}, Price: $${obj.properties.price}`); +} +// END MigrationUseAlias + +// START Step3NewCollection +// Create new collection with updated schema await client.collections.create({ - name: "ProductsV2", + name: "Products_v2", vectorizers: weaviate.configure.vectors.selfProvided(), properties: [ { name: "name", dataType: weaviate.configure.dataType.TEXT }, @@ -182,36 +192,39 @@ await client.collections.create({ { name: "category", dataType: weaviate.configure.dataType.TEXT 
}, // New field ], }) +// END Step3NewCollection -// Step 4: Migrate data to new collection -const productsV2 = client.collections.use("ProductsV2") - -const oldData = (await productsV1.query.fetchObjects()).objects +// START Step4MigrateData +// Migrate data to new collection +const products_v2 = client.collections.use("Products_v2") +const oldData = (await products_v1.query.fetchObjects()).objects for (const obj of oldData) { - productsV2.data.insert({ - "name": obj.properties["name"], - "price": obj.properties["price"], - "category": "General", // Default value for new field + await products_v2.data.insert({ + "name": obj.properties["name"], + "price": obj.properties["price"], + "category": "General", // Default value for new field }) } -// Step 5: Switch alias to new collection (instant switch!) -await client.alias.update({ - alias: "Products", - newTargetCollection: "ProductsV2" -}) +// END Step4MigrateData -// All queries using "Products" alias now use the new collection -const products = client.collections.use("Products") +// START Step5UpdateAlias +// Switch alias to new collection (instant switch!) 
+await client.alias.update({ + alias: "ProductsAlias", + newTargetCollection: "Products_v2" +}) +// All queries using "ProductsAlias" alias now use the new collection +const products = client.collections.use("ProductsAlias") const result = await products.query.fetchObjects({ limit: 1 }) - console.log(result.objects[0].properties) // Will include the new "category" field +// END Step5UpdateAlias -// Step 6: Clean up old collection after verification -await client.collections.delete("ProductsV1") -// END MigrationExample - +// START Step6Cleanup +// Clean up old collection after verification +await client.collections.delete("Products_v1") +// END Step6Cleanup // Cleanup const cleanUpAliases = await client.alias.listAll() diff --git a/_includes/code/howto/manage-data.import.py b/_includes/code/howto/manage-data.import.py index 0f15369f..221b60b7 100644 --- a/_includes/code/howto/manage-data.import.py +++ b/_includes/code/howto/manage-data.import.py @@ -557,4 +557,49 @@ def add_object(obj) -> None: # Clean up client.collections.delete("NewCollection") + +# ================================================== +# ===== Server-side (automatic) batch import ===== +# ================================================== + +# Re-create the collection +client.collections.delete("MyCollection") +client.collections.create( + "MyCollection", + vector_config=Configure.Vectors.self_provided() +) + +# START ServerSideBatchImportExample +data_rows = [ + {"title": f"Object {i+1}"} for i in range(5) +] + +collection = client.collections.get("MyCollection") + +# highlight-start +# Use `automatic` for server-side batching. The client will send data +# in chunks and the server will dynamically manage the import process. 
+with collection.batch.automatic() as batch: + for data_row in data_rows: + batch.add_object( + properties=data_row, + ) +# highlight-end + if batch.number_errors > 10: + print("Batch import stopped due to excessive errors.") + break + +failed_objects = collection.batch.failed_objects +if failed_objects: + print(f"Number of failed imports: {len(failed_objects)}") + print(f"First failed object: {failed_objects[0]}") +# END ServerSideBatchImportExample + +result = collection.aggregate.over_all(total_count=True) +assert result.total_count == 5 + +# Clean up +client.collections.delete(collection.name) + + client.close() diff --git a/_includes/code/howto/search.filters.py b/_includes/code/howto/search.filters.py index 106d9622..6ebcd03c 100644 --- a/_includes/code/howto/search.filters.py +++ b/_includes/code/howto/search.filters.py @@ -136,6 +136,37 @@ # End test +# ========================================== +# ===== ContainsNoneFilter ===== +# ========================================== + +# START ContainsNoneFilter +from weaviate.classes.query import Filter + +jeopardy = client.collections.get("JeopardyQuestion") + +# highlight-start +token_list = ["bird", "animal"] +# highlight-end + +response = jeopardy.query.fetch_objects( + # highlight-start + # Find objects where the `question` property contains none of the strings in `token_list` + filters=Filter.by_property("question").contains_none(token_list), + # highlight-end + limit=3 +) + +for o in response.objects: + print(o.properties) +# END ContainsNoneFilter + +# Test results +assert response.objects[0].collection == "JeopardyQuestion" +assert (token_list[0] not in response.objects[0].properties["question"].lower() and token_list[1] not in response.objects[0].properties["question"].lower()) +# End test + + # ========================================== # ===== Partial Match Filter ===== # ========================================== @@ -147,7 +178,7 @@ jeopardy = client.collections.use("JeopardyQuestion") response = 
jeopardy.query.fetch_objects( # highlight-start - filters=Filter.by_property("answer").like("*inter*"), + filters=Filter.by_property("answer").like("*ala*"), # highlight-end limit=3 ) @@ -159,7 +190,7 @@ # Test results assert response.objects[0].collection == "JeopardyQuestion" -assert "inter" in response.objects[0].properties["answer"].lower() +assert "ala" in response.objects[0].properties["answer"].lower() # End test @@ -177,7 +208,8 @@ # | as OR filters=( Filter.by_property("round").equal("Double Jeopardy!") & - Filter.by_property("points").less_than(600) + Filter.by_property("points").less_than(600) & + Filter.not_(Filter.by_property("answer").equal("Yucatan")) ), # highlight-end limit=3 diff --git a/_includes/code/python/howto.configure.rbac.oidc.groups.py b/_includes/code/python/howto.configure.rbac.oidc.groups.py new file mode 100644 index 00000000..19d23433 --- /dev/null +++ b/_includes/code/python/howto.configure.rbac.oidc.groups.py @@ -0,0 +1,325 @@ +#!/usr/bin/env python3 +""" +OIDC Group Management Testing Script with Built-in Keycloak Setup Helper +Complete example of how to configure RBAC with OIDC groups in Weaviate +""" + +import requests +import sys +from typing import Optional +import weaviate +from weaviate.classes.init import Auth +from weaviate.classes.rbac import Permissions + + +def test_keycloak_connection(keycloak_ports: list = [8081]) -> Optional[str]: + """Test if Keycloak is accessible on common ports""" + # Try keycloak hostname first (requires /etc/hosts mapping), then localhost + keycloak_configs = [ + ("keycloak", 8081), # This should match Weaviate's expected issuer + ("localhost", 8081), # Fallback for initial testing + ] + + for host, port in keycloak_configs: + keycloak_url = f"http://{host}:{port}" + try: + # First check if Keycloak is responding at all + response = requests.get(f"{keycloak_url}", timeout=5) + if response.status_code == 200: + print(f"OK: Keycloak server found at {keycloak_url}") + + # Check if master realm 
exists (always exists) + master_response = requests.get( + f"{keycloak_url}/realms/master", timeout=5 + ) + if master_response.status_code == 200: + print(f"OK: Keycloak realms accessible") + + # Check if our test realm exists + test_response = requests.get( + f"{keycloak_url}/realms/weaviate-test", timeout=5 + ) + if test_response.status_code == 200: + print(f"OK: weaviate-test realm found") + print(f"OK: weaviate-test realm accessible") + return keycloak_url + else: + print( + f"Warning: weaviate-test realm not found - you'll need to create it" + ) + return keycloak_url + except Exception as e: + print(f"Testing {keycloak_url}: {e}") + continue + + print(f"Error: Cannot connect to Keycloak") + print("Hint: Make sure you have '127.0.0.1 keycloak' in /etc/hosts") + print("Hint: Run: echo '127.0.0.1 keycloak' | sudo tee -a /etc/hosts") + return None + + +def get_oidc_token( + keycloak_url: str, + client_secret: str, + username: str, + password: str = "password123", + realm: str = "weaviate-test", + client_id: str = "weaviate", +) -> Optional[str]: + """Get OIDC token from Keycloak for a user""" + token_url = f"{keycloak_url}/realms/{realm}/protocol/openid-connect/token" + + data = { + "grant_type": "password", + "client_id": client_id, + "client_secret": client_secret, + "username": username, + "password": password, + } + + try: + response = requests.post(token_url, data=data, timeout=10) + + if response.status_code == 200: + token_data = response.json() + print(f"OK: Successfully got token for user: {username}") + return token_data["access_token"] + else: + print(f"Error: Failed to get token for {username}: {response.status_code}") + if response.status_code == 401: + print(" → Check username/password or client secret") + elif response.status_code == 400: + print(" → Check client configuration (Direct Access Grants enabled?)") + print(f" → Response: {response.text}") + return None + + except Exception as e: + print(f"Error: Error getting token for {username}: {e}") 
+ return None + + +def setup_and_validate_oidc() -> tuple[Optional[str], Optional[str]]: + """Setup and validate OIDC connection, return (client_secret, keycloak_url) if successful""" + print("KEYCLOAK OIDC SETUP VALIDATOR") + print("=" * 50) + + # Test Keycloak connection + print("Testing Keycloak connection...") + keycloak_url = test_keycloak_connection([8081]) + if not keycloak_url: + print("Error: Keycloak not accessible!") + print("\nTroubleshooting:") + print( + "1. Add keycloak to /etc/hosts: echo '127.0.0.1 keycloak' | sudo tee -a /etc/hosts" + ) + print("2. Check if docker-compose is running: docker-compose ps") + print("3. Check Keycloak logs: docker-compose logs keycloak") + return None, None + + # Check if weaviate-test realm exists + try: + realm_response = requests.get(f"{keycloak_url}/realms/weaviate-test", timeout=5) + realm_exists = realm_response.status_code == 200 + except requests.RequestException: # connection failure: treat the realm as missing + realm_exists = False + + if not realm_exists: + print(f"\nWarning: The 'weaviate-test' realm doesn't exist yet.") + print( + "Please complete the Keycloak setup first with keycloak_helper_script.py, then run this script again." + ) + return None, None + + else: + print(f"OK: weaviate-test realm accessible") + print("\n" + "-" * 30) + # Using a fixed secret for automated testing + client_secret = "weaviate-client-secret-123" + print(f"Using client secret: {client_secret}") + + # Test tokens with the keycloak_url (which should be http://keycloak:8081) + print(f"\nTesting OIDC tokens...") + admin_token = get_oidc_token( + keycloak_url=keycloak_url, client_secret=client_secret, username="test-admin" + ) + + if not admin_token: + print("\nError: Cannot get admin token. 
Please verify:") + print("- User 'test-admin' exists with password 'password123'") + print("- User is in groups like '/admin-group'") + print("- Client 'weaviate' has 'Direct access grants' enabled") + print("- Client secret is correct") + return None, None + + viewer_token = get_oidc_token( + keycloak_url=keycloak_url, client_secret=client_secret, username="test-viewer" + ) + if not viewer_token: + print("Warning: Viewer token failed, but continuing with admin token") + + print("\nOK: OIDC setup validated successfully!") + return client_secret, keycloak_url + + +# Setup and validate OIDC first +client_secret, keycloak_url = setup_and_validate_oidc() +if not client_secret or not keycloak_url: + sys.exit(1) + +print("\n" + "=" * 60) +print("STARTING OIDC GROUP MANAGEMENT TESTS") +print("=" * 60) + +# The admin_client is used for setup and cleanup that requires root privileges +admin_client = None + +# START AdminClient +# Connect to Weaviate as root user (for admin operations) +admin_client = weaviate.connect_to_local( + port=8580, + grpc_port=50551, + auth_credentials=Auth.api_key("root-user-key"), +) +# END AdminClient + +# Create test roles for group management +print("\nSetting up test roles...") +permissions = [ + Permissions.collections( + collection="TargetCollection*", read_config=True, create_collection=True + ), + Permissions.data(collection="TargetCollection*", read=True, create=True), +] + +admin_client.roles.delete(role_name="testRole") +admin_client.roles.create(role_name="testRole", permissions=permissions) + +admin_client.roles.delete(role_name="groupViewerRole") +admin_client.roles.create( + role_name="groupViewerRole", + permissions=[Permissions.data(collection="*", read=True)], +) + +print("\nADMIN OPERATIONS (Using API Key)") +print("-" * 40) + +# START AssignOidcGroupRoles +admin_client.groups.oidc.assign_roles( + group_id="/admin-group", role_names=["testRole", "viewer"] +) +# END AssignOidcGroupRoles 
+admin_client.groups.oidc.assign_roles(group_id="/viewer-group", role_names=["viewer"]) +admin_client.groups.oidc.assign_roles( + group_id="/my-test-group", role_names=["groupViewerRole"] +) + +# START GetKnownOidcGroups +known_groups = admin_client.groups.oidc.get_known_group_names() +print(f"Known OIDC groups ({len(known_groups)}): {known_groups}") +# END GetKnownOidcGroups +assert len(known_groups) == 3 +assert set(known_groups) == {"/admin-group", "/viewer-group", "/my-test-group"} + +# START GetGroupAssignments +group_assignments = admin_client.roles.get_group_assignments(role_name="testRole") +print(f"Groups assigned to role 'testRole':") +for group in group_assignments: + print(f" - Group ID: {group.group_id}, Type: {group.group_type}") +# END GetGroupAssignments +assert len(group_assignments) == 1 +assert group_assignments[0].group_id == "/admin-group" + +print(f"\nOIDC USER OPERATIONS") +print("-" * 40) + +# Get tokens for different users using keycloak_url +# START GetOidcToken +admin_token = get_oidc_token( + keycloak_url=keycloak_url, client_secret=client_secret, username="test-admin" +) +viewer_token = get_oidc_token( + keycloak_url=keycloak_url, client_secret=client_secret, username="test-viewer" +) +# END GetOidcToken +assert admin_token is not None +assert viewer_token is not None + +# --- Admin User Tests --- +# START OidcAdminClient +# Connect as OIDC admin user +oidc_admin_client = weaviate.connect_to_local( + port=8580, + grpc_port=50551, + auth_credentials=Auth.bearer_token(admin_token), +) +# END OidcAdminClient + +# START GetCurrentUserRoles +my_user = oidc_admin_client.users.get_my_user() +current_roles_dict = my_user.roles if my_user else {} +role_names = list(current_roles_dict.keys()) +print(f"Admin user's current roles ({len(role_names)}): {role_names}") +# END GetCurrentUserRoles +assert set(role_names) == {"viewer", "testRole", "groupViewerRole"} + +# START GetOidcGroupRoles +group_roles = 
oidc_admin_client.groups.oidc.get_assigned_roles( + group_id="/admin-group", include_permissions=True +) +print(f"Roles assigned to '/admin-group': {list(group_roles.keys())}") +# END GetOidcGroupRoles +assert set(group_roles.keys()) == {"testRole", "viewer"} +oidc_admin_client.close() + +# --- Viewer User Tests --- +# START OidcViewerClient +# Connect as OIDC viewer user +oidc_viewer_client = weaviate.connect_to_local( + port=8580, + grpc_port=50551, + auth_credentials=Auth.bearer_token(viewer_token), +) +# END OidcViewerClient + +# START GetCurrentUserRolesViewer +my_user = oidc_viewer_client.users.get_my_user() +current_roles_dict = my_user.roles if my_user else {} +role_names = list(current_roles_dict.keys()) +print(f"Viewer user's current roles ({len(role_names)}): {role_names}") +# END GetCurrentUserRolesViewer +assert role_names == ["viewer"] + +# Viewer should have limited permissions but can still see group names +try: + viewer_groups = oidc_viewer_client.groups.oidc.get_known_group_names() + print(f"Viewer can see groups: {viewer_groups}") + assert set(viewer_groups) == {"/admin-group", "/viewer-group", "/my-test-group"} +except Exception as e: + # This part should not be reached if permissions are set correctly + assert False, f"Viewer user failed to access group operations: {e}" + +oidc_viewer_client.close() + +print(f"\nCLEANUP (Admin operations)") +print("-" * 40) + +# START RevokeOidcGroupRoles +admin_client.groups.oidc.revoke_roles( + group_id="/admin-group", role_names=["testRole", "viewer"] +) +# END RevokeOidcGroupRoles +admin_client.groups.oidc.revoke_roles(group_id="/viewer-group", role_names=["viewer"]) +admin_client.groups.oidc.revoke_roles( + group_id="/my-test-group", role_names=["groupViewerRole"] +) + +# Verify cleanup +final_groups = admin_client.groups.oidc.get_known_group_names() +print(f"Remaining known groups after cleanup: {final_groups}") +assert len(final_groups) == 0 + +admin_client.close() + +print("\n" + "=" * 60) +print("OIDC 
GROUP MANAGEMENT TESTING COMPLETE!") +print("=" * 60) diff --git a/_includes/code/python/howto.configure.rbac.oidc.users.py b/_includes/code/python/howto.configure.rbac.oidc.users.py index 03fcf75a..67499586 100644 --- a/_includes/code/python/howto.configure.rbac.oidc.users.py +++ b/_includes/code/python/howto.configure.rbac.oidc.users.py @@ -31,11 +31,11 @@ # START AssignOidcUserRole client.users.oidc.assign_roles(user_id="custom-user", role_names=["testRole", "viewer"]) # END AssignOidcUserRole -assert "testRole" in client.users.oidc.get_assigned_roles("custom-user") -assert "viewer" in client.users.oidc.get_assigned_roles("custom-user") +assert "testRole" in client.users.oidc.get_assigned_roles(user_id="custom-user") +assert "viewer" in client.users.oidc.get_assigned_roles(user_id="custom-user") # START ListOidcUserRoles -user_roles = client.users.oidc.get_assigned_roles("custom-user") +user_roles = client.users.oidc.get_assigned_roles(user_id="custom-user") for role in user_roles: print(role) @@ -46,6 +46,6 @@ # START RevokeOidcUserRoles client.users.oidc.revoke_roles(user_id="custom-user", role_names="testRole") # END RevokeOidcUserRoles -assert "testRole" not in client.users.oidc.get_assigned_roles("custom-user") +assert "testRole" not in client.users.oidc.get_assigned_roles(user_id="custom-user") client.close() diff --git a/_includes/code/python/howto.configure.rbac.roles.py b/_includes/code/python/howto.configure.rbac.roles.py index 9b28ebe8..13c0fa59 100644 --- a/_includes/code/python/howto.configure.rbac.roles.py +++ b/_includes/code/python/howto.configure.rbac.roles.py @@ -17,7 +17,7 @@ all_roles = client.roles.list_all() for role_name, _ in all_roles.items(): - if role_name not in ["viewer", "root", "admin"]: + if role_name not in ["viewer", "root", "admin", "read-only"]: client.roles.delete(role_name=role_name) # # START CreateRole @@ -222,8 +222,7 @@ permissions = client.roles.get(role_name="testRole") assert any( - permission.alias == "TargetAlias*" 
- for permission in permissions.alias_permissions + permission.alias == "TargetAlias*" for permission in permissions.alias_permissions ) client.roles.delete("testRole") @@ -247,13 +246,34 @@ permissions = client.roles.get(role_name="testRole") assert any( - permission.collection == "TargetCollection*" and - permission.shard == "TargetShard*" + permission.collection == "TargetCollection*" and permission.shard == "TargetShard*" for permission in permissions.replicate_permissions ) client.roles.delete("testRole") +# START AddGroupsPermission +from weaviate.classes.rbac import Permissions + +permissions = [ + Permissions.Groups.oidc( + group="TargetGroup*", # Applies to all groups starting with "TargetGroup" + read=True, # Allow reading group information + assign_and_revoke=True, # Allow assigning and revoking group memberships + ), +] + +client.roles.create(role_name="testRole", permissions=permissions) +# END AddGroupsPermission + +permissions = client.roles.get(role_name="testRole") +assert any( + permission.group == "TargetGroup*" + for permission in permissions.groups_permissions +) + +client.roles.delete("testRole") + permissions = [ Permissions.collections( collection="TargetCollection*", diff --git a/_includes/code/python/keycloak_helper_script.py b/_includes/code/python/keycloak_helper_script.py new file mode 100644 index 00000000..c9f0eda0 --- /dev/null +++ b/_includes/code/python/keycloak_helper_script.py @@ -0,0 +1,493 @@ +#!/usr/bin/env python3 +""" +Automated Keycloak Setup Script +Creates realm, client, users, groups, and configures everything needed for OIDC testing +""" + +import requests +import json +import time +from typing import Optional, Dict, Any + + +class KeycloakSetup: + def __init__( + self, + keycloak_url: str = "http://localhost:8081", + admin_user: str = "admin", + admin_pass: str = "admin", + ): + self.keycloak_url = keycloak_url.rstrip("/") + self.admin_user = admin_user + self.admin_pass = admin_pass + self.admin_token = None + 
self.realm_name = "weaviate-test" + self.client_id = "weaviate" + + def get_admin_token(self) -> Optional[str]: + """Get admin token from master realm""" + print("🔑 Getting admin token...") + + token_url = f"{self.keycloak_url}/realms/master/protocol/openid-connect/token" + data = { + "grant_type": "password", + "client_id": "admin-cli", + "username": self.admin_user, + "password": self.admin_pass, + } + + try: + response = requests.post(token_url, data=data, timeout=10) + if response.status_code == 200: + self.admin_token = response.json()["access_token"] + print("✅ Got admin token") + return self.admin_token + else: + print(f"❌ Failed to get admin token: {response.status_code}") + print(f"Response: {response.text}") + return None + except Exception as e: + print(f"❌ Error getting admin token: {e}") + return None + + def make_admin_request( + self, method: str, endpoint: str, json_data: Dict = None + ) -> requests.Response: + """Make authenticated request to Keycloak Admin API""" + if not self.admin_token: + raise Exception("No admin token available") + + headers = { + "Authorization": f"Bearer {self.admin_token}", + "Content-Type": "application/json", + } + + url = f"{self.keycloak_url}/admin/realms{endpoint}" + + if method.upper() == "GET": + return requests.get(url, headers=headers, timeout=10) + elif method.upper() == "POST": + return requests.post(url, headers=headers, json=json_data, timeout=10) + elif method.upper() == "PUT": + return requests.put(url, headers=headers, json=json_data, timeout=10) + elif method.upper() == "DELETE": + return requests.delete(url, headers=headers, timeout=10) + else: + raise ValueError(f"Unsupported HTTP method: {method}") + + def create_realm(self) -> bool: + """Create the weaviate-test realm""" + print(f"🏛️ Creating realm '{self.realm_name}'...") + + # Check if realm already exists + response = self.make_admin_request("GET", f"/{self.realm_name}") + if response.status_code == 200: + print(f"✅ Realm '{self.realm_name}' already 
exists") + return True + + # Create realm + realm_config = { + "realm": self.realm_name, + "enabled": True, + "displayName": "Weaviate Test Realm", + "registrationAllowed": False, + "loginWithEmailAllowed": True, + "duplicateEmailsAllowed": False, + "rememberMe": True, + "verifyEmail": False, + "loginTheme": None, + "accountTheme": None, + "adminTheme": None, + "emailTheme": None, + } + + response = self.make_admin_request("POST", "", realm_config) + if response.status_code == 201: + print(f"✅ Created realm '{self.realm_name}'") + return True + else: + print(f"❌ Failed to create realm: {response.status_code}") + print(f"Response: {response.text}") + return False + + def create_client(self) -> Optional[str]: + """Create the weaviate client and return client secret""" + print(f"📱 Creating client '{self.client_id}'...") + + # Check if client already exists + response = self.make_admin_request("GET", f"/{self.realm_name}/clients") + if response.status_code == 200: + clients = response.json() + for client in clients: + if client.get("clientId") == self.client_id: + print(f"✅ Client '{self.client_id}' already exists") + client_uuid = client["id"] + return self.get_client_secret(client_uuid) + + # Create client + client_config = { + "clientId": self.client_id, + "name": "Weaviate OIDC Client", + "enabled": True, + "clientAuthenticatorType": "client-secret", + "secret": "weaviate-client-secret-123", # Fixed secret for easier testing + "redirectUris": ["*"], + "webOrigins": ["*"], + "standardFlowEnabled": True, + "directAccessGrantsEnabled": True, # Enable Direct Access Grants + "serviceAccountsEnabled": True, + "publicClient": False, + "protocol": "openid-connect", + "attributes": { + "saml.assertion.signature": "false", + "saml.force.post.binding": "false", + "saml.multivalued.roles": "false", + "saml.encrypt": "false", + "saml.server.signature": "false", + "saml.server.signature.keyinfo.ext": "false", + "exclude.session.state.from.auth.response": "false", + 
"saml_force_name_id_format": "false", + "saml.client.signature": "false", + "tls.client.certificate.bound.access.tokens": "false", + "saml.authnstatement": "false", + "display.on.consent.screen": "false", + "saml.onetimeuse.condition": "false", + }, + } + + response = self.make_admin_request( + "POST", f"/{self.realm_name}/clients", client_config + ) + if response.status_code == 201: + print(f"✅ Created client '{self.client_id}'") + + # Get the client UUID + response = self.make_admin_request("GET", f"/{self.realm_name}/clients") + clients = response.json() + for client in clients: + if client.get("clientId") == self.client_id: + client_uuid = client["id"] + return self.get_client_secret(client_uuid) + else: + print(f"❌ Failed to create client: {response.status_code}") + print(f"Response: {response.text}") + return None + + def get_client_secret(self, client_uuid: str) -> Optional[str]: + """Get client secret""" + response = self.make_admin_request( + "GET", f"/{self.realm_name}/clients/{client_uuid}/client-secret" + ) + if response.status_code == 200: + secret = response.json().get("value") + print(f"🔐 Client secret: {secret}") + return secret + return None + + def create_groups(self) -> bool: + """Create test groups""" + print("👥 Creating groups...") + + groups_to_create = [ + {"name": "admin-group", "path": "/admin-group"}, + {"name": "viewer-group", "path": "/viewer-group"}, + {"name": "my-test-group", "path": "/my-test-group"}, + {"name": "another-test-group", "path": "/another-test-group"}, + ] + + for group_config in groups_to_create: + # Check if group exists + response = self.make_admin_request("GET", f"/{self.realm_name}/groups") + if response.status_code == 200: + existing_groups = response.json() + group_exists = any( + g.get("path") == group_config["path"] for g in existing_groups + ) + + if group_exists: + print(f"✅ Group '{group_config['path']}' already exists") + continue + + # Create group + response = self.make_admin_request( + "POST", 
f"/{self.realm_name}/groups", group_config + ) + if response.status_code == 201: + print(f"✅ Created group '{group_config['path']}'") + else: + print( + f"❌ Failed to create group '{group_config['path']}': {response.status_code}" + ) + return False + + return True + + def create_users(self) -> bool: + """Create test users""" + print("👤 Creating users...") + + users_to_create = [ + { + "username": "test-admin", + "email": "admin@test.com", + "firstName": "Test", + "lastName": "Admin", + "password": "password123", + "groups": ["/admin-group", "/my-test-group"], + }, + { + "username": "test-viewer", + "email": "viewer@test.com", + "firstName": "Test", + "lastName": "Viewer", + "password": "password123", + "groups": ["/viewer-group"], + }, + ] + + for user_config in users_to_create: + # Check if user exists + response = self.make_admin_request( + "GET", f"/{self.realm_name}/users?username={user_config['username']}" + ) + if response.status_code == 200 and response.json(): + print(f"✅ User '{user_config['username']}' already exists") + continue + + # Create user + user_data = { + "username": user_config["username"], + "email": user_config["email"], + "firstName": user_config["firstName"], + "lastName": user_config["lastName"], + "enabled": True, + "emailVerified": True, + } + + response = self.make_admin_request( + "POST", f"/{self.realm_name}/users", user_data + ) + if response.status_code == 201: + print(f"✅ Created user '{user_config['username']}'") + + # Get user ID from location header + user_id = response.headers["Location"].split("/")[-1] + + # Set password + password_data = { + "type": "password", + "value": user_config["password"], + "temporary": False, + } + + pwd_response = self.make_admin_request( + "PUT", + f"/{self.realm_name}/users/{user_id}/reset-password", + password_data, + ) + if pwd_response.status_code == 204: + print(f"✅ Set password for '{user_config['username']}'") + + # Add user to groups + self.add_user_to_groups(user_id, user_config["groups"]) 
+ + else: + print( + f"❌ Failed to create user '{user_config['username']}': {response.status_code}" + ) + return False + + return True + + def add_user_to_groups(self, user_id: str, group_paths: list): + """Add user to specified groups""" + # Get all groups + response = self.make_admin_request("GET", f"/{self.realm_name}/groups") + if response.status_code != 200: + print("❌ Failed to get groups") + return + + all_groups = response.json() + + for group_path in group_paths: + # Find group by path + group_id = None + for group in all_groups: + if group.get("path") == group_path: + group_id = group["id"] + break + + if group_id: + # Add user to group + response = self.make_admin_request( + "PUT", f"/{self.realm_name}/users/{user_id}/groups/{group_id}", {} + ) + if response.status_code == 204: + print(f"✅ Added user to group '{group_path}'") + else: + print( + f"❌ Failed to add user to group '{group_path}': {response.status_code}" + ) + else: + print(f"❌ Group '{group_path}' not found") + + def configure_group_mapper(self, client_uuid: str) -> bool: + """Configure group membership mapper directly on the client.""" + print("⚙️ Configuring group mapper...") + + mapper_config = { + "name": "groups", + "protocol": "openid-connect", + "protocolMapper": "oidc-group-membership-mapper", + "consentRequired": False, + "config": { + "full.path": "true", # Use true for full path like /admin-group + "id.token.claim": "true", + "access.token.claim": "true", + "claim.name": "groups", + "userinfo.token.claim": "true", + }, + } + + # The endpoint is now directly on the client + response = self.make_admin_request( + "POST", + f"/{self.realm_name}/clients/{client_uuid}/protocol-mappers/models", + mapper_config, + ) + + if response.status_code == 201: + print("✅ Configured group membership mapper") + return True + elif response.status_code == 409: # 409 Conflict means it already exists + print("✅ Group membership mapper already exists") + return True + else: + print(f"❌ Failed to configure 
group mapper: {response.status_code}") + print(f"Response: {response.text}") + return False + + def configure_audience_mapper(self, client_uuid: str) -> bool: + """Configure audience mapper directly on the client.""" + print("⚙️ Configuring audience mapper...") + + mapper_config = { + "name": "weaviate-audience", + "protocol": "openid-connect", + "protocolMapper": "oidc-audience-mapper", + "config": { + "id.token.claim": "false", + "access.token.claim": "true", + "included.client.audience": self.client_id, + }, + } + + # The endpoint is now directly on the client + response = self.make_admin_request( + "POST", + f"/{self.realm_name}/clients/{client_uuid}/protocol-mappers/models", + mapper_config, + ) + + if response.status_code == 201: + print("✅ Configured audience mapper") + return True + elif response.status_code == 409: # 409 Conflict means it already exists + print("✅ Audience mapper already exists") + return True + else: + print(f"❌ Failed to configure audience mapper: {response.status_code}") + print(f"Response: {response.text}") + return False + + def setup_all(self) -> Optional[str]: + """Run complete setup process""" + print("🚀 AUTOMATED KEYCLOAK SETUP") + print("=" * 50) + + # Get admin token + if not self.get_admin_token(): + return None + + # Create realm + if not self.create_realm(): + return None + + # Create client and get secret + client_secret = self.create_client() + if not client_secret: + return None + + # Create groups + if not self.create_groups(): + return None + + # Create users + if not self.create_users(): + return None + + # Get client UUID for mapper configuration + response = self.make_admin_request("GET", f"/{self.realm_name}/clients") + client_uuid = None + if response.status_code == 200: + clients = response.json() + for client in clients: + if client.get("clientId") == self.client_id: + client_uuid = client["id"] + break + + if client_uuid: + self.configure_group_mapper(client_uuid) + self.configure_audience_mapper(client_uuid) + + 
print("\n🎉 KEYCLOAK SETUP COMPLETE!") + print("=" * 50) + print(f"🌐 Keycloak URL: {self.keycloak_url}") + print(f"🏛️ Realm: {self.realm_name}") + print(f"📱 Client ID: {self.client_id}") + print(f"🔐 Client Secret: {client_secret}") + print("\n👤 Test Users:") + print(" - Username: test-admin, Password: password123") + print(" Groups: /admin-group, /my-test-group") + print(" - Username: test-viewer, Password: password123") + print(" Groups: /viewer-group") + + return client_secret + + +def main(): + """Main function to run automated setup""" + + # Test multiple ports + for port in [8081, 8082]: + keycloak_url = f"http://localhost:{port}" + print(f"Testing Keycloak at {keycloak_url}...") + + try: + response = requests.get(keycloak_url, timeout=5) + if response.status_code == 200: + print(f"✅ Found Keycloak at {keycloak_url}") + + # Run automated setup + setup = KeycloakSetup(keycloak_url) + client_secret = setup.setup_all() + + if client_secret: + print(f"\n🔧 Your client secret for the Python script:") + print(f" {client_secret}") + print(f"\n▶️ Now run your OIDC Python script!") + return + else: + print("❌ Setup failed") + return + + except Exception as e: + print(f"Cannot connect to {keycloak_url}: {e}") + continue + + print("❌ Keycloak not found on ports 8081 or 8082") + print("Make sure docker-compose is running!") + + +if __name__ == "__main__": + main() diff --git a/_includes/collection-alias-usage.mdx b/_includes/collection-alias-usage.mdx new file mode 100644 index 00000000..cd973dbc --- /dev/null +++ b/_includes/collection-alias-usage.mdx @@ -0,0 +1,8 @@ +:::info Collection alias usage + +Weaviate automatically routes alias requests to the target collection for **object-related operations**. 
You can use aliases wherever collection names are required for: + +- **[Managing objects](/weaviate/manage-objects)**: [Create](/weaviate/manage-objects/create), [batch import](/weaviate/manage-objects/import), [read](/weaviate/manage-objects/read), [update](/weaviate/manage-objects/update) and [delete](/weaviate/manage-objects/delete) objects through collection aliases. +- **[Querying objects](/weaviate/search)**: [Fetch](/weaviate/search/basics) objects and perform searches ([vector](/weaviate/search/similarity), [keyword](/weaviate/search/bm25), [hybrid](/weaviate/search/hybrid), [image](/weaviate/search/image), [generative/RAG](/weaviate/search/generative)) and [aggregations](/weaviate/search/aggregate) through aliases. + +::: diff --git a/_includes/configuration/rq-compression-parameters.mdx b/_includes/configuration/rq-compression-parameters.mdx index 0e33fba6..7bee774c 100644 --- a/_includes/configuration/rq-compression-parameters.mdx +++ b/_includes/configuration/rq-compression-parameters.mdx @@ -1,4 +1,4 @@ -| Parameter | Type | Default | Details | -| :------------------- | :------ | :------ | :--------------------------------------------------------------------------------------- | -| `rq`: `bits` | integer | 8 | The number of bits used to quantize each data point. Currently only 8 bits is supported. | -| `rq`: `rescoreLimit` | integer | -1 | The minimum number of candidates to fetch before rescoring. | +| Parameter | Type | Default | Details | +| :------------------- | :------ | :------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `rq`: `bits` | integer | `8` | The number of bits used to quantize each data point. Can be `8` or `1`.
Learn more about [8-bit](/weaviate/concepts/vector-quantization#8-bit-rq) and [1-bit](/weaviate/concepts/vector-quantization#1-bit-rq) RQ. | +| `rq`: `rescoreLimit` | integer | `-1` | The minimum number of candidates to fetch before rescoring. | diff --git a/_includes/release-history.md b/_includes/release-history.md index 23cc88a1..1db5f81a 100644 --- a/_includes/release-history.md +++ b/_includes/release-history.md @@ -2,6 +2,7 @@ This table lists the latest five Weaviate Database versions and corresponding cl | Weaviate Database
([GitHub][cWeaviate]) | First
release date | Python
([GitHub][cPython]) | TypeScript/
JavaScript
([GitHub][cTypeScript]) | Go
([GitHub][cGo]) | Java
([GitHub][cJava]) | | :------------------------------------------------------------------ | :---------------------- | :-------------------------------------------------------------------------------: | :------------------------------------------------------------------------: | :---------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------: | +| [1.33.x](https://github.com/weaviate/weaviate/releases/tag/v1.33.0) | 2025-09-17 | [4.17.x](https://github.com/weaviate/weaviate-python-client/releases/tag/v4.17.0) | [3.9.x](https://github.com/weaviate/typescript-client/releases/tag/v3.9.0) | [5.4.x](https://github.com/weaviate/weaviate-go-client/releases/tag/v5.4.0) | [5.5.x](https://github.com/weaviate/java-client/releases/tag/5.5.0) | | [1.32.x](https://github.com/weaviate/weaviate/releases/tag/v1.32.0) | 2025-07-14 | [4.16.x](https://github.com/weaviate/weaviate-python-client/releases/tag/v4.16.0) | [3.8.x](https://github.com/weaviate/typescript-client/releases/tag/v3.8.0) | [5.3.x](https://github.com/weaviate/weaviate-go-client/releases/tag/v5.3.0) | [5.4.x](https://github.com/weaviate/java-client/releases/tag/5.4.0) | | [1.31.x](https://github.com/weaviate/weaviate/releases/tag/v1.31.0) | 2025-05-30 | [4.15.x](https://github.com/weaviate/weaviate-python-client/releases/tag/v4.15.0) | [3.6.x](https://github.com/weaviate/typescript-client/releases/tag/v3.6.0) | [5.2.x](https://github.com/weaviate/weaviate-go-client/releases/tag/v5.2.0) | [5.3.x](https://github.com/weaviate/java-client/releases/tag/5.3.0) | | [1.30.x](https://github.com/weaviate/weaviate/releases/tag/v1.30.0) | 2025-04-03 | [4.12.x](https://github.com/weaviate/weaviate-python-client/releases/tag/v4.12.0) | [3.5.x](https://github.com/weaviate/typescript-client/releases/tag/v3.5.0) | 
[5.1.x](https://github.com/weaviate/weaviate-go-client/releases/tag/v5.1.0) | [5.2.x](https://github.com/weaviate/java-client/releases/tag/5.2.0) | diff --git a/_includes/starter-guides/compression-types.mdx b/_includes/starter-guides/compression-types.mdx index 4e6571fe..6f87fc96 100644 --- a/_includes/starter-guides/compression-types.mdx +++ b/_includes/starter-guides/compression-types.mdx @@ -1,9 +1,13 @@ -- [Product Quantization (PQ)](/weaviate/configuration/compression/pq-compression) PQ reduces the size of the vector embedding in two ways. PQ trains on your data to create custom segments. PQ creates segments to reduce the number of dimensions, and segments are stored as 8 bit integers instead of 32 bit floats. Compared to dimensions, there are fewer segments and each segment is much smaller than a single dimension. +- **[Rotational Quantization (RQ)](/weaviate/configuration/compression/rq-compression)** (_recommended_) + RQ reduces the size of each vector dimension from 32 bits to 8 bits (or 1 bit) without requiring training. RQ first applies a fast pseudorandom rotation to the vector, then quantizes each dimension. The rotation spreads information evenly across dimensions, enabling up to 98-99% recall without any configuration or training phase. - The PQ compression algorithm is [configurable](/weaviate/config-refs/indexing/vector-index#pq-parameters). -You control the number of segments, segment granularity, and the size of the training set. -- [Scalar Quantization (SQ)](/weaviate/configuration/compression/sq-compression) SQ reduces the size of each vector dimension from 32 bits to 8 bits. SQ trains on your data to create custom buckets for each dimension. This training helps SQ to preserve data characteristics when it maps information from the 32 bit dimensions into 8 bit buckets. +- **[Product Quantization (PQ)](/weaviate/configuration/compression/pq-compression)** + PQ reduces the size of the vector embedding in two ways. 
PQ trains on your data to create custom segments. PQ creates segments to reduce the number of dimensions, and segments are stored as 8 bit integers instead of 32 bit floats. Compared to dimensions, there are fewer segments and each segment is much smaller than a single dimension. -- [Rotational Quantization (RQ)](/weaviate/configuration/compression/rq-compression) RQ reduces the size of each vector dimension from 32 bits to 8 bits without requiring training. RQ first applies a fast pseudorandom rotation to the vector, then quantizes each dimension to 8 bits. The rotation spreads information evenly across dimensions, enabling up to 98-99% recall without any configuration or training phase. + The PQ compression algorithm is [configurable](/weaviate/config-refs/indexing/vector-index#pq-parameters). You control the number of segments, segment granularity, and the size of the training set. -- [Binary Quantization (BQ)](/weaviate/configuration/compression/bq-compression) BQ reduces the size of each vector dimension to a single bit. This compression algorithm works best for vectors with high dimensionality. +- **[Binary Quantization (BQ)](/weaviate/configuration/compression/bq-compression)** + BQ reduces the size of each vector dimension to a single bit. This compression algorithm works best for vectors with high dimensionality. + +- **[Scalar Quantization (SQ)](/weaviate/configuration/compression/sq-compression)** + SQ reduces the size of each vector dimension from 32 bits to 8 bits. SQ trains on your data to create custom buckets for each dimension. This training helps SQ to preserve data characteristics when it maps information from the 32 bit dimensions into 8 bit buckets. 
diff --git a/docs/contributor-guide/weaviate-docs/style-guide.mdx b/docs/contributor-guide/weaviate-docs/style-guide.mdx index fe099188..1b20c7c8 100644 --- a/docs/contributor-guide/weaviate-docs/style-guide.mdx +++ b/docs/contributor-guide/weaviate-docs/style-guide.mdx @@ -124,11 +124,11 @@ We indicate which version the feature was introduced in with admonitions: :::info Added in version `vX.Y.Z` ::: -- New features in **technical preview**, use a caution block to warn users: +- New features in **preview**, use a caution block to warn users: -:::caution Technical preview +:::caution Preview -`` was added in **`v1.32`** as a **technical preview**.

+`` was added in **`v1.32`** as a **preview**.

This means that the feature is still under development and may change in future releases, including potential breaking changes. **We do not recommend using this feature in production environments at this time.** diff --git a/docs/deploy/configuration/env-vars/index.md b/docs/deploy/configuration/env-vars/index.md index 47b28e09..287fd5f4 100644 --- a/docs/deploy/configuration/env-vars/index.md +++ b/docs/deploy/configuration/env-vars/index.md @@ -32,7 +32,8 @@ import APITable from '@site/src/components/APITable'; | --- | --- | --- | --- | | `ASYNC_INDEXING` | (Experimental, added in `v1.22`.)

If set, Weaviate creates vector indexes asynchronously to the object creation process. This can be useful for importing large amounts of data. (default: `false`) | `boolean` | `false` | | `AUTOSCHEMA_ENABLED` | Whether to infer the schema where necessary with the autoschema (default: `true`) | `boolean` | `true` | -| `DEFAULT_VECTORIZER_MODULE` | Default vectorizer module - will be overridden by any class-level value defined in the schema | `string` | `text2vec-contextionary` | +| `DEFAULT_QUANTIZATION` | Default quantization technique - can be overridden by the quantization method specified in the collection definition. Available values: `rq-8`, `rq-1`, `pq`, `bq`, `sq` and `none`. Default: `rq-8`.
Added in `v1.33` | `string` | `rq-8` | +| `DEFAULT_VECTORIZER_MODULE` | Default vectorizer module - can be overridden by the vectorizer in the collection definition. | `string` | `text2vec-contextionary` | | `DISABLE_LAZY_LOAD_SHARDS` | New in v1.23. When `false`, enable lazy shard loading to improve mean time to recovery in multi-tenant deployments. | `string` | `false` | | `DISABLE_TELEMETRY` | Disable [telemetry](/deploy/configuration/telemetry.md) data collection | boolean | `false` | | `DISK_USE_READONLY_PERCENTAGE` | If disk usage is higher than the given percentage all shards on the affected node will be marked as `READONLY`, meaning all future write requests will fail. See [Disk Pressure Warnings and Limits for details](/deploy/configuration/persistence.md#disk-pressure-warnings-and-limits). | `string - number` | `90` | diff --git a/docs/weaviate/api/graphql/filters.md b/docs/weaviate/api/graphql/filters.md index 220b1138..85533951 100644 --- a/docs/weaviate/api/graphql/filters.md +++ b/docs/weaviate/api/graphql/filters.md @@ -59,6 +59,7 @@ The `where` filter is an [algebraic object](https://en.wikipedia.org/wiki/Algebr - `Operator` (which takes one of the following values) - `And` - `Or` + - `Not` - `Equal` - `NotEqual` - `GreaterThan` @@ -70,6 +71,7 @@ The `where` filter is an [algebraic object](https://en.wikipedia.org/wiki/Algebr - `IsNull` - `ContainsAny` (*Only for array and text properties) - `ContainsAll` (*Only for array and text properties) + - `ContainsNone` (*Only for array and text properties) - `Path`: Is a list of strings in [XPath](https://en.wikipedia.org/wiki/XPath#Abbreviated_syntax) style, indicating the property name of the collection. - If the property is a cross-reference, the path should be followed as a list of strings. For a `inPublication` reference property that refers to `Publication` collection, the path selector for `name` will be `["inPublication", "Publication", "name"]`. 
- `valueType` @@ -133,10 +135,6 @@ If the operator is `And` or `Or`, the operands are a list of `where` filters. -:::note `Not` operator -Weaviate doesn't have an operator to invert a filter (e.g. `Not Like ...` ). If you would like us to add one, please [upvote the issue](https://github.com/weaviate/weaviate/issues/3683). -::: - ### Filter behaviors #### Multi-word queries in `Equal` filters @@ -239,15 +237,15 @@ Each `Like` filter iterates over the entire inverted index for that property. Th Currently, the `Like` filter is not able to match wildcard characters (`?` and `*`) as literal characters. For example, it is currently not possible to only match the string `car*` and not `car`, `care` or `carpet`. This is a known limitation and may be addressed in future versions of Weaviate. -### `ContainsAny` / `ContainsAll` +### `ContainsAny` / `ContainsAll` / `ContainsNone` -The `ContainsAny` and `ContainsAll` operators filter objects using values of an array as criteria. +The `ContainsAny`, `ContainsAll` and `ContainsNone` operators filter objects using values of an array as criteria. Both operators expect an array of values and return objects that match based on the input values. -:::note `ContainsAny` and `ContainsAll` notes: -- The `ContainsAny` and `ContainsAll` operators treat texts as an array. The text is split into an array of tokens based on the chosen tokenization scheme, and the search is performed on that array. -- When using `ContainsAny` or `ContainsAll` with the REST api for [batch deletion](../../manage-objects/delete.mdx#delete-multiple-objects), the text array must be specified with the `valueTextArray` argument. This is different from the usage in search, where the `valueText` argument that can be used. +:::note `ContainsAny`/`ContainsAll`/`ContainsNone` notes: +- The `ContainsAny`, `ContainsAll` and `ContainsNone` operators treat texts as an array. 
The text is split into an array of tokens based on the chosen tokenization scheme, and the search is performed on that array. +- When using `ContainsAny`, `ContainsAll` and `ContainsNone` with the REST api for [batch deletion](../../manage-objects/delete.mdx#delete-multiple-objects), the text array must be specified with the `valueTextArray` argument. This is different from the usage in search, where the `valueText` argument can be used. ::: @@ -265,6 +263,12 @@ A `ContainsAny` query on a path of `["languages_spoken"]` with a value of `["Chi Using the same dataset of `Person` objects as above, a `ContainsAll` query on a path of `["languages_spoken"]` with a value of `["Chinese", "French", "English"]` will return objects where all three of those languages are present in the `languages_spoken` array. +#### `ContainsNone` + +`ContainsNone` returns objects where none of the values from the input array are present. + +Using the same dataset of `Person` objects as above, a `ContainsNone` query on a path of `["languages_spoken"]` with a value of `["Chinese", "French", "English"]` will return objects where **none** of those languages are present in the `languages_spoken` array. For example, a person who speaks only Spanish would be returned, but a person who speaks English would be excluded. 
+ ## Filter performance import RangeFilterPerformanceNote from '/_includes/range-filter-performance-note.mdx'; diff --git a/docs/weaviate/best-practices/index.md b/docs/weaviate/best-practices/index.md index dcb7f8e9..6da2912b 100644 --- a/docs/weaviate/best-practices/index.md +++ b/docs/weaviate/best-practices/index.md @@ -102,7 +102,13 @@ If you have a large number of vectors, consider using vector quantization to red ![Overview of quantization schemes](../../../_includes/images/concepts/quantization_overview_light.png#gh-light-mode-only "Overview of quantization schemes") ![Overview of quantization schemes](../../../_includes/images/concepts/quantization_overview_dark.png#gh-dark-mode-only "Overview of quantization schemes") -For HNSW indexes, we suggest enabling product quantization (PQ) as a starting point. It provides a good set of default trade-offs between memory usage and query performance, as well as tunable parameters to optimize for your specific use case. +For HNSW indexes, we suggest enabling [rotational quantization (RQ)](../configuration/compression/rq-compression.md) as a starting point. It provides significant memory usage benefits and almost no loss in query accuracy. + +:::info Compression by Default + +Starting with `v1.33`, Weaviate enables **8-bit RQ quantization by default** when creating new collections to ensure efficient resource utilization and faster performance. This behavior can be changed through the [`DEFAULT_QUANTIZATION`](/deploy/configuration/env-vars/index.md#DEFAULT_QUANTIZATION) environment variable. 
+ +::: :::tip Further resources - [How-to: Configure vector quantization](../configuration/compression/index.md) diff --git a/docs/weaviate/concepts/data-import.mdx b/docs/weaviate/concepts/data-import.mdx new file mode 100644 index 00000000..cef4710e --- /dev/null +++ b/docs/weaviate/concepts/data-import.mdx @@ -0,0 +1,70 @@ +--- +title: Data import +sidebar_position: 11 +description: "Theoretical explanation of client-side and server-side batch imports." +image: og/docs/concepts.jpg +--- + +Weaviate offers two flexible methods for importing data in bulk: **client-side batching** and **server-side batching**. This allows you to choose the best strategy based on your specific needs. + +- **Client-side batching**
+ In the client-side approach, **the Weaviate client library is responsible for grouping data into batches**. You define the batching mechanism and parameters, such as the size of each batch (e.g., 100 objects) using the appropriate [client library method](../manage-objects/import.mdx). The client then sends chunks to the Weaviate server accordingly. + + This method gives you direct control over the import process through manual tuning of parameters like the batch size and number of concurrent requests. However, the tuning must be done "blindly" on the client side, without knowledge of the server status. + +- **Server-side batching**
+ Server-side batching, or **automatic mode**, is the more robust and recommended approach. Here, the client sends data at a rate based on **feedback from the Weaviate server**. + + Using an internal queue and a dynamic _[backpressure](https://en.wikipedia.org/wiki/Backpressure_routing)_ mechanism, the server tells the client how much data to send next based on its current workload. This simplifies your client code, eliminates the need for manual tuning, and results in a more efficient and resilient data import process. + +:::tip + +For **code examples**, check out the [How-to: Batch import](../manage-objects/import.mdx) guide. Currently, only the Python client supports batch imports. + +::: + +--- + +## Server-side batching + +:::caution Preview + +Server-side batching was added in **`v1.33`** as a **preview**.

+This means that the feature is still under development and may change in future releases, including potential breaking changes. +**We do not recommend using this feature in production environments at this time.** + +::: + +Weaviate's server-side batching, also known as **automatic batching**, aims to provide a closed-loop system for simpler, faster, and more robust data ingestion. Instead of manually tuning batch parameters on the client side, you can let the server manage the data flow rate for optimal performance. + + + +### How it works + +When an automatic batch import is initiated, the client opens a persistent connection to the server for the duration of the batch job. + +- **Client sends data**: Your client sends objects to the server in chunks, at a rate that is based on server-provided feedback. +- **Server manages queues**: The server places incoming objects into an internal queue. The queue decouples the network communication from the actual database ingestion (like vectorization and storage). +- **Dynamic backpressure**: The server continuously monitors its internal queue size. It calculates an exponential moving average (EMA) of its workload and tells the client the ideal number of objects to send in the next chunk. This feedback loop allows the system to self-regulate, maximizing throughput without overwhelming the server. +- **Asynchronous errors**: If an error occurs while processing an object (e.g., validation fails), the server sends the error message back to the client over a separate, dedicated stream without interrupting the flow of objects. + +This architecture centralizes the complex batching logic on the server, resulting in a more efficient and stable data ingestion pipeline for all connected clients. + +:::info Why use automatic (server-side) batching? + +- **Simplified client code**: No need to tweak the batch size and the number of concurrent requests manually. The server determines the optimal batch size based on its current workload. 
+- **Improved stability**: The system automatically applies **backpressure**. If the server is busy, it will instruct the client to send less data, preventing overloads and request timeouts, which is especially useful during long-running vectorization tasks. +- **Enhanced resilience**: It's designed to handle cluster events like node scaling more gracefully, reducing the risk of interrupted batches. + +::: + +## Further resources + +- [How-to: Batch import](../manage-objects/import.mdx) +- [How-to: Create objects](../manage-objects/create.mdx) + +## Questions and feedback + +import DocsFeedback from "/_includes/docs-feedback.mdx"; + + diff --git a/docs/weaviate/concepts/vector-quantization.md b/docs/weaviate/concepts/vector-quantization.md index 3945e487..7c8ade01 100644 --- a/docs/weaviate/concepts/vector-quantization.md +++ b/docs/weaviate/concepts/vector-quantization.md @@ -25,11 +25,11 @@ Vector quantization is a technique that reduces the memory footprint of vector e ## Product quantization -[Product quantization](https://ieeexplore.ieee.org/document/5432202) is a multi-step quantization technique that is available for use with `hnsw` indexes in Weaivate. +[Product quantization](https://ieeexplore.ieee.org/document/5432202) is a multi-step quantization technique that is available for use with `hnsw` indexes in Weaviate. PQ reduces the size of each vector embedding in two steps. First, it reduces the number of vector dimensions to a smaller number of "segments", and then each segment is quantized to a smaller number of bits from the original number of bits (typically a 32-bit float). -import PQTradeoffs from '/_includes/configuration/pq-compression/tradeoffs.mdx' ; +import PQTradeoffs from '/\_includes/configuration/pq-compression/tradeoffs.mdx' ; @@ -50,7 +50,7 @@ The PQ `segments` controls the tradeoff between memory and recall. 
A larger `seg Below is a list segment values for common vectorizer modules: | Module | Model | Dimensions | Segments | -|-------------|-----------------------------------------|------------|------------------------| +| ----------- | --------------------------------------- | ---------- | ---------------------- | | openai | text-embedding-ada-002 | 1536 | 512, 384, 256, 192, 96 | | cohere | multilingual-22-12 | 768 | 384, 256, 192, 96 | | huggingface | sentence-transformers/all-MiniLM-L12-v2 | 384 | 192, 128, 96 | @@ -66,9 +66,11 @@ Weaviate uses a maximum of `trainingLimit` objects (per shard) for training, eve After the PQ conversion completes, query and write to the index as normal. Distances may be slightly different due to the effects of quantization. :::info Which objects are used for training? + - (`v1.27` and later) If the collection has more objects than the training limit, Weaviate randomly selects objects from the collection to train the codebook. - - (`v1.26` and earlier) Weaviate uses the first `trainingLimit` objects in the collection to train the codebook. +- (`v1.26` and earlier) Weaviate uses the first `trainingLimit` objects in the collection to train the codebook. - If the collection has fewer objects than the training limit, Weaviate uses all objects in the collection to train the codebook. + ::: ### Encoders @@ -112,28 +114,63 @@ When SQ is enabled, Weaviate boosts recall by over-fetching compressed results. ## Rotational quantization -:::caution Technical preview +:::info Added in `v1.32` + +**8-bit Rotational quantization (RQ)** was added in **`v1.32`**. + +::: + +:::caution Preview + +**1-bit Rotational quantization (RQ)** was added in **`v1.33`** as a **preview**.
-Rotational quantization (RQ) was added in **`v1.32`** as a **technical preview**.

This means that the feature is still under development and may change in future releases, including potential breaking changes. **We do not recommend using this feature in production environments at this time.** ::: -**Rotational quantization (RQ)** is an untrained 8-bit quantization technique that provides 4x compression while maintaining 98-99% recall on most datasets. Unlike SQ, RQ requires no training phase and can be enabled immediately at index creation. RQ works in two steps: +**Rotational quantization (RQ)** is a quantization technique that provides significant compression while maintaining high recall in internal testing. Unlike SQ, RQ requires no training phase and can be enabled immediately at index creation. RQ is available in two variants: **8-bit RQ** and **1-bit RQ**. + +### 8-bit RQ + +8-bit RQ provides 4x compression while maintaining 98-99% recall in internal testing. The method works as follows: 1. **Fast pseudorandom rotation**: The input vector is transformed using a fast rotation based on the Walsh Hadamard Transform. This rotation takes approximately 7-10 microseconds for a 1536-dimensional vector. The output dimension is rounded up to the nearest multiple of 64. 2. **Scalar quantization**: Each entry of the rotated vector is quantized to an 8-bit integer. The minimum and maximum values of each individual rotated vector define the quantization interval. +### 1-bit RQ + +1-bit RQ is an asymmetric quantization method that provides close to 32x compression as dimensionality increases. **1-bit RQ serves as a more robust and accurate alternative to BQ** with only a slight performance trade-off (approximately 10% decrease in throughput in internal testing compared to BQ). While more performant than PQ in terms of encoding time and distance calculations, 1-bit RQ typically offers slightly lower recall than well-tuned PQ. + +The method works as follows: + +1. 
**Fast pseudorandom rotation**: The same rotation process as 8-bit RQ is applied to the input vector. For 1-bit RQ, the output dimension is always padded to at least 256 bits to improve performance on low-dimensional data. + +2. **Asymmetric quantization**: + - **Data vectors**: Quantized using 1 bit per dimension by storing only the sign of each entry + - **Query vectors**: Scalar quantized using 5 bits per dimension during search + + + +This asymmetric approach improves recall compared to symmetric 1-bit schemes (such as BQ) by using more precision for query vectors during distance calculation. On datasets well-suited for BQ (like OpenAI embeddings), 1-bit RQ essentially matches BQ recall. It also works well on datasets where BQ performs poorly (such as [SIFT](https://arxiv.org/abs/2504.09081)). + +### RQ characteristics + The rotation step provides multiple benefits. It tends to reduce the quantization interval and decrease quantization error by distributing values more uniformly. It also distributes the distance information more evenly across all dimensions, providing a better starting point for distance estimation. -It's worth noting that RQ rounds up dimensions to multiples of 64 which means that low-dimensional data (< 64 or 128 dimensions) might result in less than optimal compression. +Both RQ variants round up the number of dimensions to multiples of 64, which means that low-dimensional data (< 64 or 128 dimensions) might result in less than optimal compression. Additionally, several factors affect the actual compression rates: -While inspired by extended RaBitQ, this implementation differs significantly for performance reasons. It Uses fast pseudorandom rotations instead of truly random rotations and it employs scalar quantization instead of RaBitQ's encoding algorithm, which becomes prohibitively slow with more bits per entry. 
+- **Auxiliary data storage**: 16 bytes for 8-bit RQ and 8 bytes for 1-bit RQ are stored with the compressed codes +- **Dimension rounding**: Dimensionality is rounded up to the nearest multiple of 64 and 1-bit RQ is also padded to at least 256 bits +Due to these factors, the 4x and 32x compression rates are only approached as dimensionality increases. These effects are more pronounced for low-dimensional vectors. + +While inspired by extended [RaBitQ](https://arxiv.org/abs/2405.12497), this implementation differs significantly for performance reasons. It uses fast pseudorandom rotations instead of truly random rotations. :::tip -Learn more about how to [configure rotational quantization](../configuration/compression/rq-compression.md) in Weaviate. + +Learn more about how to [configure rotational quantization](../configuration/compression/rq-compression.md) in Weaviate or dive deeper into the [implementation details and theoretical background](https://weaviate.io/blog/8-bit-rotational-quantization). 
+ ::: ## Over-fetching / re-scoring @@ -173,6 +210,7 @@ In some cases, rescoring also includes over-fetching, whereby additional candida ## Further resources :::info Related pages + - [Concepts: Indexing](./indexing/index.md) - [Concepts: Vector Indexing](./indexing/vector-index.md) - [Configuration: Vector index](../config-refs/indexing/vector-index.mdx) @@ -182,10 +220,11 @@ In some cases, rescoring also includes over-fetching, whereby additional candida - [How to configure: Scalar quantization (compression)](../configuration/compression/sq-compression.md) - [How to configure: Rotational quantization (compression)](../configuration/compression/rq-compression.md) - [Weaviate Academy: 250 Vector Compression](../../academy/py/compression/index.md) + ::: ## Questions and feedback -import DocsFeedback from '/_includes/docs-feedback.mdx'; +import DocsFeedback from '/\_includes/docs-feedback.mdx'; diff --git a/docs/weaviate/config-refs/collections.mdx b/docs/weaviate/config-refs/collections.mdx index c681e793..203a9636 100644 --- a/docs/weaviate/config-refs/collections.mdx +++ b/docs/weaviate/config-refs/collections.mdx @@ -850,7 +850,7 @@ For more code example and configuration guides visit the [How-to: Manage collect ## Mutability -Some, but not all, parameters are mutable after you create your collection. To modify immutable parameters, export your data, create a new collection, and import your data into it. Use [collection aliases](../starter-guides/managing-collections/index.mdx#migration-workflow-with-collection-aliases) to perform such a migration with zero downtime. +Some, but not all, parameters are mutable after you create your collection. To modify immutable parameters, export your data, create a new collection, and import your data into it.
Mutable parameters @@ -923,22 +923,25 @@ For more details, see [Starter Guides: Scaling limits with collections](../start ## Collection aliases -:::caution Technical preview - -Collection aliases were added in **`v1.32`** as a **technical preview**.

-This means that the feature is still under development and may change in future releases, including potential breaking changes. -**We do not recommend using this feature in production environments at this time.** - +:::info Added in `v1.32` ::: Collection aliases are alternative names for Weaviate collections that allow you to reference a collection by an alternative name. -Weaviate automatically routes alias requests to the target collection. This allows you to use aliases wherever collection names are required. This includes [collection management](../manage-collections/index.mdx), [queries](../search/index.mdx), and all other operations requiring a specific collection name with the **exception** of deleting collections. To delete a collection you need to use its name. Deleting a collection does not automatically delete aliases pointing to it. - Alias names must be unique (can't match existing collections or other aliases) and multiple aliases can point to the same collection. You can set up collection aliases [programmatically through client libraries](../manage-collections/collection-aliases.mdx) or by using the REST endpoints. In order to manage collection aliases, you need to posses the right [`Collection aliases`](../configuration/rbac/index.mdx#available-permissions) permissions. To manage the underlying collection the alias references, you also need the [`Collections`](../configuration/rbac/index.mdx#available-permissions) permissions for that specific collection. 
+**Collection aliases cannot be used to update collection definitions**, including: +- Updating and adding properties +- Updating vector and inverted indexes +- Configuring sharding and multi-tenancy +- Modifying vectorizer, generative and reranker configurations + +import CollectionAliasUsage from "/_includes/collection-alias-usage.mdx"; + + + ## Further resources - [Starter guides: Collection definition](/weaviate/starter-guides/managing-collections) diff --git a/docs/weaviate/config-refs/indexing/vector-index.mdx b/docs/weaviate/config-refs/indexing/vector-index.mdx index c42e82d2..f7c94530 100644 --- a/docs/weaviate/config-refs/indexing/vector-index.mdx +++ b/docs/weaviate/config-refs/indexing/vector-index.mdx @@ -10,6 +10,30 @@ There are three supported vector index types: - **[Flat index](#flat-index)** - **[Dynamic index](#dynamic-index)** +## Index configuration parameters + +:::caution Experimental feature +Available starting in `v1.25`. Dynamic indexing is an experimental feature. Use with caution. +::: + +Use these parameters to configure the index type and their properties. They can be set in the [collection configuration](../../manage-collections/vector-config.mdx#set-vector-index-type). + +| Parameter | Type | Default | Details | +| :------------------ | :----- | :------ | :------------------------------------------------------------------- | +| `vectorIndexType` | string | `hnsw` | Optional. The index type - can be `hnsw`, `flat` or `dynamic`. | +| `vectorIndexConfig` | object | - | Optional. Set parameters that are specific to the vector index type. | + +
+ How to select the index type + +Generally, the `hnsw` index type is recommended for most use cases. The `flat` index type is recommended for use cases where the number of objects per index is low, such as in multi-tenancy cases. You can also opt for the `dynamic` index which will initially configure a `flat` index and once the object count exceeds a specified threshold it will automatically convert to an `hnsw` index. + +See [this section](../../concepts/indexing/vector-index.md#which-vector-index-is-right-for-me) for more information about the different index types and how to choose between them. + +
+ +If faster import speeds are desired, [asynchronous indexing](#asynchronous-indexing) allows de-coupling of indexing from object creation. + ## HNSW index HNSW indexes are scalable and super fast at query time, but HNSW algorithms are costly when you add data during the index building process. @@ -29,20 +53,25 @@ Some HNSW parameters are mutable, but others cannot be modified after you create | `dynamicEfMax` | integer | Upper bound for [dynamic `ef`](../../concepts/indexing/vector-index.md#dynamic-ef). Protects against creating a search list that is too long.

If `dynamicEfMax` is higher than the limit, `dynamicEfMax` does not have any effect. In this case, `ef` is the limit.

This setting is only used when `ef` is -1. | 500 | Yes | | `dynamicEfFactor` | integer | Multiplier for [dynamic `ef`](../../concepts/indexing/vector-index.md#dynamic-ef). Sets the potential length of the search list.

This setting is only used when `ef` is -1. | 8 | Yes | | `filterStrategy` | string | Added in `v1.27.0`. The filter strategy to use for filtering the search results. The filter strategy can be set to `sweeping` or `acorn`.

- `sweeping`: The default filter strategy.
- `acorn`: Uses Weaviate's ACORN implementation. [Read more](../../concepts/filtering.md#filter-strategy) | `sweeping` | Yes | -| `flatSearchCutoff` | integer | Optional. Threshold for the [flat-search cutoff](/weaviate/concepts/filtering.md#flat-search-cutoff). To force a vector index search, set `"flatSearchCutoff": 0`. | 40000 | Yes | +| `flatSearchCutoff` | integer | Optional. Threshold for the [flat-search cutoff](/weaviate/concepts/filtering.md#flat-search-cutoff). To force a vector index search, set `"flatSearchCutoff": 0`. | 40000 | Yes | | `skip` | boolean | When true, do not index the collection.

Weaviate decouples vector creation and vector storage. If you skip vector indexing, but a vectorizer is configured (or a vector is provided manually), Weaviate logs a warning each import.

To skip indexing and vector generation, set `"vectorizer": "none"` when you set `"skip": true`.

See [When to skip indexing](../../concepts/indexing/vector-index.md#when-to-skip-indexing). | `false` | No | | `vectorCacheMaxObjects` | integer | Maximum number of objects in the memory cache. By default, this limit is set to one trillion (`1e12`) objects when a new collection is created. For sizing recommendations, see [Vector cache considerations](../../concepts/indexing/vector-index.md#vector-cache-considerations). | `1e12` | Yes | -| `pq` | object | Enable and configure [product quantization (PQ)](/weaviate/concepts/indexing/vector-index.md) compression.

PQ assumes some data has already been loaded. You should have 10,000 to 100,000 vectors per shard loaded before you enable PQ.

For PQ configuration details, see [PQ configuration parameters](#pq-parameters). | -- | Yes | +| `rq` | object | Enable and configure [rotational quantization (RQ)](/weaviate/concepts/indexing/vector-index.md) compression.

For RQ configuration details, see [RQ configuration parameters](#rq-parameters). | -- | Yes | +| `pq` | object | Enable and configure [product quantization (PQ)](/weaviate/concepts/indexing/vector-index.md) compression.

PQ assumes some data has already been loaded. You should have 10,000 to 100,000 vectors per shard loaded before you enable PQ.

For PQ configuration details, see [PQ configuration parameters](#pq-parameters). | -- | Yes | +| `bq` | object | Enable and configure [binary quantization (BQ)](/weaviate/concepts/indexing/vector-index.md) compression.

For BQ configuration details, see [BQ configuration parameters](#bq-parameters). | -- | Yes | +| `sq` | object | Enable and configure [scalar quantization (SQ)](/weaviate/concepts/indexing/vector-index.md) compression.

For SQ configuration details, see [SQ configuration parameters](#sq-parameters). | -- | Yes | ### Database parameters for HNSW Note that some database-level parameters are available to configure HNSW indexing behavior. -- `PERSISTENCE_HNSW_MAX_LOG_SIZE` is a database-level parameter that sets the maximum size of the HNSW write-ahead-log. The default value is `500MiB`. +- [`PERSISTENCE_HNSW_MAX_LOG_SIZE`](/deploy/configuration/env-vars/index.md#PERSISTENCE_HNSW_MAX_LOG_SIZE) is a database-level parameter that sets the maximum size of the HNSW write-ahead-log. The default value is `500MiB`. + + Increase this value to improve efficiency of the compaction process, but be aware that this will increase the memory usage of the database. Conversely, decreasing this value will reduce memory usage but may slow down the compaction process. -Increase this value to improve efficiency of the compaction process, but be aware that this will increase the memory usage of the database. Conversely, decreasing this value will reduce memory usage but may slow down the compaction process. + Preferably, the `PERSISTENCE_HNSW_MAX_LOG_SIZE` should set to a value close to the size of the HNSW graph. -Preferably, the `PERSISTENCE_HNSW_MAX_LOG_SIZE` should set to a value close to the size of the HNSW graph. +- [`DEFAULT_QUANTIZATION`](/deploy/configuration/env-vars/index.md#DEFAULT_QUANTIZATION) is a database-level parameter that defines which quantization technique will be used by default when creating new collections. 
### Tombstone cleanup parameters @@ -110,17 +139,7 @@ Flat indexes are recommended for use cases where the number of objects per index | Parameter | Type | Default | Changeable | Details | | :---------------------- | :------ | :------ | :--------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `vectorCacheMaxObjects` | integer | `1e12` | Yes | Maximum number of objects in the memory cache. By default, this limit is set to one trillion (`1e12`) objects when a new collection is created. For sizing recommendations, see [Vector cache considerations](../../concepts/indexing/vector-index.md#vector-cache-considerations). | -| `bq` | object | -- | No | Enable and configure [binary quantization (BQ)](../../concepts/vector-quantization.md#binary-quantization) compression.

For BQ configuration details, see [BQ configuration parameters](#bq-configuration-parameters). | - -### BQ configuration parameters - -Configure `bq` with these parameters. - -| Parameter | Type | Default | Details | -| :------------- | :------ | :------ | :------------------------------------------------------------------------- | -| `enabled` | boolean | `false` | Enable BQ. Weaviate uses binary quantization (BQ) compression when `true`. | -| `rescoreLimit` | integer | -1 | The minimum number of candidates to fetch before rescoring. | -| `cache` | boolean | `false` | Whether to use the vector cache. | +| `bq` | object | -- | No | Enable and configure [binary quantization (BQ)](../../concepts/vector-quantization.md#binary-quantization) compression.

For BQ configuration details, see [BQ configuration parameters](#bq-parameters). | ## Dynamic index @@ -144,32 +163,10 @@ The goal of `dynamic` indexing is to shorten latencies during query time at the | :---------- | :------ | :----------- | :------------------------------------------------------------------------------------ | | `distance` | string | `cosine` | Distance metric. The metric that measures the distance between two arbitrary vectors. | | `hnsw` | object | default HNSW | [HNSW index configuration](#hnsw-index-parameters) to be used. | -| `flat` | object | default Flat | [Flat index configuration](#flat-index) to be used. | +| `flat` | object | default Flat | [Flat index configuration](#flat-index) to be used. | | `threshold` | integer | 10000 | Threshold object count at which `flat` to `hnsw` conversion happens | -## Index configuration parameters - -:::caution Experimental feature -Available starting in `v1.25`. Dynamic indexing is an experimental feature. Use with caution. -::: - -Use these parameters to configure the index type and their properties. They can be set in the [collection configuration](../../manage-collections/vector-config.mdx#set-vector-index-type). - -| Parameter | Type | Default | Details | -| :------------------ | :----- | :------ | :------------------------------------------------------------------- | -| `vectorIndexType` | string | `hnsw` | Optional. The index type - can be `hnsw`, `flat` or `dynamic`. | -| `vectorIndexConfig` | object | - | Optional. Set parameters that are specific to the vector index type. | - -
- How to select the index type - -Generally, the `hnsw` index type is recommended for most use cases. The `flat` index type is recommended for use cases where the data the number of objects per index is low, such as in multi-tenancy cases. You can also opt for the `dynamic` index which will initially configure a `flat` index and once the object count exceeds a specified threshold it will automatically convert to an `hnsw` index. - -See [this section](../../concepts/indexing/vector-index.md#which-vector-index-is-right-for-me) for more information about the different index types and how to choose between them. - -
- -If faster import speeds are desired, [asynchronous indexing](#asynchronous-indexing) allows de-coupling of indexing from object creation. +## Quantization parameters ### RQ parameters @@ -203,6 +200,15 @@ import BQParameters from "/_includes/configuration/bq-compression-parameters.mdx +## Default quantization for new collections {#default-quantization} + +:::info Added in `v1.33` +::: + +Starting with Weaviate v1.33, **[8-bit Rotational Quantization (RQ)](../../configuration/compression/rq-compression.md) is enabled by default for all new collections**. This automatic optimization provides up to 4x memory compression and faster query performance while maintaining high recall accuracy. Existing collections (for example restored from backups) are not affected and retain their original configuration. + +Set the [`DEFAULT_QUANTIZATION` environment variable](/docs/deploy/configuration/env-vars/index.md#DEFAULT_QUANTIZATION) before starting Weaviate to change the default quantization technique or to disable it. + ## Configure semantic indexing Weaviate can generate vector embeddings for objects using [model provider integrations](/weaviate/model-providers/). 
diff --git a/docs/weaviate/configuration/compression/bq-compression.md b/docs/weaviate/configuration/compression/bq-compression.md index a6f3da83..ab2fe888 100644 --- a/docs/weaviate/configuration/compression/bq-compression.md +++ b/docs/weaviate/configuration/compression/bq-compression.md @@ -1,6 +1,5 @@ --- title: Binary Quantization (BQ) -sidebar_position: 6 image: og/docs/configuration.jpg # tags: ['configuration', 'compression', 'bq'] --- diff --git a/docs/weaviate/configuration/compression/index.md b/docs/weaviate/configuration/compression/index.md index 8b3d4471..67d232b7 100644 --- a/docs/weaviate/configuration/compression/index.md +++ b/docs/weaviate/configuration/compression/index.md @@ -5,14 +5,27 @@ image: og/docs/configuration.jpg # tags: ['configuration', 'compression', 'pq'] --- -Uncompressed vectors can be large. Compressed vectors lose some information, but they use fewer resources and can be very cost effective. +Uncompressed vectors can be large. Compressed vectors lose some information, but they use fewer resources and can be very cost effective. + +## Vector quantization To balance resource costs and system performance, consider one of these options: -- [Binary Quantization (BQ)](/weaviate/configuration/compression/bq-compression) -- [Product Quantization (PQ)](/weaviate/configuration/compression/pq-compression) -- [Rotational Quantization (RQ)](/weaviate/configuration/compression/rq-compression) -- [Scalar Quantization (SQ)](/weaviate/configuration/compression/sq-compression) +- **[Rotational Quantization (RQ)](rq-compression.md)** (_recommended_) +- **[Product Quantization (PQ)](pq-compression.md)** +- **[Binary Quantization (BQ)](bq-compression.md)** +- **[Scalar Quantization (SQ)](sq-compression.md)** + +You can also [disable quantization](uncompressed.md) for a collection. 
+ +:::info Compression by Default + +Starting with `v1.33`, Weaviate enables **8-bit RQ quantization by default** when creating new collections to ensure efficient resource utilization and faster performance. This behavior can be changed through the [`DEFAULT_QUANTIZATION`](/deploy/configuration/env-vars/index.md#DEFAULT_QUANTIZATION) environment variable. + +::: + +## Multi-vector encoding Aside from quantization, Weaviate also offers encodings for multi-vector embeddings: -- [MUVERA encoding](./multi-vectors.md) + +- **[MUVERA encoding](./multi-vectors.md)** diff --git a/docs/weaviate/configuration/compression/multi-vectors.md b/docs/weaviate/configuration/compression/multi-vectors.md index 5bc90fe1..297b10df 100644 --- a/docs/weaviate/configuration/compression/multi-vectors.md +++ b/docs/weaviate/configuration/compression/multi-vectors.md @@ -1,6 +1,5 @@ --- title: Multi-vector encodings -sidebar_position: 30 image: og/docs/configuration.jpg # tags: ['configuration', 'compression'] --- diff --git a/docs/weaviate/configuration/compression/pq-compression.md b/docs/weaviate/configuration/compression/pq-compression.md index 88e6dd79..0455258a 100644 --- a/docs/weaviate/configuration/compression/pq-compression.md +++ b/docs/weaviate/configuration/compression/pq-compression.md @@ -1,6 +1,5 @@ --- title: Product Quantization (PQ) -sidebar_position: 5 image: og/docs/configuration.jpg # tags: ['configuration', 'compression', 'pq'] --- diff --git a/docs/weaviate/configuration/compression/rq-compression.md b/docs/weaviate/configuration/compression/rq-compression.md index 1e35e035..d22a289b 100644 --- a/docs/weaviate/configuration/compression/rq-compression.md +++ b/docs/weaviate/configuration/compression/rq-compression.md @@ -1,6 +1,5 @@ --- title: Rotational Quantization (RQ) -sidebar_position: 25 image: og/docs/configuration.jpg # tags: ['configuration', 'compression', 'rq'] --- @@ -13,21 +12,35 @@ import GoCode from 
'!!raw-loader!/\_includes/code/howto/go/docs/configure/compre import TSCode from '!!raw-loader!/\_includes/code/howto/configure-rq/rq-compression-v3.ts'; import JavaCode from '!!raw-loader!/\_includes/code/howto/java/src/test/java/io/weaviate/docs/rq-compression.java'; -:::caution Technical preview +:::info Added in `v1.32` + +**8-bit Rotational quantization (RQ)** was added in **`v1.32`**. + +::: + +:::caution Preview + +**1-bit Rotational quantization (RQ)** was added in **`v1.33`** as a **preview**.
-Rotational quantization (RQ) was added in **`v1.32`** as a **technical preview**.

This means that the feature is still under development and may change in future releases, including potential breaking changes. **We do not recommend using this feature in production environments at this time.** ::: -[**Rotational quantization (RQ)**](../../concepts/vector-quantization.md#rotational-quantization) is a fast untrained vector compression technique that offers 4x compression while retaining almost perfect recall (98-99% on most datasets). +[**Rotational quantization (RQ)**](../../concepts/vector-quantization.md#rotational-quantization) is a fast vector compression technique that offers significant performance benefits. Two RQ variants are available in Weaviate: + +- **8-bit RQ**: Up to 4x compression while retaining almost perfect recall (98-99% on most datasets). **Recommended** for most use cases. +- **1-bit RQ**: Close to 32x compression as dimensionality increases with moderate recall across various datasets. :::note HNSW only RQ is currently not supported for the flat index type. ::: -## Enable compression for new collection +## 8-bit RQ + +[8-bit RQ](../../concepts/vector-quantization.md#8-bit-rq) provides up to 4x compression while maintaining 98-99% recall in internal testing. It is generally recommended for most use cases as the default quantization technique. + +### Enable compression for new collection RQ can be enabled at collection creation time through the collection definition: @@ -66,7 +79,7 @@ RQ can be enabled at collection creation time through the collection definition: -## Enable compression for existing collection +### Enable compression for existing collection RQ can also be enabled for an existing collection by updating the collection definition: @@ -97,6 +110,80 @@ RQ can also be enabled for an existing collection by updating the collection def +## 1-bit RQ + +[1-bit RQ](../../concepts/vector-quantization.md#1-bit-rq) is a quantization technique that provides close to 32x compression as dimensionality increases. 
1-bit RQ serves as a more robust and accurate alternative to [BQ](./bq-compression.md) with only a slight performance trade-off. While more performant than PQ in terms of encoding time and distance calculations, 1-bit RQ typically offers slightly lower recall than well-tuned [PQ](./pq-compression.md). + +### Enable compression for new collection + +RQ can be enabled at collection creation time through the collection definition: + + + + + + + + + + + + + + + + +### Enable compression for existing collection + +RQ can also be enabled for an existing collection by updating the collection definition: + + + + + + + + + + + + + ## RQ parameters To tune RQ, use these quantization and vector index parameters: diff --git a/docs/weaviate/configuration/compression/sq-compression.md b/docs/weaviate/configuration/compression/sq-compression.md index c0bc9d36..2ff3cc20 100644 --- a/docs/weaviate/configuration/compression/sq-compression.md +++ b/docs/weaviate/configuration/compression/sq-compression.md @@ -1,6 +1,5 @@ --- title: Scalar Quantization (SQ) -sidebar_position: 27 image: og/docs/configuration.jpg # tags: ['configuration', 'compression', 'sq'] --- diff --git a/docs/weaviate/configuration/compression/uncompressed.md b/docs/weaviate/configuration/compression/uncompressed.md new file mode 100644 index 00000000..bee5af54 --- /dev/null +++ b/docs/weaviate/configuration/compression/uncompressed.md @@ -0,0 +1,62 @@ +--- +title: Uncompressed vector embeddings +sidebar_label: No quantization +image: og/docs/configuration.jpg +# tags: ['configuration', 'compression', 'rq'] +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; +import PyCode from '!!raw-loader!/\_includes/code/howto/configure-rq/rq-compression-v4.py'; +import GoCode from '!!raw-loader!/\_includes/code/howto/go/docs/configure/compression.rq_test.go'; +import TSCode from 
'!!raw-loader!/\_includes/code/howto/configure-rq/rq-compression-v3.ts'; +import JavaCode from '!!raw-loader!/\_includes/code/howto/java/src/test/java/io/weaviate/docs/rq-compression.java'; + +You can opt-out of using vector quantization to compress your vector data. + +## Disable compression for new collection + +When creating the collection, you can choose not to use quantization through the collection definition: + + + + + + + +## Additional considerations + +### Multiple vector embeddings (named vectors) + +import NamedVectorCompress from '/\_includes/named-vector-compress.mdx'; + + + +### Multi-vector embeddings (ColBERT, ColPali, etc.) + +import MultiVectorCompress from '/\_includes/multi-vector-compress.mdx'; + + + +:::note Multi-vector performance +RQ supports multi-vector embeddings. Each token vector is rounded up to a multiple of 64 dimensions, which may result in less than 4x compression for very short vectors. This is a technical limitation that may be addressed in future versions. +::: + +## Further resources + +- [Starter guides: Compression](/docs/weaviate/starter-guides/managing-resources/compression.mdx) +- [Reference: Vector index](/weaviate/config-refs/indexing/vector-index.mdx) +- [Concepts: Vector quantization](/docs/weaviate/concepts/vector-quantization.md) +- [Concepts: Vector index](/weaviate/concepts/indexing/vector-index.md) + +## Questions and feedback + +import DocsFeedback from '/\_includes/docs-feedback.mdx'; + + diff --git a/docs/weaviate/configuration/rbac/index.mdx b/docs/weaviate/configuration/rbac/index.mdx index 45dfc78d..2ddbf9d9 100644 --- a/docs/weaviate/configuration/rbac/index.mdx +++ b/docs/weaviate/configuration/rbac/index.mdx @@ -400,6 +400,31 @@ Permissions can be defined with the following resources, access levels and optio + + + + Groups + + + +
Read groups
+
Assign and revoke group membership
+ + +

Group name filter:

+
    +
  • + string or regex: specifies which groups can be managed +
  • +
+

Group type filter:

+
    +
  • + oidc (only OIDC user groups are supported at the moment) +
  • +
+ + diff --git a/docs/weaviate/configuration/rbac/manage-groups.mdx b/docs/weaviate/configuration/rbac/manage-groups.mdx new file mode 100644 index 00000000..a1300638 --- /dev/null +++ b/docs/weaviate/configuration/rbac/manage-groups.mdx @@ -0,0 +1,252 @@ +--- +title: Manage groups +sidebar_label: Manage groups +image: og/docs/configuration.jpg +# tags: ['rbac', 'groups', 'configuration', 'authorization', 'oidc'] +--- + +import Link from "@docusaurus/Link"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; +import OidcGroupPyCode from "!!raw-loader!/_includes/code/python/howto.configure.rbac.oidc.groups.py"; + + +:::info Added in `v1.33` +::: + +When using [OIDC](/deploy/configuration/oidc.md) for authentication, you can leverage user groups defined in your identity provider (like Keycloak, Okta, or Auth0) to manage permissions in Weaviate. The user's group memberships are passed to Weaviate in the OIDC token. + +You can then assign Weaviate roles directly to these **OIDC groups**. Any user who is a member of that group will automatically inherit the permissions of the assigned roles. This is a powerful way to manage access for large teams without assigning roles to each user individually. + +On this page, you will find examples of how to programmatically **manage OIDC groups** and their associated roles. + +## Group management {#group-management} + +### Assign roles to an OIDC group + +You can assign one or more Weaviate roles to an OIDC group. Any user belonging to this group will inherit the roles' permissions. + +This example assigns the `testRole` and `viewer` roles to the `/admin-group`. 
+ + + + + + + +```typescript +// TypeScript/JavaScript support coming soon +``` + + + + +```go +// Go support coming soon +``` + + + + +```java +// Java support coming soon +``` + + + + +### Revoke roles from an OIDC group + +You can revoke one or more roles from a specific OIDC group. + +This example removes the `testRole` and `viewer` roles from the `/admin-group`. + + + + + + + +```typescript +// TypeScript/JavaScript support coming soon +``` + + + + +```go +// Go support coming soon +``` + + + + +```java +// Java support coming soon +``` + + + + +### List roles assigned to an OIDC group + +Retrieve a list of all roles that have been assigned to a specific OIDC group. + + + + + + + +```typescript +// TypeScript/JavaScript support coming soon +``` + + + + +```go +// Go support coming soon +``` + + + + +```java +// Java support coming soon +``` + + + + +
+Example results + +```text +Roles assigned to '/admin-group': ['testRole', 'viewer'] +``` + +
+ +### List all known OIDC groups + +This example shows how to get a list of all OIDC groups that Weaviate is aware of. Weaviate learns about a group when a role is first assigned to it. + + + + + + + +```typescript +// TypeScript/JavaScript support coming soon +``` + + + + +```go +// Go support coming soon +``` + + + + +```java +// Java support coming soon +``` + + + + +
+Example results + +```text +Known OIDC groups (3): ['/viewer-group', '/admin-group', '/my-test-group'] +``` + +
+ +### List groups assigned to a role + +Retrieve a list of all groups that have been assigned a specific role. + +This example shows which groups have the `testRole` assigned to them. + + + + + + + +```typescript +// TypeScript/JavaScript support coming soon +``` + + + + +```go +// Go support coming soon +``` + + + + +```java +// Java support coming soon +``` + + + + +
+Example results + +```text +Groups assigned to role 'testRole': + - Group ID: /admin-group, Type: oidc +``` + +
+ +## Further resources + +- [RBAC: Overview](./index.mdx) +- [RBAC: Configuration](/deploy/configuration/configuring-rbac.md) +- [RBAC: Manage roles](./manage-roles.mdx) +- [RBAC: Manage users](./manage-users.mdx) + +## Questions and feedback + +import DocsFeedback from "/_includes/docs-feedback.mdx"; + + +```` diff --git a/docs/weaviate/configuration/rbac/manage-roles.mdx b/docs/weaviate/configuration/rbac/manage-roles.mdx index 7f1d3db1..50f5708f 100644 --- a/docs/weaviate/configuration/rbac/manage-roles.mdx +++ b/docs/weaviate/configuration/rbac/manage-roles.mdx @@ -6,24 +6,26 @@ image: og/docs/configuration.jpg # tags: ['rbac', 'roles', 'configuration', 'authorization'] --- -import Link from '@docusaurus/Link'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; -import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; -import PyCode from '!!raw-loader!/_includes/code/python/howto.configure.rbac.permissions.py'; -import TSCode from '!!raw-loader!/_includes/code/typescript/howto.configure.rbac.permissions.ts'; -import RolePyCode from '!!raw-loader!/_includes/code/python/howto.configure.rbac.roles.py'; -import RoleTSCode from '!!raw-loader!/_includes/code/typescript/howto.configure.rbac.roles.ts'; +import Link from "@docusaurus/Link"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; +import PyCode from "!!raw-loader!/_includes/code/python/howto.configure.rbac.permissions.py"; +import TSCode from "!!raw-loader!/_includes/code/typescript/howto.configure.rbac.permissions.ts"; +import RolePyCode from "!!raw-loader!/_includes/code/python/howto.configure.rbac.roles.py"; +import RoleTSCode from "!!raw-loader!/_includes/code/typescript/howto.configure.rbac.roles.ts"; +import JavaCode from 
"!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-roles.java"; +import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/configure/rbac.roles_test.go"; :::info Added in `v1.29` Role-based access control (RBAC) is generally available in Weaviate from version `v1.29`. ::: -In Weaviate, Role-based access control (RBAC) allows you to define roles and assign permissions to those roles. Users can then be assigned to roles and inherit the permissions associated with those roles. +In Weaviate, Role-based access control (RBAC) allows you to define roles and assign permissions to those roles. Users can then be assigned to roles and inherit the permissions associated with those roles. On this page, you will find examples of how to **manage roles and permissions** with Weaviate client libraries. -import ConfigureRbac from '/_includes/configuration/configure-rbac.mdx'; +import ConfigureRbac from "/_includes/configuration/configure-rbac.mdx"; @@ -52,18 +54,20 @@ Role management requires appropriate `role` resource permissions that can be obt /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -73,21 +77,27 @@ Role management requires appropriate `role` resource permissions that can be obt Permissions for these resource types can be assigned to roles: -1. [**Role Management**](#role-management-permissions) +1. [**Role Management**](#role-management-permissions) + +2. [**User Management**](#user-management-permissions) -1. [**User Management**](#user-management-permissions) +3. [**Collections**](#collections-permissions) (collection definitions only, data object permissions are separate) -1. [**Collections**](#collections-permissions) (collection definitions only, data object permissions are separate) +4. [**Tenants**](#tenants-permissions) -1. [**Tenants**](#tenants-permissions) +5. [**Data Objects**](#data-permissions) -1. [**Data Objects**](#data-permissions) +6. 
[**Backup**](#backups-permissions) -1. [**Backup**](#backups-permissions) +7. [**Cluster Data Access**](#clusters-permissions) -1. [**Cluster Data Access**](#clusters-permissions) +8. [**Node Data Access**](#nodes-permissions) -1. [**Node Data Access**](#nodes-permissions) +9. [**Collection alias**](#aliases-permissions) + +10. [**Replications**](#replications-permissions) + +11. [**Groups**](#groups-permissions) #### Create a role with `Role Management` permissions {#role-management-permissions} @@ -106,25 +116,27 @@ This example creates a role called `testRole` with permissions to: + text={RoleTSCode} + startMarker="// START AddManageRolesPermission" + endMarker="// END AddManageRolesPermission" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -150,21 +162,23 @@ This example creates a role called `testRole` with permissions to: // TS/JS support coming soon ``` - - - -```go -// Go support coming soon -``` - - - - -```java -// Java support coming soon -``` - - + + + + + + + #### Create a role with `Collections` permissions {#collections-permissions} @@ -184,25 +198,27 @@ This example creates a role called `testRole` with permissions to: + text={RoleTSCode} + startMarker="// START AddCollectionsPermission" + endMarker="// END AddCollectionsPermission" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -225,25 +241,27 @@ This example creates a role called `testRole` with permissions to: + text={RoleTSCode} + startMarker="// START AddTenantPermission" + endMarker="// END AddTenantPermission" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -265,25 +283,27 @@ This example creates a role called `testRole` with permissions to: + text={RoleTSCode} + startMarker="// START AddDataObjectPermission" + endMarker="// END AddDataObjectPermission" + language="ts" + /> - -```go 
-// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -304,25 +324,27 @@ This example creates a role called `testRole` with permissions to: + text={RoleTSCode} + startMarker="// START AddBackupPermission" + endMarker="// END AddBackupPermission" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -343,25 +365,27 @@ This example creates a role called `testRole` with permissions to: + text={RoleTSCode} + startMarker="// START AddClusterPermission" + endMarker="// END AddClusterPermission" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -382,29 +406,31 @@ This example creates a role called `testRole` with permissions to: + text={RoleTSCode} + startMarker="// START AddNodesPermission" + endMarker="// END AddNodesPermission" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + -#### Create a role with `Collection Aliases` permissions {#aliases-permissions} +#### Create a role with `Collection Alias` permissions {#aliases-permissions} This example creates a role called `testRole` with permissions to: @@ -425,21 +451,23 @@ This example creates a role called `testRole` with permissions to: // TS/JS support coming soon ``` - - - -```go -// Go support coming soon -``` - - - - -```java -// Java support coming soon -``` - - + + + + + + + #### Create a role with `Replications` permissions {#replications-permissions} @@ -463,21 +491,63 @@ This example creates a role called `testRole` with permissions to: // TS/JS support coming soon ``` - - + + + + + + + + -```go -// Go support coming soon -``` +#### Create a role with `Groups` permissions {#groups-permissions} - - +This example creates a role called `testRole` with permissions to: + +- Read information about and assign/revoke group membership for OIDC groups starting with `TargetGroup`. 
-```java -// Java support coming soon + + + + + + +```typescript +// TS/JS support coming soon ``` - + + + + + + + ### Grant additional permissions @@ -499,25 +569,27 @@ This example grants additional permissions to the role `testRole` to: + text={RoleTSCode} + startMarker="// START AddRoles" + endMarker="// END AddRoles" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -548,18 +620,20 @@ This example removes the following permissions from the role `testRole`: /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -578,25 +652,27 @@ Check if the role `testRole` exists: + text={RoleTSCode} + startMarker="// START CheckRoleExists" + endMarker="// END CheckRoleExists" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -615,25 +691,27 @@ View the permissions assigned to a role. + text={RoleTSCode} + startMarker="// START InspectRole" + endMarker="// END InspectRole" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -652,25 +730,27 @@ View all roles in the system and their permissions. + text={RoleTSCode} + startMarker="// START ListAllRoles" + endMarker="// END ListAllRoles" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -696,18 +776,20 @@ List all users who have the role `testRole`. 
/> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -733,18 +815,20 @@ Deleting a role will remove it from the system, and revoke the associated permis /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -760,6 +844,6 @@ Visit the [Manage users](./manage-users.mdx) page to learn more about assigning ## Questions and feedback -import DocsFeedback from '/_includes/docs-feedback.mdx'; +import DocsFeedback from "/_includes/docs-feedback.mdx"; diff --git a/docs/weaviate/configuration/rbac/manage-users.mdx b/docs/weaviate/configuration/rbac/manage-users.mdx index 483edc2e..632a2740 100644 --- a/docs/weaviate/configuration/rbac/manage-users.mdx +++ b/docs/weaviate/configuration/rbac/manage-users.mdx @@ -1,38 +1,42 @@ --- + title: Manage users sidebar_label: Manage users sidebar_position: 1 image: og/docs/configuration.jpg + # tags: ['rbac', 'roles', 'configuration', 'authorization'] ---- -import Link from '@docusaurus/Link'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; -import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock'; -import PyCode from '!!raw-loader!/_includes/code/python/howto.configure.rbac.permissions.py'; -import TSCode from '!!raw-loader!/_includes/code/typescript/howto.configure.rbac.permissions.ts'; -import UserPyCode from '!!raw-loader!/_includes/code/python/howto.configure.rbac.users.py'; -import UserTSCode from '!!raw-loader!/_includes/code/typescript/howto.configure.rbac.users.ts'; -import OidcUserPyCode from '!!raw-loader!/_includes/code/python/howto.configure.rbac.oidc.users.py'; -import OidcUserTSCode from '!!raw-loader!/_includes/code/typescript/howto.configure.rbac.oidc.users.ts'; +--- +import Link from "@docusaurus/Link"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; +import PyCode 
from "!!raw-loader!/_includes/code/python/howto.configure.rbac.permissions.py"; +import TSCode from "!!raw-loader!/_includes/code/typescript/howto.configure.rbac.permissions.ts"; +import UserPyCode from "!!raw-loader!/_includes/code/python/howto.configure.rbac.users.py"; +import UserTSCode from "!!raw-loader!/_includes/code/typescript/howto.configure.rbac.users.ts"; +import OidcUserPyCode from "!!raw-loader!/_includes/code/python/howto.configure.rbac.oidc.users.py"; +import OidcUserTSCode from "!!raw-loader!/_includes/code/typescript/howto.configure.rbac.oidc.users.ts"; +import JavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-users.java"; +import OidcUserJavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/rbac-oidc-users.java"; +import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/configure/rbac.users_test.go"; +import OidcUserGoCode from "!!raw-loader!/_includes/code/howto/go/docs/configure/rbac.oidc.users_test.go"; :::info Added in `v1.29` and `v1.30` -Role-based access control (RBAC) is generally available in Weaviate from version `v1.29`. +Role-based access control (RBAC) is generally available in Weaviate from version `v1.29`. User management is available from version `v1.30`. ::: In Weaviate, Role-based access control (RBAC) allows you to define roles and assign permissions to those roles. Users can then be assigned to roles and inherit the permissions associated with those roles. -Weaviate differentiates multiple types of users. **Database users** are fully managed by the Weaviate instance, while **OIDC** users are managed by an external identity provider. Both types can be used together with RBAC. +Weaviate differentiates multiple types of users. **Database users** are fully managed by the Weaviate instance, while **OIDC** users are managed by an external identity provider. Both types can be used together with RBAC. 
On this page, you will find examples of how to programmatically **manage users** and their associated roles with Weaviate client libraries. :::note User types in Weaviate - - Under the hood, Weaviate differentiates three types of users: - `db_user`: Database users that can be fully managed through the API. @@ -58,30 +62,32 @@ This example shows how to get a list of all the users (`db_user`, `db_env_user` + text={UserTSCode} + startMarker="// START ListAllUsers" + endMarker="// END ListAllUsers" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - +
- Example results +Example results ```text [ @@ -107,30 +113,32 @@ This example creates a user called `custom-user`. + text={UserTSCode} + startMarker="// START CreateUser" + endMarker="// END CreateUser" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - +
- Example results +Example results ```text RXF1dU1VcWM1Q3hvVndYT0F1OTBOTDZLZWx0ME5kbWVJRVdPL25EVW12QT1fMXlDUEhUNjhSMlNtazdHcV92MjAw @@ -160,18 +168,20 @@ This example deletes a user called `custom-user`. /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -190,30 +200,32 @@ This example updates (rotates) the API key for `custom-user`. + text={UserTSCode} + startMarker="// START RotateApiKey" + endMarker="// END RotateApiKey" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - +
- Example results +Example results ```text SSs3WGVFbUxMVFhlOEsxVVMrQVBzM1VhQTJIM2xXWngwY01HaXFYVnM1az1fMXlDUEhUNjhSMlNtazdHcV92MjAw @@ -247,18 +259,20 @@ This example assigns the custom `testRole` role and predefined `viewer` role to /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -283,21 +297,23 @@ This example removes the role `testRole` from the user `custom-user`. startMarker="// START RevokeRoles" endMarker="// END RevokeRoles" language="ts" - /> + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -316,30 +332,32 @@ Retrieve the role information for any user. - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - +
- Example results +Example results ```text testRole @@ -350,7 +368,7 @@ viewer ## OIDC users: Permissions management {#oidc-user-permissions-management} -When using [OIDC](/deploy/configuration/oidc.md), an identity provider authenticates the user and issues tokens, which are then validated by Weaviate. These users can be assigned roles with custom permissions using RBAC. +When using [OIDC](/deploy/configuration/oidc.md), an identity provider authenticates the user and issues tokens, which are then validated by Weaviate. These users can be assigned roles with custom permissions using RBAC. ### Assign a role to an OIDC user @@ -369,25 +387,27 @@ This example assigns the custom `testRole` role and predefined `viewer` role to + text={OidcUserTSCode} + startMarker="// START AssignOidcUserRole" + endMarker="// END AssignOidcUserRole" + language="ts" + /> - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -408,25 +428,27 @@ This example removes the role `testRole` from the user `custom-user`. - -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + @@ -445,30 +467,32 @@ Retrieve the role information for an OIDC user. 
- -```go -// Go support coming soon -``` - + - -```java -// Java support coming soon -``` - + 
- Example results +Example results ```text testRole @@ -479,12 +503,12 @@ viewer ## Further resources -- [RBAC: Overview](./index.mdx) -- [RBAC: Configuration](/deploy/configuration/configuring-rbac.md) -- [RBAC: Manage roles](./manage-roles.mdx) +- [RBAC: Overview](./index.mdx) +- [RBAC: Configuration](/deploy/configuration/configuring-rbac.md) +- [RBAC: Manage roles](./manage-roles.mdx) ## Questions and feedback -import DocsFeedback from '/_includes/docs-feedback.mdx'; +import DocsFeedback from "/_includes/docs-feedback.mdx"; diff --git a/docs/weaviate/manage-collections/collection-aliases.mdx b/docs/weaviate/manage-collections/collection-aliases.mdx index 9c159dcf..d35f52b1 100644 --- a/docs/weaviate/manage-collections/collection-aliases.mdx +++ b/docs/weaviate/manage-collections/collection-aliases.mdx @@ -14,15 +14,14 @@ import TSCode from "!!raw-loader!/_includes/code/howto/manage-data.aliases.ts"; import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/manage-data.aliases_test.go"; import JavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java"; -:::caution Technical preview +:::info Added in `v1.32` +::: -Collection aliases were added in **`v1.32`** as a **technical preview**.

-This means that the feature is still under development and may change in future releases, including potential breaking changes. -**We do not recommend using this feature in production environments at this time.** +Collection aliases allow you to create alternative names for your collections. This is useful for migrating between collections without downtime, A/B testing, or providing more convenient names for collections. An alias acts as a reference to a collection - when you query and manage objects using an alias name, Weaviate automatically routes the request to the target collection. -::: +import CollectionAliasUsage from "/_includes/collection-alias-usage.mdx"; -Collection aliases allow you to create alternative names for your collections. This is useful for changing collection definitions without downtime, A/B testing, or providing more convenient names for collections. An alias acts as a reference to a collection - when you query using an alias name, Weaviate automatically routes the request to the target collection. + ## Create an alias @@ -67,8 +66,9 @@ To create an alias, specify the alias name and the target collection it should p - An alias name must be unique and cannot match any existing collection or alias name - Multiple aliases can point to the same collection -- Aliases can be used instead of collection names in most operations (except when deleting collections) - ::: +- Aliases can only be used instead of collection names in object-related operations (managing objects and querying) + +::: ## List all aliases @@ -235,7 +235,8 @@ Updating an alias is particularly useful for migrations: 3. Update the alias to point to the new collection 4. 
Continue to use the alias - all queries to it are now directed to the new collection -For a code example on how to perform migrations, visit the [Starter guide: Managing collections](../starter-guides/managing-collections/index.mdx#collection-aliases) +For a code example on how to perform migrations, visit the [Tutorial: Migrating collections with aliases](../tutorials/collection-aliases.mdx) + ::: ## Delete an alias @@ -285,7 +286,7 @@ Remove an alias. This only deletes the alias pointer, not the underlying collect ## Using aliases in operations -Once created, aliases can be used instead of collection names in most operations (except when deleting collections): +Once created, aliases can be used instead of collection names in all object-related operations, like data import and querying. diff --git a/docs/weaviate/manage-objects/delete.mdx b/docs/weaviate/manage-objects/delete.mdx index e5f9782c..99703fe6 100644 --- a/docs/weaviate/manage-objects/delete.mdx +++ b/docs/weaviate/manage-objects/delete.mdx @@ -151,9 +151,9 @@ To delete objects that match a set of criteria, specify the collection and a [`w
-### ContainsAny / ContainsAll +### ContainsAny / ContainsAll / ContainsNone -Use `ContainsAny` / `ContainsAll` filters to delete of objects by a set of criteria. +Use `ContainsAny` / `ContainsAll` / `ContainsNone` filters to delete objects by a set of criteria. diff --git a/docs/weaviate/manage-objects/import.mdx b/docs/weaviate/manage-objects/import.mdx index 7271962c..620d0a19 100644 --- a/docs/weaviate/manage-objects/import.mdx +++ b/docs/weaviate/manage-objects/import.mdx @@ -15,7 +15,7 @@ import JavaCode from '!!raw-loader!/_includes/code/howto/java/src/test/java/io/w import GoCode from '!!raw-loader!/_includes/code/howto/go/docs/manage-data.import_test.go'; import SkipLink from '/src/components/SkipValidationLink' -[Batch imports](../tutorials/import.md#to-batch-or-not-to-batch) are an efficient way to add multiple data objects and cross-references. +[Batch imports](../tutorials/import.mdx) are an efficient way to add multiple data objects and cross-references.
Additional information @@ -44,6 +44,7 @@ The following example adds objects to the `MyCollection` collection. ### Error handling + During a batch import, any failed objects or references will be stored and can be obtained through `batch.failed_objects` and `batch.failed_references`. Additionally, a running count of failed objects and references is maintained and can be accessed through `batch.number_errors` within the context manager. This counter can be used to stop the import process in order to investigate the failed objects or references. @@ -51,8 +52,6 @@ This counter can be used to stop the import process in order to investigate the Find out more about error handling on the Python client [reference page](/weaviate/client-libraries/python). - - - - - +## Server-side batching + +:::caution Preview + +Server-side batching was added in **`v1.33`** as a **preview**.

+This means that the feature is still under development and may change in future releases, including potential breaking changes. +**We do not recommend using this feature in production environments at this time.** + +::: + +Here's how to import objects into a collection named `MyCollection` using [server-side batch imports](../concepts/data-import.mdx#server-side-batching). The client adjusts the size of the batches it sends based on feedback from the server. + + + + + +### Error handling + + + +During a batch import, any failed objects or references will be stored and can be obtained through `batch.failed_objects` and `batch.failed_references`. +Additionally, a running count of failed objects and references is maintained and can be accessed through `batch.number_errors` within the context manager. +This counter can be used to stop the import process in order to investigate the failed objects or references. + +Find out more about error handling on the Python client [reference page](/weaviate/client-libraries/python). + + + + +```typescript +// TypeScript support coming soon +``` + + + + +```java +// Java support coming soon +``` + + + + +```go +// Go support coming soon +``` + + + + ## Use the gRPC API :::info Added in `v1.23`. @@ -94,16 +145,19 @@ The [gRPC API](../api/index.mdx) is faster than the REST API. Use the gRPC API t -The Python client uses gRPC by default. -
The legacy Python client does not support gRPC. +The Python client uses gRPC by default. + +
+The legacy Python client does not support gRPC.
The TypeScript client v3 uses gRPC by default. -
The legacy TypeScript client does not support gRPC. +
+The legacy TypeScript client does not support gRPC.
@@ -115,6 +169,7 @@ Config config = new Config("http", "localhost:8080"); config.setGRPCSecured(false); config.setGRPCHost("localhost:50051"); ``` + @@ -137,6 +192,7 @@ if err != nil { require.Nil(t, err) } ``` + @@ -175,8 +231,6 @@ Weaviate generates an UUID for each object. Object IDs must be unique. If you se language="py" /> - - - - - - - - - - - - - - ## Import with references @@ -317,20 +359,20 @@ To try the example code, download the sample data and create the sample input fi - + - + @@ -349,18 +391,17 @@ To try the example code, download the sample data and create the sample input fi /> - +
-
Stream CSV files example code @@ -374,12 +415,12 @@ To try the example code, download the sample data and create the sample input fi /> - + @@ -390,7 +431,7 @@ To try the example code, download the sample data and create the sample input fi :::info Added in `v1.25`. ::: -import BatchVectorizationOverview from '/_includes/code/client-libraries/batch-import.mdx'; +import BatchVectorizationOverview from "/_includes/code/client-libraries/batch-import.mdx"; @@ -436,22 +477,23 @@ weaviate: ### Automatically add new tenants -import AutoTenant from '/_includes/auto-tenant.mdx'; +import AutoTenant from "/_includes/auto-tenant.mdx"; - + For details, see [auto-tenant](/weaviate/manage-collections/multi-tenancy#automatically-add-new-tenants). - -## Related pages +## Further resources - [Connect to Weaviate](/weaviate/connections/index.mdx) - [How-to: Create objects](./create.mdx) -- References: REST - /v1/batch +- + References: REST - /v1/batch + - [Configuration: Indexes](/weaviate/config-refs/indexing/vector-index.mdx#asynchronous-indexing) ## Questions and feedback -import DocsFeedback from '/_includes/docs-feedback.mdx'; +import DocsFeedback from "/_includes/docs-feedback.mdx"; - + diff --git a/docs/weaviate/search/filters.md b/docs/weaviate/search/filters.md index 2845310f..7f654dab 100644 --- a/docs/weaviate/search/filters.md +++ b/docs/weaviate/search/filters.md @@ -78,7 +78,7 @@ The output is like this: ## Filter with multiple conditions -To filter with two or more conditions, use `And` or `Or` to define the relationship between the conditions. +To filter with two or more conditions, use `And`, `Or` and `Not` to define the relationship between the conditions. @@ -419,9 +419,91 @@ The output is like this:
-## `ContainsAny` and `ContainsAll` with batch delete +## `ContainsNone` Filter -If you want to do a batch delete, see [Delete objects](../manage-objects/delete.mdx#containsany--containsall). +The `ContainsNone` operator works on text properties and takes an array of values as input. It will match objects where the property **contains none** of the values in the array. + + + + + + + +```typescript +// TypeScript/JavaScript support coming soon +``` + + + + +```java +// Java support coming soon +``` + + + + + + + +
+ Example response + +The output is like this: + +```json +{ + "data": { + "Get": { + "JeopardyQuestion": [ + { + "answer": "Frank Lloyd Wright", + "hasCategory": [ + { + "title": "PEOPLE" + } + ], + "question": "In 1939 this famous architect polished off his Johnson Wax Building in Racine, Wisconsin" + }, + { + "answer": "a luffa", + "hasCategory": [ + { + "title": "FOOD" + } + ], + "question": "When it's young & tender, this gourd used in the bathtub can be eaten like a squash" + }, + { + "answer": "a snail", + "hasCategory": [ + { + "title": "SCIENCE & NATURE" + } + ], + "question": "Like an escargot, the abalone is an edible one of these gastropods" + } + ] + } + } +} +``` + +
+ +## `ContainsAny`, `ContainsAll` and `ContainsNone` with batch delete + +If you want to do a batch delete, see [Delete objects](../manage-objects/delete.mdx#containsany--containsall--containsnone). ## Filter text on partial matches diff --git a/docs/weaviate/search/index.mdx b/docs/weaviate/search/index.mdx index dead1c5b..29164f22 100644 --- a/docs/weaviate/search/index.mdx +++ b/docs/weaviate/search/index.mdx @@ -1,12 +1,12 @@ --- -title: Search +title: Query and search sidebar_position: 0 image: og/docs/howto.jpg hide_table_of_contents: true # tags: ['how to', 'perform a search'] --- -Use these **search** how-to guides to find the data you want. +Use these **query and search** how-to guides to find the data you want. import CardsSection from "/src/components/CardsSection"; diff --git a/docs/weaviate/starter-guides/managing-collections/index.mdx b/docs/weaviate/starter-guides/managing-collections/index.mdx index 28862ce1..9bf3f2ee 100644 --- a/docs/weaviate/starter-guides/managing-collections/index.mdx +++ b/docs/weaviate/starter-guides/managing-collections/index.mdx @@ -232,66 +232,14 @@ Sharding settings determine how each collection is sharded and distributed acros ## Collection aliases -:::caution Technical preview +:::info Added in `v1.32` +::: -Collection aliases were added in **`v1.32`** as a **technical preview**.

-This means that the feature is still under development and may change in future releases, including potential breaking changes. -**We do not recommend using this feature in production environments at this time.** +Collection aliases are alternative names (pointers) for Weaviate collections that allow you to reference a collection by multiple names. When you query using an alias, Weaviate automatically routes the request to the target collection. You can set up collection aliases [programmatically through client libraries](../../manage-collections/collection-aliases.mdx) or by using the REST endpoints. -::: +import CollectionAliasUsage from "/_includes/collection-alias-usage.mdx"; -Collection aliases are alternative names (pointers) for Weaviate collections that allow you to reference a collection by multiple names. When you query using an alias, Weaviate automatically routes the request to the target collection. You can set up collection aliases [programmatically through client libraries](../../manage-collections/collection-aliases.mdx) or by using the REST endpoints. - -import Tabs from "@theme/Tabs"; -import TabItem from "@theme/TabItem"; -import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; -import PyCode from "!!raw-loader!/_includes/code/howto/manage-data.aliases.py"; -import TSCode from "!!raw-loader!/_includes/code/howto/manage-data.aliases.ts"; -import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/manage-data.aliases_test.go"; -import JavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java"; - -### Migration workflow with collection aliases - -Collection aliases enable collection migrations with zero downtime. Previously, migrating to a new collection required creating the new collection, pausing your application, updating all collection references in your code, then restarting the application, resulting in service interruption. 
- -Now with aliases, simply create a new collection with your updated collection definition, migrate your data, then instantly [switch the alias](../../manage-collections/collection-aliases.mdx#update-an-alias) to point to the new collection. All your existing queries will continue running uninterrupted. Your application code remains unchanged, as it references the stable alias name rather than the underlying collection. - -Here's a complete example showing how to use aliases for a collection migration: - - - - - - - - - - - - - - - + ## Notes diff --git a/docs/weaviate/starter-guides/managing-resources/compression.mdx b/docs/weaviate/starter-guides/managing-resources/compression.mdx index 0123dfe6..f6ef1577 100644 --- a/docs/weaviate/starter-guides/managing-resources/compression.mdx +++ b/docs/weaviate/starter-guides/managing-resources/compression.mdx @@ -19,7 +19,7 @@ This page discusses compression algorithms. For more on indexes, see [Vector ind These compression algorithms are available: -import CompressionAlgorithms from '/_includes/starter-guides/compression-types.mdx'; +import CompressionAlgorithms from "/_includes/starter-guides/compression-types.mdx"; @@ -38,11 +38,11 @@ Performance and cost are also important considerations. See [Cost, recall, and s This table shows the compression algorithms that are available for each index type. | Compression type | HNSW index | Flat index | Dynamic index | -| :- | :- | :- | :- | -| PQ | Yes | No | Yes | -| SQ | Yes | No | Yes | -| RQ | Yes | No | Yes | -| BQ | Yes | Yes | Yes | +| :--------------- | :--------- | :--------- | :------------ | +| PQ | Yes | No | Yes | +| SQ | Yes | No | Yes | +| RQ | Yes | No | Yes | +| BQ | Yes | Yes | Yes | The [dynamic index](/weaviate/config-refs/indexing/vector-index.mdx#dynamic-index) is new in v1.25. This type of index is a [flat index](/weaviate/config-refs/indexing/vector-index.mdx#flat-index) until a collection reaches a threshold size. 
When the collection grows larger than the threshold size, the default is 10,000 objects, the collection is automatically reindexed and converted to an HNSW index. @@ -72,6 +72,7 @@ Recall measures how well an algorithm finds true positive matches in a data set. A compressed vector has less information than the corresponding uncompressed vector. An uncompressed vector that would normally match a search query might be missed if the target information is missing in the compressed vector. That missed match lowers recall. Typical recall rates: + - PQ: Varies based on configuration - SQ: 95-97% recall - RQ: 98-99% recall @@ -79,9 +80,9 @@ Typical recall rates: To improve recall with compressed vectors, Weaviate over-fetches a list of candidate vectors during a search. For each item on the candidate list, Weaviate fetches the corresponding uncompressed vector. To determine the final ranking, Weaviate calculates the distances from the uncompressed vectors to the query vector. -import RescoringIllustration from '/docs/weaviate/starter-guides/managing-resources/img/rescore-uncompressed-vectors.png'; +import RescoringIllustration from "/docs/weaviate/starter-guides/managing-resources/img/rescore-uncompressed-vectors.png"; -Rescoring illustration +Rescoring illustration The rescoring process is slower than an in-memory search, but since Weaviate only has to search a limited number of uncompressed vectors, the search is still very fast. Most importantly, rescoring with the uncompressed vectors greatly improves recall. @@ -127,7 +128,7 @@ Starting in v1.22, Weaviate has an optional, [asynchronous indexing](/weaviate/c Most applications benefit from compression. The cost savings are significant. In [Weaviate Cloud](https://weaviate.io/pricing), for example, compressed collections can be more than 80% cheaper than uncompressed collections. -- For most users with HNSW indexes who want the best combination of simplicity, performance, and recall, **consider RQ compression**. 
RQ provides 4x compression with 98-99% recall and requires no configuration or training. It's ideal for standard use cases with embeddings from providers like OpenAI. +- For most users with HNSW indexes who want the best combination of simplicity, performance, and recall, **consider 8-bit RQ compression**. RQ provides 4x compression with 98-99% recall and requires no configuration or training. It's ideal for standard use cases with embeddings from providers like OpenAI. - If you have a small collection that uses a flat index, consider a BQ index. The BQ index is 32 times smaller and much faster than the uncompressed equivalent. @@ -135,11 +136,7 @@ Most applications benefit from compression. The cost savings are significant. In For collections that are small, but that are expected to grow, consider a dynamic index. In addition to setting the dynamic index type, configure the collection to use BQ compression while the index is flat and RQ compression when the collection grows large enough to move from a flat index to an HNSW index. -## Related pages - -For more information, see these documentation pages and blog posts. 
- -### Documentation pages +## Further resources To enable compression, follow the steps on these pages: @@ -148,26 +145,19 @@ To enable compression, follow the steps on these pages: - [Rotational quantization (RQ)](../../configuration/compression/rq-compression.md) - [Binary quantization (BQ)](../../configuration/compression/bq-compression.md) -For more documentation details, see: - -- [Compression discussion](/weaviate/concepts/vector-quantization) +For more theoretical concepts and vector quantization in general, see: -### Blog posts +- [Concepts: Vector quantization](/weaviate/concepts/vector-quantization) -For in-depth discussions, see: +For in-depth discussions about the quantization techniques, see the following blog posts: +- [RQ quantization](https://weaviate.io/blog/8-bit-rotational-quantization) - [PQ and memory reduction](https://weaviate.io/blog/pq-rescoring) - [BQ and memory reduction](https://weaviate.io/blog/binary-quantization) - [PQ and HNSW explained](https://weaviate.io/blog/ann-algorithms-hnsw-pq) -### Pricing calculator - -To review Weaviate Cloud pricing for compressed and uncompressed vectors, see: - -[Weaviate cloud pricing calculator](https://weaviate.io/pricing) - ## Questions and feedback -import DocsFeedback from '/_includes/docs-feedback.mdx'; +import DocsFeedback from "/_includes/docs-feedback.mdx"; - + diff --git a/docs/weaviate/tutorials/_includes/bulk-import.py b/docs/weaviate/tutorials/_includes/bulk-import.py new file mode 100644 index 00000000..92fcf333 --- /dev/null +++ b/docs/weaviate/tutorials/_includes/bulk-import.py @@ -0,0 +1,311 @@ +import weaviate +import weaviate.classes as wvc +import json +import os +import urllib.request + +# START ConnectToWeaviate +# Connect to Weaviate instance +client = weaviate.connect_to_local( + headers={ + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"] # Replace with your API key + } +) +# END ConnectToWeaviate + +# START LoadData +# Download dataset directly from URL +url = 
"https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json" +urllib.request.urlretrieve(url, "jeopardy_tiny.json") + +# Load data from JSON file +with open("jeopardy_tiny.json", "r") as f: + data = json.load(f) + +# Prepare data for import +data_rows = [] +for item in data: + data_rows.append( + { + "question": item["Question"], + "answer": item["Answer"], + "category": item["Category"], + } + ) + +print(f"Loaded {len(data_rows)} questions") +expected_count = len(data_rows) +# END LoadData + +# Clean up any existing collection +client.collections.delete("JeopardyQuestion") + +# START CreateCollection +# Create a collection for Jeopardy questions +collection = client.collections.create( + name="JeopardyQuestion", + vector_config=wvc.config.Configure.Vectors.text2vec_openai(), + properties=[ + wvc.config.Property(name="question", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="answer", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="category", data_type=wvc.config.DataType.TEXT), + ], +) +# END CreateCollection + +# START ServerSideBatch +# Server-side batching (automatic mode) - Recommended approach +# The server manages the import flow automatically +with collection.batch.automatic() as batch: + # Import data + for data_row in data_rows: + batch.add_object( + properties=data_row, + ) + + # Optional: Stop if too many errors + if batch.number_errors > 10: + print("Batch import stopped due to excessive errors.") + break + +# Check for failed objects +failed_objects = collection.batch.failed_objects +if failed_objects: + print(f"Number of failed imports: {len(failed_objects)}") + print(f"First failed object: {failed_objects[0]}") +else: + print("All objects imported successfully!") + +# Verify server-side batch import +result = collection.aggregate.over_all(total_count=True) +assert len(failed_objects) == 0, f"Server-side batch had {len(failed_objects)} failures" +assert ( + result.total_count == 
expected_count +), f"Expected {expected_count} objects, got {result.total_count}" +print(f"✓ Server-side batch: {result.total_count} objects imported successfully") +# END ServerSideBatch + +# Alternative approach - Client-side batching +# Clean and recreate collection for demo +client.collections.delete("JeopardyQuestion") +collection = client.collections.create( + name="JeopardyQuestion", + vector_config=wvc.config.Configure.Vectors.text2vec_openai(), + properties=[ + wvc.config.Property(name="question", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="answer", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="category", data_type=wvc.config.DataType.TEXT), + ], +) + +# START ClientSideBatch +# Client-side batching with manual configuration +# You control batch size and concurrency +with collection.batch.fixed_size( + batch_size=100, # Number of objects per batch + concurrent_requests=2, # Number of parallel requests +) as batch: + # Import data + for data_row in data_rows: + batch.add_object( + properties=data_row, + ) + +# Check for failed objects +failed_objects = collection.batch.failed_objects +if failed_objects: + print(f"Number of failed imports: {len(failed_objects)}") + for failed in failed_objects[:3]: # Show first 3 failures + print(f"Failed object: {failed}") + +# Verify client-side batch import +result = collection.aggregate.over_all(total_count=True) +assert len(failed_objects) == 0, f"Client-side batch had {len(failed_objects)} failures" +assert ( + result.total_count == expected_count +), f"Expected {expected_count} objects, got {result.total_count}" +print(f"✓ Client-side batch: {result.total_count} objects imported successfully") +# END ClientSideBatch + +# START ErrorHandling +# Comprehensive error handling during import +# Clean and recreate for demo +client.collections.delete("JeopardyQuestion") +collection = client.collections.create( + name="JeopardyQuestion", + 
vector_config=wvc.config.Configure.Vectors.text2vec_openai(), + properties=[ + wvc.config.Property(name="question", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="answer", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="category", data_type=wvc.config.DataType.TEXT), + ], +) + +import_errors = [] + +with collection.batch.fixed_size(batch_size=100) as batch: + for idx, data_row in enumerate(data_rows): + try: + batch.add_object(properties=data_row) + except Exception as e: + import_errors.append({"index": idx, "data": data_row, "error": str(e)}) + continue + + # Check batch errors periodically + if idx % 100 == 0 and idx > 0: + if batch.number_errors > 0: + print(f"Errors at index {idx}: {batch.number_errors}") + # Optionally retrieve and log failed objects + for failed_obj in collection.batch.failed_objects[-10:]: + print(f"Failed: {failed_obj.message}") + +# Final error report +if import_errors: + print(f"\nTotal import errors: {len(import_errors)}") + print("Sample errors:") + for error in import_errors[:3]: + print(f" Index {error['index']}: {error['error']}") +# END ErrorHandling + +# START VerifyImport +# Verify the import was successful +result = collection.aggregate.over_all(total_count=True) +print(f"\nTotal objects in collection: {result.total_count}") + +# Query a few objects to verify +results = collection.query.fetch_objects(limit=3) +print("\nSample imported objects:") +for obj in results.objects: + print(f"- Question: {obj.properties['question'][:50]}...") + print(f" Answer: {obj.properties['answer']}") + print(f" Category: {obj.properties['category']}\n") +# END VerifyImport + +# START CustomVectors +# Import with custom vectors (if you have pre-computed embeddings) +import numpy as np + +# Example: Create a collection that accepts custom vectors +client.collections.delete("JeopardyCustomVectors") +collection_custom = client.collections.create( + name="JeopardyCustomVectors", + 
vector_config=wvc.config.Configure.Vectors.self_provided(), + properties=[ + wvc.config.Property(name="question", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="answer", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="category", data_type=wvc.config.DataType.TEXT), + ], +) + +# Import objects with custom vectors +custom_import_count = 5 # Import first 5 for demo +with collection_custom.batch.fixed_size(batch_size=100) as batch: + for data_row in data_rows[:custom_import_count]: + # Generate a random vector for demonstration + # In practice, this would be your pre-computed embedding + custom_vector = np.random.rand(1536).tolist() + + batch.add_object( + properties={ + "question": data_row["question"], + "answer": data_row["answer"], + "category": data_row["category"], + }, + vector=custom_vector, + ) + +# Verify custom vectors import +failed_custom = collection_custom.batch.failed_objects +result_custom = collection_custom.aggregate.over_all(total_count=True) +assert ( + len(failed_custom) == 0 +), f"Custom vectors batch had {len(failed_custom)} failures" +assert ( + result_custom.total_count == custom_import_count +), f"Expected {custom_import_count} objects, got {result_custom.total_count}" +print(f"✓ Custom vectors: {result_custom.total_count} objects imported successfully!") +# END CustomVectors + + +# START ChunkedImport +# Import large datasets in chunks with progress tracking +def import_large_dataset(collection, data_rows, chunk_size=1000): + """ + Import data in chunks with progress tracking and checkpointing + """ + total_objects = len(data_rows) + imported_count = 0 + failed_count = 0 + + # Process in chunks + for chunk_start in range(0, total_objects, chunk_size): + chunk_end = min(chunk_start + chunk_size, total_objects) + chunk = data_rows[chunk_start:chunk_end] + + print(f"\nImporting chunk {chunk_start}-{chunk_end} of {total_objects}") + + with collection.batch.automatic() as batch: + for data_row in chunk: + 
batch.add_object(properties=data_row) + + # Track progress + chunk_failed = len(collection.batch.failed_objects) + chunk_succeeded = len(chunk) - chunk_failed + imported_count += chunk_succeeded + failed_count += chunk_failed + + # Progress report + progress = (chunk_end / total_objects) * 100 + print(f"Progress: {progress:.1f}% ({imported_count}/{total_objects} imported)") + + if chunk_failed > 0: + print(f" Warning: {chunk_failed} objects failed in this chunk") + + # Optional: Save checkpoint for resume capability + checkpoint = { + "last_processed_index": chunk_end, + "imported_count": imported_count, + "failed_count": failed_count, + } + with open("import_checkpoint.json", "w") as f: + json.dump(checkpoint, f) + + # Final report + print(f"\n=== Import Complete ===") + print(f"Total imported: {imported_count}/{total_objects}") + print(f"Total failed: {failed_count}") + print(f"Success rate: {(imported_count/total_objects)*100:.1f}%") + + return imported_count, failed_count + + +# Test chunked import with small chunks for demo +client.collections.delete("JeopardyQuestion") +collection = client.collections.create( + name="JeopardyQuestion", + vector_config=wvc.config.Configure.Vectors.text2vec_openai(), + properties=[ + wvc.config.Property(name="question", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="answer", data_type=wvc.config.DataType.TEXT), + wvc.config.Property(name="category", data_type=wvc.config.DataType.TEXT), + ], +) + +# Run chunked import with small chunks to demonstrate the feature +imported, failed = import_large_dataset(collection, data_rows, chunk_size=3) + +# Verify chunked import +result_chunked = collection.aggregate.over_all(total_count=True) +assert failed == 0, f"Chunked import had {failed} failures" +assert ( + result_chunked.total_count == expected_count +), f"Expected {expected_count} objects, got {result_chunked.total_count}" +print(f"✓ Chunked import: {result_chunked.total_count} objects imported successfully") +# 
END ChunkedImport + +# Clean up +client.collections.delete("JeopardyQuestion") +client.collections.delete("JeopardyCustomVectors") +os.remove("import_checkpoint.json") +os.remove("jeopardy_tiny.json") +client.close() diff --git a/docs/weaviate/tutorials/_includes/collection_alias_tutorial.png b/docs/weaviate/tutorials/_includes/collection_alias_tutorial.png new file mode 100644 index 00000000..cea72e73 Binary files /dev/null and b/docs/weaviate/tutorials/_includes/collection_alias_tutorial.png differ diff --git a/docs/weaviate/tutorials/_includes/joepardyquestion_dataset.png b/docs/weaviate/tutorials/_includes/joepardyquestion_dataset.png new file mode 100644 index 00000000..2f3fcdd2 Binary files /dev/null and b/docs/weaviate/tutorials/_includes/joepardyquestion_dataset.png differ diff --git a/docs/weaviate/tutorials/collection-aliases.mdx b/docs/weaviate/tutorials/collection-aliases.mdx new file mode 100644 index 00000000..b3518f36 --- /dev/null +++ b/docs/weaviate/tutorials/collection-aliases.mdx @@ -0,0 +1,409 @@ +--- +title: Zero-downtime collection migration with aliases +description: Learn how to migrate Weaviate collections without service interruption using aliases +sidebar_position: 3 +image: og/docs/tutorials.jpg +# tags: ['migration', 'aliases', 'collections'] +--- + +import SkipLink from "/src/components/SkipValidationLink"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; +import PyCode from "!!raw-loader!/_includes/code/howto/manage-data.aliases.py"; +import TSCode from "!!raw-loader!/_includes/code/howto/manage-data.aliases.ts"; +import GoCode from "!!raw-loader!/_includes/code/howto/go/docs/manage-data.aliases_test.go"; +import JavaCode from "!!raw-loader!/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.collection-aliases.java"; + +In this tutorial, we will explore how to use **collection aliases** in Weaviate to perform 
zero-downtime migrations. Collection aliases are alternative names for Weaviate collections that allow you to reference a collection by multiple names. This powerful feature enables you to migrate to new collection schemas, update configurations, or reorganize your data without any service interruption. + +## Prerequisites + +Before starting this tutorial, ensure you have the following: + +- An instance of Weaviate (e.g. on [Weaviate Cloud](https://console.weaviate.cloud), or locally), version `v1.32` or newer. +- Your preferred Weaviate [client library](../client-libraries/index.mdx) installed. +- Basic familiarity with Weaviate collections and data import. + +:::tip See the Quickstart guide + +For information on how to set up Weaviate and install the client library, see the [cloud](../quickstart/index.md) or [local](../quickstart/local.md) Quickstart guide. + +::: + +## Introduction + +Traditional collection migrations require significant downtime. The typical workflow involves: + +1. Creating a new collection +2. Stopping your application +3. Migrating data +4. Updating all collection references in your code +5. Restarting your application + +This process causes service interruption and requires code changes. With aliases, you can eliminate both issues. + +### What are collection aliases? + +A collection alias is a pointer to an underlying collection. When you query using an alias, Weaviate automatically routes the request to the target collection. Think of it like a symbolic link in a file system or a DNS alias for a website. + +![Collection alias concept visualization](./_includes/collection_alias_tutorial.png "Collection alias concept visualization") + +Collection aliases are ideal for **schema migrations** (updating properties or vectorization settings), **A/B testing**, and **disaster recovery**. They add minimal routing overhead and enable instant switching between collection versions without code changes. 
+ +import CollectionAliasUsage from "/_includes/collection-alias-usage.mdx"; + + + +### How aliases enable zero-downtime migration + +Aliases allow you to keep your application code unchanged as it references the stable alias name. You can switch between collections instantly and roll back quickly if needed. + +The migration process becomes: + +1. Create a new collection with updated schema +2. Migrate data (while the old collection serves traffic) +3. Update the alias to point to the new collection (instant switch) +4. Delete the old collection after verification + +## Tutorial: Migrating a products collection + +Let's walk through a complete migration scenario where we need to add a new field to an existing collection of products. + +### Step 1: Connect to Weaviate + +First, connect to your Weaviate instance using your preferred client library. + + + + + + + + + + + + + + + + +### Step 2: Create the original collection + +Let's create our initial products collection and populate it with data. + + + + + + + + + + + + + + + + +### Step 3: Create an alias for production access + +Now create an alias that your application will use. This decouples your application code from the specific collection version. + + + + + + + + + + + + + + + + +### Step 4: Use the alias in your application + +Your application code should reference the alias, not the underlying collection. This ensures it continues working regardless of which collection version is active. + + + + + + + + + + + + + + + + +The key point is that your application code doesn't need to know whether it's accessing `Products_v1` or `Products_v2` - it just uses the stable alias name. + +### Step 5: Create the new collection with updated schema + +Now let's create a new version of the collection with an additional field (e.g., adding a `category` property). 
+ + + + + + + + + + + + + + + + +### Step 6: Migrate data to the new collection + +Copy data from the old collection to the new one, adding default values for new fields or transforming data as needed. + + + + + + + + + + + + + + + + +### Step 7: Update the alias (instant switch) + +This is the magic moment - update the alias to point to the new collection. This switch is instantaneous, and all queries using the `ProductsAlias` alias now access the new collection. + + + + + + + + + + + + + + + + +### Step 8: Verify and clean up + +After verifying that everything works correctly with the new collection, you can safely delete the old one. + + + + + + + + + + + + + + + + +## Summary + +This tutorial demonstrated how to use collection aliases in Weaviate for zero-downtime migrations. Key takeaways: + +- **Aliases are pointers** to collections that enable instant switching between versions +- **Zero downtime** is achieved by preparing the new collection while the old one serves traffic +- **Application code remains unchanged** when using aliases instead of direct collection names +- **Rollback is simple** - just point the alias back to the previous collection + +Collection aliases are essential for production Weaviate deployments where uptime is critical. They enable confident migrations, A/B testing, and flexible deployment strategies without service interruption. + +## Further resources + +- [How-to: Collection aliases](../manage-collections/collection-aliases.mdx) +- + Reference: REST - Schema + + +## Questions and feedback + +import DocsFeedback from "/_includes/docs-feedback.mdx"; + + diff --git a/docs/weaviate/tutorials/import.md b/docs/weaviate/tutorials/import.md deleted file mode 100644 index f31b774d..00000000 --- a/docs/weaviate/tutorials/import.md +++ /dev/null @@ -1,178 +0,0 @@ ---- -title: Imports in detail -description: Understand data import techniques in Weaviate for efficient data integration. 
-sidebar_position: 4 -image: og/docs/tutorials.jpg ---- - -import SkipLink from '/src/components/SkipValidationLink' -import UpdateInProgressNote from '/_includes/update-in-progress.mdx'; - - - -import { DownloadButton } from '/src/theme/Buttons'; - -In this section, we will explore data import, including details of the batch import process. We will discuss points such as how vectors are imported, what a batch import is, how to manage errors, and some advice on optimization. - -## Prerequisites - -Before you start this tutorial, you should follow the steps in the tutorials to have: - -- An instance of Weaviate running (e.g. on the [Weaviate Cloud](https://console.weaviate.cloud)), -- An API key for your preferred inference API, such as OpenAI, Cohere, or Hugging Face, -- Installed your preferred Weaviate client library, and -- Set up a `Question` class in your schema. - - You can follow the Quickstart guide, or the [schema tutorial](../starter-guides/managing-collections/index.mdx) to construct the Question class if you have not already. - -We will use the dataset below. We suggest that you download it to your working directory. - -

- Download jeopardy_tiny.json -

- -## Import setup - -As mentioned in the [schema tutorial](../starter-guides/managing-collections/index.mdx), the `schema` specifies the data structure for Weaviate. - -So the data import must map properties of each record to those of the relevant class in the schema. In this case, the relevant class is **Question** as defined in the previous section. - -### Data object structure - -Each Weaviate data object is structured as follows: - -```json -{ - "class": "", // as defined during schema creation - "id": "", // optional, must be in UUID format. - "properties": { - "": "", // specified in dataType defined during schema creation - } -} -``` - -Most commonly, Weaviate users import data through a Weaviate client library. - -It is worth noting, however, that data is ultimately added through the RESTful API, either through the `objects` endpoint or the `batch` endpoint. - -As the names suggest, the use of these endpoints depend on whether objects are being imported as batches or individually. - -### To batch or not to batch - -For importing data, we **strongly suggest that you use batch imports** unless you have a specific reason not to. Batch imports can greatly improve performance by sending multiple objects in a single request. - -We note that batch imports are carried out through the [`batch` REST endpoint](../manage-objects/import.mdx). - -### Batch import process - -A batch import process generally looks like this: - -1. Connect to your Weaviate instance -1. Load objects from the data file -1. Prepare a batch process -1. Loop through the records - 1. Parse each record and build an object - 1. Push the object through a batch process -1. Flush the batch process – in case there are any remaining objects in the buffer - -Here is the full code you need to import the **Question** objects: - -import CodeImportQuestions from '/_includes/code/quickstart.import.questions.mdx'; - - - -There are a couple of things to note here. 
- -#### Batch size - -Some clients include this as a parameter (e.g. `batch_size` in the Python client), or it can be manually set by periodically flushing the batch. - -Typically, a size between 20 and 100 is a reasonable starting point, although this depends on the size of each data object. A smaller size may be preferable for larger data objects, such as if vectors are included in each object upload. - -#### Where are the vectors? - -You may have noticed that we do not provide a vector. As a `vectorizer` is specified in our schema, Weaviate will send a request to the appropriate module (`text2vec-openai` in this case) to vectorize the data, and the vector in the response will be indexed and saved as a part of the data object. - -### Bring your own vectors - -If you wish to upload your own vectors, you can do so with Weaviate. Refer to the [this page](../manage-objects/import.mdx#specify-a-vector). - -You can also manually upload existing vectors and use a vectorizer module for vectorizing queries. - -## Confirm data import - -You can quickly check the imported object by opening `/v1/objects` in a browser, like this (replace with your Weaviate endpoint): - -``` -https://some-endpoint.semi.network/v1/objects -``` - -Or you can read the objects in your project, like this: - -import CodeImportGet from '/_includes/code/quickstart.import.get.mdx'; - - - -The result should look something like this: - -```json -{ - "deprecations": null, - "objects": [ - ... // Details of each object - ], - "totalResults": 10 // You should see 10 results here -} -``` - -## Data import - best practices - -When importing large datasets, it may be worth planning out an optimized import strategy. Here are a few things to keep in mind. - -1. The most likely bottleneck is the import script. Accordingly, aim to max out all the CPUs available. -1. To use multiple CPUs efficiently, enable sharding when you import data. For the fastest imports, enable sharding even on a single node. -1. 
Use [parallelization](https://www.computerhope.com/jargon/p/parallelization.htm); if the CPUs are not maxed out, just add another import process. -1. Use `htop` when importing to see if all CPUs are maxed out. -1. To avoid out-of-memory issues during imports, set `LIMIT_RESOURCES` to `True` or configure the `GOMEMLIMIT` environment variable. For details, see [Environment variables](/deploy/configuration/env-vars/index.md). -1. For Kubernetes, a few large machines are faster than many small machines (due to network latency). - -Our rules of thumb are: -* You should always use batch import. -* Use multiple shards. -* As mentioned above, max out your CPUs (on the Weaviate cluster). Often your import script is the bottleneck. -* Process error messages. -* Some clients (e.g. Python) have some built-in logic to efficiently control batch importing. - -### Error handling - - -We recommend that you implement error handling at an object level, such as in this example. - -:::tip `200` status code != 100% batch success -It is important to note that an HTTP `200` status code only indicates that the **request** has been successfully sent to Weaviate. In other words, there were no issues with the connection or processing of the batch and no malformed request. - -A request with a `200` response may still include object-level errors, which is why error handling is critical. -::: - -## Recap - -* Data to be imported should match the database schema -* Use batch import unless you have a good reason not to -* For importing large datasets, make sure to consider and optimize your import strategy. 
- -## Suggested reading - -- [Tutorial: Schemas in detail](../starter-guides/managing-collections/index.mdx) -- [Tutorial: Queries in detail](./query.md) -- [Tutorial: Introduction to modules](./modules.md) -- [Tutorial: Introduction to Weaviate Console](/cloud/tools/query-tool.mdx) - -### Other object operations - -All other CRUD object operations are available in the [manage-data](../manage-collections/index.mdx) section. - - -## Questions and feedback - -import DocsFeedback from '/_includes/docs-feedback.mdx'; - - diff --git a/docs/weaviate/tutorials/import.mdx b/docs/weaviate/tutorials/import.mdx new file mode 100644 index 00000000..31d8631c --- /dev/null +++ b/docs/weaviate/tutorials/import.mdx @@ -0,0 +1,261 @@ +--- +title: Batch data import +description: Learn how to import data efficiently using client-side and server-side batching in Weaviate +sidebar_position: 4 +image: og/docs/tutorials.jpg +--- + +import SkipLink from "/src/components/SkipValidationLink"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import FilteredTextBlock from "@site/src/components/Documentation/FilteredTextBlock"; +import PyCode from "!!raw-loader!/docs/weaviate/tutorials/_includes/bulk-import.py"; + +In this tutorial, we will explore how to efficiently import large datasets into Weaviate using batch imports. You'll learn the differences between client-side and server-side batching, when to use each approach, and best practices for optimal performance. + +## Prerequisites + +Before starting this tutorial, ensure you have: + +- An instance of Weaviate (e.g. on [Weaviate Cloud](https://console.weaviate.cloud), or locally), version `v1.33` or newer for server-side batching +- Your preferred Weaviate [client library](../client-libraries/index.mdx) installed +- An API key for your inference API (OpenAI, Cohere, etc.) 
if using vectorization +- A dataset to import (we'll provide a sample) + +## Introduction + +Weaviate offers multiple methods for importing your data: + +1. **[Object creation](../manage-objects/create.mdx)**: You create the objects in the database one-by-one with individual requests. +1. **[Loading from backup](/deploy/configuration/backups.md)**: If you created a backup of an existing collection, you can restore/import it in another Weaviate instance. +1. **Client-side batch imports**: Import objects in larger batches by controlling the batch size and timing manually. +1. **Server-side batch imports**: The server automatically manages the import flow and optimizes throughput. This approach is recommended for most use cases. + +When importing data into Weaviate, **always use batch imports** instead of importing objects one by one. Batch imports can improve performance by 100x or more by: + +- Reducing network overhead through fewer HTTP requests +- Enabling parallel processing +- Optimizing database write operations + +Let's walk through importing data using both batching approaches (server-side and client-side). We'll create a collection for Jeopardy questions and import sample data. + +## Step 1: Connect to Weaviate + +First, connect to your Weaviate instance. + + + + + + + +## Step 2: Create a collection + +This is what our data looks like: + +![Jeopardy questions data schema visualization](./_includes/joepardyquestion_dataset.png "Jeopardy questions data schema visualization") + +Create a collection to store the Jeopardy questions. + + + + + + + +## Step 3: Prepare your data + +Load the data from the JSON file. + + + + + + + +## Step 4: Import the data + +### Option A: Server-side batching (Recommended) + +:::caution Preview + +Server-side batching was added in **`v1.33`** as a **preview**.

+This means that the feature is still under development and may change in future releases, including potential breaking changes. +**We do not recommend using this feature in production environments at this time.** + +::: + +Server-side batching automatically manages the import flow using a feedback loop between client and server. The server monitors its workload and tells the client how much data to send next. + +- Client opens a persistent connection to the server +- Client sends data in chunks based on server feedback +- Server manages an internal queue and applies backpressure when needed +- Errors are streamed back asynchronously without interrupting the flow + + + + + + + +The benefits of server-side batching are: + +- **No manual tuning**: Server automatically determines optimal batch size +- **Automatic backpressure**: Prevents server overload during heavy operations +- **Better resilience**: Handles cluster scaling events gracefully +- **Simplified code**: Less configuration needed + +### Option B: Client-side batching + +Client-side batching gives you direct control over batch size and timing. You configure parameters like batch size and manage the import flow manually. + + + + + + + +Use the following tips for configuring client-side batching: + +- **Default**: Start with 100 objects per batch +- **Large objects**: Reduce to 20-50 for objects with long texts or custom vectors +- **Small objects**: Increase to 200-500 for simple objects +- Monitor CPU and memory usage to find optimal size + +## Error handling + +Proper error handling is crucial for batch imports. A `200` HTTP status only means the request was received - individual objects may still fail. + + + + + + + +## Verify import + +After importing, verify the data was successfully added. + + + + + + + +## Importing with custom vectors + +If you have pre-computed vectors, you can include them in the import. + + + + + + + +## Best practices + +### Performance optimization + +1. 
**Use multiple shards**: Even on a single node, sharding improves import speed +2. **Monitor CPU usage**: Use `htop` to ensure CPUs are fully utilized +3. **Parallelize imports**: Run multiple import processes if CPUs aren't maxed out +4. **Set resource limits**: Configure `LIMIT_RESOURCES=True` to avoid out-of-memory issues + +### Choosing batch parameters + +For **client-side batching**: + +- Start with `batch_size=100` +- Increase `concurrent_requests` to 2-4 for better throughput +- Adjust based on object size and available resources + +For **server-side batching**: + +- No configuration needed - the server handles optimization +- Focus on error handling and monitoring + +### Large dataset strategies + +When importing millions of objects: + +1. **Split your dataset** into manageable chunks +2. **Use checkpointing** to resume failed imports +3. **Monitor progress** with regular status updates +4. **Implement retry logic** for transient failures + +Example of chunked import with progress tracking: + + + + + + + +## Summary + +This tutorial covered efficient bulk data import in Weaviate: + +- **Always use batch imports** for better performance. +- **Server-side batching** automatically optimizes throughput with no manual tuning. +- **Client-side batching** provides direct control when needed. +- **Error handling is critical** - check for object-level failures even with HTTP 200 responses. +- **Monitor and optimize** based on your data and infrastructure.
+ +## Further resources + +- [How-to: Batch import](../manage-objects/import.mdx) +- + API Reference: Batch operations + +- [Concepts: Data import](../concepts/data-import.mdx) + +## Questions and feedback + +import DocsFeedback from "/_includes/docs-feedback.mdx"; + + diff --git a/docs/weaviate/tutorials/index.mdx b/docs/weaviate/tutorials/index.mdx index 1058f92c..b2996f97 100644 --- a/docs/weaviate/tutorials/index.mdx +++ b/docs/weaviate/tutorials/index.mdx @@ -17,13 +17,6 @@ Here, you can learn how to: import CardsSection from "/src/components/CardsSection"; export const advancedFeaturesData = [ - { - title: "Load Data into Weaviate with Spark", - description: - "Use the Spark Connector to efficiently import large datasets from Apache Spark into Weaviate with batch processing and vectorization options.", - link: "/weaviate/tutorials/spark-connector", - icon: "fas fa-fire", - }, { title: "Use Multi-Vector Embeddings (ColBERT, etc.)", description: @@ -31,6 +24,20 @@ export const advancedFeaturesData = [ link: "/weaviate/tutorials/multi-vector-embeddings", icon: "fas fa-puzzle-piece", }, + { + title: "Zero-downtime collection migration with aliases", + description: + "Learn how to migrate Weaviate collections without service interruption using collection aliases.", + link: "/weaviate/tutorials/collection-aliases", + icon: "fas fa-share ", + }, + { + title: "Import data in bulk", + description: + "Learn how to import data efficiently using client-side and server-side batching in Weaviate.", + link: "/weaviate/tutorials/import", + icon: "fas fa-file-import", + }, { title: "Manage Relationships with Cross-References", description: @@ -38,6 +45,13 @@ export const advancedFeaturesData = [ link: "/weaviate/tutorials/cross-references", icon: "fas fa-link", }, + { + title: "Load Data into Weaviate with Spark", + description: + "Use the Spark Connector to efficiently import large datasets from Apache Spark into Weaviate with batch processing and vectorization options.", +
link: "/weaviate/tutorials/spark-connector", + icon: "fas fa-fire", + }, { title: "Set up Role-Based Access Control (RBAC)", description: diff --git a/docs/weaviate/tutorials/query.md b/docs/weaviate/tutorials/query.md index 6811fe5c..6aa1c1df 100644 --- a/docs/weaviate/tutorials/query.md +++ b/docs/weaviate/tutorials/query.md @@ -38,7 +38,7 @@ First, we will start by making queries to Weaviate to retrieve **Question** obje The Weaviate function for retrieving objects is `Get`. -This might be familiar for some of you. If you have completed our [Imports in detail tutorial](./import.md), you may have performed a `Get` query to confirm that the data import was successful. Here is the same code as a reminder: +This might be familiar to some of you. If you have completed our [Batch data import tutorial](./import.mdx), you may have performed a `Get` query to confirm that the data import was successful. Here is the same code as a reminder: import CodeImportGet from '/_includes/code/quickstart.import.get.mdx'; @@ -277,7 +277,7 @@ As you can see, the `Aggregate` function can return handy aggregated, or metadat ## Suggested reading - [Tutorial: Schemas in detail](../starter-guides/managing-collections/index.mdx) -- [Tutorial: Import in detail](./import.md) +- [Tutorial: Batch data import](./import.mdx) - [Tutorial: Introduction to modules](./modules.md) - [Tutorial: Introduction to Weaviate Console](/cloud/tools/query-tool.mdx) diff --git a/sidebars.js b/sidebars.js index da496f50..543c395a 100644 --- a/sidebars.js +++ b/sidebars.js @@ -431,10 +431,11 @@ const sidebars = { id: "weaviate/configuration/compression/index", }, items: [ + "weaviate/configuration/compression/rq-compression", "weaviate/configuration/compression/pq-compression", "weaviate/configuration/compression/bq-compression", - "weaviate/configuration/compression/rq-compression", "weaviate/configuration/compression/sq-compression", + "weaviate/configuration/compression/uncompressed",
"weaviate/configuration/compression/multi-vectors", ], }, @@ -450,6 +451,7 @@ const sidebars = { items: [ "weaviate/configuration/rbac/manage-roles", "weaviate/configuration/rbac/manage-users", + "weaviate/configuration/rbac/manage-groups", ], }, ], @@ -525,12 +527,13 @@ const sidebars = { }, items: [ "weaviate/tutorials/multi-vector-embeddings", - //"weaviate/tutorials/import", + "weaviate/tutorials/import", + "weaviate/tutorials/collection-aliases", "weaviate/tutorials/cross-references", + "weaviate/tutorials/spark-connector", //"weaviate/tutorials/vector-provision-options", //"weaviate/tutorials/query", //"weaviate/tutorials/wikipedia", - "weaviate/tutorials/spark-connector", //"weaviate/tutorials/modules", ], }, diff --git a/tests/docker-compose-anon-2.yml b/tests/docker-compose-anon-2.yml index 53349bab..316ef2ca 100644 --- a/tests/docker-compose-anon-2.yml +++ b/tests/docker-compose-anon-2.yml @@ -8,7 +8,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 ports: - 8090:8080 - 50061:50051 diff --git a/tests/docker-compose-anon-bind.yml b/tests/docker-compose-anon-bind.yml index d4b3fcf3..be02288f 100644 --- a/tests/docker-compose-anon-bind.yml +++ b/tests/docker-compose-anon-bind.yml @@ -8,7 +8,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 ports: - 8380:8080 - 50351:50051 diff --git a/tests/docker-compose-anon-clip.yml b/tests/docker-compose-anon-clip.yml index 5f8b4eff..c8a255c0 100644 --- a/tests/docker-compose-anon-clip.yml +++ b/tests/docker-compose-anon-clip.yml @@ -8,7 +8,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 ports: - 8280:8080 - 50251:50051 diff --git a/tests/docker-compose-anon-offload.yml 
b/tests/docker-compose-anon-offload.yml index 0d11dbaf..ee13e521 100644 --- a/tests/docker-compose-anon-offload.yml +++ b/tests/docker-compose-anon-offload.yml @@ -8,7 +8,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 ports: - 8080:8080 - 50051:50051 diff --git a/tests/docker-compose-anon.yml b/tests/docker-compose-anon.yml index 4052eda7..b3e593eb 100644 --- a/tests/docker-compose-anon.yml +++ b/tests/docker-compose-anon.yml @@ -8,7 +8,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 ports: - 8080:8080 - 50051:50051 diff --git a/tests/docker-compose-rbac.yml b/tests/docker-compose-rbac.yml index 712a09b8..78d31332 100644 --- a/tests/docker-compose-rbac.yml +++ b/tests/docker-compose-rbac.yml @@ -1,4 +1,3 @@ ---- services: weaviate_rbac: command: @@ -8,11 +7,13 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 ports: - 8580:8080 - 50551:50051 restart: on-failure:0 + depends_on: + - keycloak environment: QUERY_DEFAULTS_LIMIT: 25 PERSISTENCE_DATA_PATH: '/var/lib/weaviate' @@ -21,11 +22,44 @@ services: ENABLE_API_BASED_MODULES: 'true' BACKUP_FILESYSTEM_PATH: '/var/lib/weaviate/backups' CLUSTER_HOSTNAME: 'node1' - # AuthN and AuthZ settings AUTHENTICATION_APIKEY_ENABLED: 'true' AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'root-user-key' AUTHENTICATION_APIKEY_USERS: 'root-user' AUTHORIZATION_RBAC_ENABLED: 'true' AUTHORIZATION_RBAC_ROOT_USERS: 'root-user' AUTHENTICATION_DB_USERS_ENABLED: 'true' -... 
+ AUTHENTICATION_OIDC_ENABLED: 'true' + # Changed to use service name instead of localhost + #AUTHENTICATION_OIDC_ISSUER: 'http://localhost:8081/realms/weaviate-test' + AUTHENTICATION_OIDC_ISSUER: 'http://keycloak:8081/realms/weaviate-test' + AUTHENTICATION_OIDC_USERNAME_CLAIM: 'preferred_username' + AUTHENTICATION_OIDC_GROUPS_CLAIM: 'groups' + AUTHENTICATION_OIDC_CLIENT_ID: 'weaviate' + + keycloak: + image: quay.io/keycloak/keycloak:24.0.3 + ports: + - "8081:8081" + environment: + KEYCLOAK_ADMIN: admin + KEYCLOAK_ADMIN_PASSWORD: admin + KC_HTTP_ENABLED: 'true' + KC_HTTP_PORT: '8081' + KC_HOSTNAME_STRICT: 'false' + KC_HOSTNAME_STRICT_HTTPS: 'false' + # Use localhost so it works from both inside and outside Docker + #KC_HOSTNAME: localhost + KC_HOSTNAME_PORT: 8081 + # This makes Keycloak use localhost URLs in OIDC discovery + KC_FRONTEND_URL: 'http://localhost:8081' + command: + - start-dev + volumes: + - keycloak_data:/opt/keycloak/data + # Remove health check for now since Keycloak is working + # healthcheck: + # test: ["CMD", "echo", "healthy"] + # interval: 10s + +volumes: + keycloak_data: \ No newline at end of file diff --git a/tests/docker-compose-three-nodes.yml b/tests/docker-compose-three-nodes.yml index b5323da5..3603eb58 100644 --- a/tests/docker-compose-three-nodes.yml +++ b/tests/docker-compose-three-nodes.yml @@ -8,7 +8,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 restart: on-failure:0 ports: - "8180:8080" @@ -36,7 +36,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 restart: on-failure:0 ports: - "8181:8080" @@ -65,7 +65,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 restart: on-failure:0 ports: - "8182:8080" diff 
--git a/tests/docker-compose.yml b/tests/docker-compose.yml index 8c722ff9..6b8dea9b 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -8,7 +8,7 @@ services: - '8080' - --scheme - http - image: cr.weaviate.io/semitechnologies/weaviate:1.32.0 + image: cr.weaviate.io/semitechnologies/weaviate:1.33.0-rc.1 ports: - 8099:8080 - 50052:50051 diff --git a/versions-config.json b/versions-config.json index ca969051..71c42b03 100644 --- a/versions-config.json +++ b/versions-config.json @@ -6,9 +6,9 @@ "weaviate_cli_version": "3.2.2", "weaviate_agents_version": "1.0.0", "python_client_version": "4.17.0", - "go_client_version": "5.4.1", - "java_client_version": "5.4.0", + "go_client_version": "5.5.0", + "java_client_version": "5.5.0", "javascript_client_version": "2.14.5", - "typescript_client_version": "3.8.0", + "typescript_client_version": "3.9.0", "spark_connector_version": "1.4.0" }