diff --git a/src/MongoDB.Driver/CreateAtlasVectorIndexModel.cs b/src/MongoDB.Driver/CreateAtlasVectorIndexModel.cs new file mode 100644 index 00000000000..c8fd6fb706f --- /dev/null +++ b/src/MongoDB.Driver/CreateAtlasVectorIndexModel.cs @@ -0,0 +1,216 @@ +/* Copyright 2010-present MongoDB Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Linq.Expressions; +using MongoDB.Bson; +using MongoDB.Bson.Serialization; + +namespace MongoDB.Driver +{ + /// + /// Defines an Atlas vector search index model using strongly-typed C# APIs. + /// + public class CreateAtlasVectorIndexModel : CreateSearchIndexModel + { + private readonly RenderArgs _renderArgs + = new(BsonSerializer.LookupSerializer(), BsonSerializer.SerializerRegistry); + + /// + /// Initializes a new instance of the class, passing the required + /// options for and number of vector dimensions to the constructor. + /// + /// The index name. + /// The field containing the vectors to index. + /// The to use to search for top K-nearest neighbors. + /// Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time. + /// Fields that may be used as filters in the vector query. 
+ public CreateAtlasVectorIndexModel( + FieldDefinition field, + string name, + VectorSimilarity similarity, + int dimensions, + params FieldDefinition[] filterFields) + : base(name, SearchIndexType.VectorSearch) + { + Field = field; + Similarity = similarity; + Dimensions = dimensions; + FilterFields = filterFields?.ToList() ?? []; + } + + /// + /// Initializes a new instance of the class, passing the required + /// options for and number of vector dimensions to the constructor. + /// + /// The index name. + /// An expression pointing to the field containing the vectors to index. + /// The to use to search for top K-nearest neighbors. + /// Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time. + /// Expressions pointing to fields that may be used as filters in the vector query. + public CreateAtlasVectorIndexModel( + Expression> field, + string name, + VectorSimilarity similarity, + int dimensions, + params Expression>[] filterFields) + : this( + new ExpressionFieldDefinition(field), + name, + similarity, + dimensions, + filterFields?.Select(f => (FieldDefinition)new ExpressionFieldDefinition(f)).ToArray()) + { + // Field, Similarity, Dimensions and FilterFields are all assigned by the chained + // constructor above, so there is nothing left to initialize here. + } + + /// + /// The field containing the vectors to index. + /// + public FieldDefinition Field { get; } + + /// + /// The to use to search for top K-nearest neighbors. + /// + public VectorSimilarity Similarity { get; } + + /// + /// Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time. + /// + public int Dimensions { get; } + + /// + /// Fields that may be used as filters in the vector query. + /// + public IReadOnlyList> FilterFields { get; } + + /// + /// Type of automatic vector quantization for your vectors. + /// + public VectorQuantization? Quantization { get; init; } + + /// + /// Maximum number of edges (or connections) that a node can have in the Hierarchical Navigable Small Worlds graph. + /// + public int? 
HnswMaxEdges { get; init; } + + /// + /// Analogous to numCandidates at query-time, this parameter controls the maximum number of nodes to evaluate to find the closest neighbors to connect to a new node. + /// + public int? HnswNumEdgeCandidates { get; init; } + + // /// Paths to properties that may be used as filters on the entity type or its nested types. + // public IReadOnlyList FilterPaths { get; init; } + + /// + public override SearchIndexType? Type + => SearchIndexType.VectorSearch; + + /// + public override BsonDocument Definition + { + get + { + if (base.Definition != null) + { + return base.Definition; + } + + var similarityValue = Similarity == VectorSimilarity.DotProduct + ? "dotProduct" // Because neither "DotProduct" nor "dotproduct" is allowed. + : Similarity.ToString().ToLowerInvariant(); + + var vectorField = new BsonDocument + { + { "type", BsonString.Create("vector") }, + { "path", Field.Render(_renderArgs).FieldName }, + { "numDimensions", BsonInt32.Create(Dimensions) }, + { "similarity", BsonString.Create(similarityValue) }, + }; + + if (Quantization.HasValue) + { + vectorField.Add("quantization", BsonString.Create(Quantization.Value.ToString().ToLowerInvariant())); + } + + if (HnswMaxEdges != null || HnswNumEdgeCandidates != null) + { + var hnswDocument = new BsonDocument + { + { "maxEdges", BsonInt32.Create(HnswMaxEdges ?? 16) }, + { "numEdgeCandidates", BsonInt32.Create(HnswNumEdgeCandidates ?? 
100) } + }; + vectorField.Add("hnswOptions", hnswDocument); + } + + var fieldDocuments = new List { vectorField }; + + if (FilterFields != null) + { + foreach (var filterPath in FilterFields) + { + var fieldDocument = new BsonDocument + { + { "type", BsonString.Create("filter") }, + { "path", BsonString.Create(filterPath.Render(_renderArgs).FieldName) } + }; + + fieldDocuments.Add(fieldDocument); + } + } + + base.Definition = new BsonDocument { { "fields", BsonArray.Create(fieldDocuments) } }; + + return base.Definition; + } + } + } + + /// + /// Defines an Atlas vector search index model using strongly-typed C# APIs. + /// + public class CreateAtlasVectorIndexModel : CreateAtlasVectorIndexModel + { + /// + /// Initializes a new instance of the class, passing the required + /// options for and number of vector dimensions to the constructor. + /// + /// The index name. + /// The field containing the vectors to index. + /// The to use to search for top K-nearest neighbors. + /// Number of vector dimensions that Atlas Vector Search enforces at index-time and query-time. + /// Fields that may be used as filters in the vector query. + public CreateAtlasVectorIndexModel( + FieldDefinition field, + string name, + VectorSimilarity similarity, + int dimensions, + params FieldDefinition[] filterFields) + : base(field, name, similarity, dimensions, filterFields) + { + } + } + + + /// + /// TODO + /// + public class CreateAtlasSearchIndexModel + { + } +} diff --git a/src/MongoDB.Driver/CreateSearchIndexModel.cs b/src/MongoDB.Driver/CreateSearchIndexModel.cs index bb5a2498a4f..aa28f1472bc 100644 --- a/src/MongoDB.Driver/CreateSearchIndexModel.cs +++ b/src/MongoDB.Driver/CreateSearchIndexModel.cs @@ -18,40 +18,66 @@ namespace MongoDB.Driver { /// - /// Model for creating a search index. + /// Defines an Atlas vector search index model using a and acts as a base class + /// for different types of Atlas index models, including + /// and for strongly-typed Atlas models. 
+ /// definition. /// - public sealed class CreateSearchIndexModel + public class CreateSearchIndexModel { - /// Gets the index name. - /// The index name. - public string Name { get; } - - /// Gets the index type. - /// The index type. - public SearchIndexType? Type { get; } - - /// Gets the index definition. - /// The definition. - public BsonDocument Definition { get; } + /// + /// Initializes a new instance of the class, passing the index + /// model as a . + /// + /// + /// Consider using or to + /// build Atlas indexes without specifying the BSON directly. + /// + /// The name. + /// The index definition. + public CreateSearchIndexModel(string name, BsonDocument definition) + : this(name, null, definition) + { + } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class, passing the index + /// model as a . /// + /// + /// Consider using or to + /// build Atlas indexes without specifying the BSON directly. + /// /// The name. - /// The definition. - public CreateSearchIndexModel(string name, BsonDocument definition) : this(name, null, definition) { } + /// The type. + /// The index definition. + public CreateSearchIndexModel(string name, SearchIndexType? type, BsonDocument definition) + : this(name, type) + { + Definition = definition; + } /// /// Initializes a new instance of the class. /// /// The name. /// The type. - /// The definition. - public CreateSearchIndexModel(string name, SearchIndexType? type, BsonDocument definition) + protected CreateSearchIndexModel(string name, SearchIndexType? type) { Name = name; Type = type; - Definition = definition; } + + /// Gets the index name. + /// The index name. + public virtual string Name { get; } + + /// Gets the index type. + /// The index type. + public virtual SearchIndexType? Type { get; } + + /// Gets the index definition. + /// The definition. 
+ public virtual BsonDocument Definition { get; protected set; } } } diff --git a/src/MongoDB.Driver/VectorQuantization.cs b/src/MongoDB.Driver/VectorQuantization.cs new file mode 100644 index 00000000000..0831210ad3c --- /dev/null +++ b/src/MongoDB.Driver/VectorQuantization.cs @@ -0,0 +1,43 @@ +/* Copyright 2010-present MongoDB Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace MongoDB.Driver +{ + /// + /// Type of automatic vector quantization for your vectors. Use this setting only if your embeddings are float + /// or double vectors. See + /// Vector Quantization for more information. + /// + public enum VectorQuantization + { + /// + /// Indicates no automatic quantization for the vector embeddings. Use this setting if you have pre-quantized + /// vectors for ingestion. If omitted, this is the default value. + /// + None, + + /// + /// Indicates scalar quantization, which transforms values to 1 byte integers. + /// + Scalar, + + /// + /// Indicates binary quantization, which transforms values to a single bit. + /// To use this value, numDimensions must be a multiple of 8. + /// If precision is critical, select or instead of . + /// + Binary, + } +} diff --git a/src/MongoDB.Driver/VectorSimilarity.cs b/src/MongoDB.Driver/VectorSimilarity.cs new file mode 100644 index 00000000000..19691f62398 --- /dev/null +++ b/src/MongoDB.Driver/VectorSimilarity.cs @@ -0,0 +1,40 @@ +/* Copyright 2010-present MongoDB Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace MongoDB.Driver +{ + /// + /// Vector similarity function to use to search for top K-nearest neighbors. + /// See How to Index Fields for + /// Vector Search for more information. + /// + public enum VectorSimilarity + { + /// + /// Measures the distance between ends of vectors. + /// + Euclidean, + + /// + /// Measures similarity based on the angle between vectors. + /// + Cosine, + + /// + /// Measures similarity like cosine, but takes into account the magnitude of the vector. 
+ /// + DotProduct, + } +} diff --git a/tests/MongoDB.Driver.Tests/Search/AtlasSearchIndexManagmentTests.cs b/tests/MongoDB.Driver.Tests/Search/AtlasSearchIndexManagmentTests.cs index 798648fceec..c823c17f5ef 100644 --- a/tests/MongoDB.Driver.Tests/Search/AtlasSearchIndexManagmentTests.cs +++ b/tests/MongoDB.Driver.Tests/Search/AtlasSearchIndexManagmentTests.cs @@ -37,6 +37,7 @@ public class AtlasSearchIndexManagementTests : LoggableTestClass private readonly IMongoDatabase _database; private readonly IMongoClient _mongoClient; private readonly BsonDocument _indexDefinition = BsonDocument.Parse("{ mappings: { dynamic: false } }"); + private readonly BsonDocument _indexDefinitionWithFields = BsonDocument.Parse("{ mappings: { dynamic: false, fields: { } } }"); private readonly BsonDocument _vectorIndexDefinition = BsonDocument.Parse("{ fields: [ { type: 'vector', path: 'plot_embedding', numDimensions: 1536, similarity: 'euclidean' } ] }"); public AtlasSearchIndexManagementTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper) @@ -68,10 +69,16 @@ public Task Case1_driver_should_successfully_create_and_list_search_indexes( [Theory(Timeout = Timeout)] [ParameterAttributeData] public async Task Case2_driver_should_successfully_create_multiple_indexes_in_batch( - [Values(false, true)] bool async) + [Values(false, true)] bool async, + [Values(false, true)] bool includeFields) { - var indexDefinition1 = new CreateSearchIndexModel(async ? "test-search-index-1-async" : "test-search-index-1", _indexDefinition); - var indexDefinition2 = new CreateSearchIndexModel(async ? "test-search-index-2-async" : "test-search-index-2", _indexDefinition); + var indexDefinition1 = new CreateSearchIndexModel( + async ? "test-search-index-1-async" : "test-search-index-1", + includeFields ? _indexDefinitionWithFields : _indexDefinition); + + var indexDefinition2 = new CreateSearchIndexModel( + async ? "test-search-index-2-async" : "test-search-index-2", + includeFields ? 
_indexDefinitionWithFields : _indexDefinition); var indexNamesActual = async ? await _collection.SearchIndexes.CreateManyAsync(new[] { indexDefinition1, indexDefinition2 }) @@ -81,8 +88,8 @@ public async Task Case2_driver_should_successfully_create_multiple_indexes_in_ba var indexes = await GetIndexes(async, indexDefinition1.Name, indexDefinition2.Name); - indexes[0]["latestDefinition"].AsBsonDocument.Should().Be(_indexDefinition); - indexes[1]["latestDefinition"].AsBsonDocument.Should().Be(_indexDefinition); + indexes[0]["latestDefinition"].AsBsonDocument.Should().Be(_indexDefinitionWithFields); + indexes[1]["latestDefinition"].AsBsonDocument.Should().Be(_indexDefinitionWithFields); } [Theory(Timeout = Timeout)] @@ -130,7 +137,7 @@ public async Task Case4_driver_can_update_a_search_index( [Values(false, true)] bool async) { var indexName = async ? "test-search-index-async" : "test-search-index"; - var indexNewDefinition = BsonDocument.Parse("{ mappings: { dynamic: true }}"); + var indexNewDefinition = BsonDocument.Parse("{ mappings: { dynamic: true, fields: { } }}"); await CreateIndexAndValidate(indexName, _indexDefinition, async); if (async) @@ -166,7 +173,8 @@ public async Task Case5_dropSearchIndex_suppresses_namespace_not_found_errors( [Theory(Timeout = Timeout)] [ParameterAttributeData] public async Task Case6_driver_can_create_and_list_search_indexes_with_non_default_read_write_concern( - [Values(false, true)] bool async) + [Values(false, true)] bool async, + [Values(false, true)] bool includeFields) { var indexName = async ? "test-search-index-case6-async" : "test-search-index-case6"; @@ -175,13 +183,18 @@ public async Task Case6_driver_can_create_and_list_search_indexes_with_non_defau .WithWriteConcern(WriteConcern.WMajority); var indexNameCreated = async - ? await collection.SearchIndexes.CreateOneAsync(_indexDefinition, indexName) - : collection.SearchIndexes.CreateOne(_indexDefinition, indexName); + ? 
await collection.SearchIndexes.CreateOneAsync(includeFields + ? _indexDefinitionWithFields + : _indexDefinition, indexName) + : collection.SearchIndexes.CreateOne( + includeFields + ? _indexDefinitionWithFields + : _indexDefinition, indexName); indexNameCreated.Should().Be(indexName); var indexes = await GetIndexes(async, indexName); - indexes[0]["latestDefinition"].AsBsonDocument.Should().Be(_indexDefinition); + indexes[0]["latestDefinition"].AsBsonDocument.Should().Be(_indexDefinitionWithFields); } [Theory(Timeout = Timeout)] @@ -231,10 +244,178 @@ public async Task Case8_driver_requires_explicit_type_to_create_vector_search_in var indexName = async ? "test-search-index-case8-error-async" : "test-search-index-case8-error"; var exception = async - ? await Record.ExceptionAsync(() => _collection.SearchIndexes.CreateOneAsync(_vectorIndexDefinition, indexName)) - : Record.Exception(() => _collection.SearchIndexes.CreateOne(_vectorIndexDefinition, indexName)); + ? await Record.ExceptionAsync(() => _collection.SearchIndexes.CreateOneAsync( + new CreateSearchIndexModel(indexName, _vectorIndexDefinition).Definition, indexName)) + : Record.Exception(() => _collection.SearchIndexes.CreateOne( + new CreateSearchIndexModel(indexName, _vectorIndexDefinition).Definition, indexName)); + + exception.Message.Should().Contain("Command createSearchIndexes failed: \"userCommand.indexes[0].mappings\" is required."); + } + + [Theory(Timeout = Timeout)] + [ParameterAttributeData] + public async Task Can_create_Atlas_vector_index_for_all_options_using_typed_API( + [Values(false, true)] bool async) + { + var indexName = async ? "test-index-vector-optional-async" : "test-index-vector-optional"; + + var indexModel = new CreateAtlasVectorIndexModel( + e => e.Floats, indexName, VectorSimilarity.Cosine, dimensions: 2) + { + HnswMaxEdges = 18, HnswNumEdgeCandidates = 102, Quantization = VectorQuantization.Scalar + }; + + var createdName = async + ? 
await _collection.SearchIndexes.CreateOneAsync(indexModel) + : _collection.SearchIndexes.CreateOne(indexModel); + + createdName.Should().Be(indexName); + + var index = (await GetIndexes(async, indexName))[0]; + index["type"].AsString.Should().Be("vectorSearch"); + + var fields = index["latestDefinition"].AsBsonDocument["fields"].AsBsonArray; + fields.Count.Should().Be(1); - exception.Message.Should().Contain("Attribute mappings missing"); + var indexField = fields[0].AsBsonDocument; + indexField["type"].AsString.Should().Be("vector"); + indexField["path"].AsString.Should().Be("Floats"); + indexField["numDimensions"].AsInt32.Should().Be(2); + indexField["similarity"].AsString.Should().Be("cosine"); + indexField["quantization"].AsString.Should().Be("scalar"); + indexField["hnswOptions"].AsBsonDocument["maxEdges"].AsInt32.Should().Be(18); + indexField["hnswOptions"].AsBsonDocument["numEdgeCandidates"].AsInt32.Should().Be(102); + } + + [Theory(Timeout = Timeout)] + [ParameterAttributeData] + public async Task Can_create_Atlas_vector_index_for_required_only_options_using_typed_API( + [Values(false, true)] bool async) + { + var indexName = async ? "test-index-vector-required-async" : "test-index-vector-required"; + + var indexModel = new CreateAtlasVectorIndexModel("vectors", indexName, VectorSimilarity.Euclidean, dimensions: 4); + + var createdName = async + ? 
await _collection.SearchIndexes.CreateOneAsync(indexModel) + : _collection.SearchIndexes.CreateOne(indexModel); + + createdName.Should().Be(indexName); + + var index = (await GetIndexes(async, indexName))[0]; + index["type"].AsString.Should().Be("vectorSearch"); + + var fields = index["latestDefinition"].AsBsonDocument["fields"].AsBsonArray; + fields.Count.Should().Be(1); + + var indexField = fields[0].AsBsonDocument; + indexField["type"].AsString.Should().Be("vector"); + indexField["path"].AsString.Should().Be("vectors"); + indexField["numDimensions"].AsInt32.Should().Be(4); + indexField["similarity"].AsString.Should().Be("euclidean"); + + indexField.Contains("quantization").Should().Be(false); + indexField.Contains("hnswOptions").Should().Be(false); + } + + [Theory(Timeout = Timeout)] + [ParameterAttributeData] + public async Task Can_create_Atlas_vector_index_for_all_options_using_typed_API_with_filters( + [Values(false, true)] bool async) + { + var indexName = async ? "test-index-vector-typed-filters-async" : "test-index-typed-filters"; + + var indexModel = new CreateAtlasVectorIndexModel( + e => e.Floats, + indexName, + VectorSimilarity.Cosine, + dimensions: 2, + e => e.Filter1, e => e.Filter2, e => e.Filter3) + { + HnswMaxEdges = 18, + HnswNumEdgeCandidates = 102, + Quantization = VectorQuantization.Scalar, + }; + + var createdName = async + ? 
await _collection.SearchIndexes.CreateOneAsync(indexModel) + : _collection.SearchIndexes.CreateOne(indexModel); + + createdName.Should().Be(indexName); + + var index = (await GetIndexes(async, indexName))[0]; + index["type"].AsString.Should().Be("vectorSearch"); + + var fields = index["latestDefinition"].AsBsonDocument["fields"].AsBsonArray; + fields.Count.Should().Be(4); + + var indexField = fields[0].AsBsonDocument; + indexField["type"].AsString.Should().Be("vector"); + indexField["path"].AsString.Should().Be("Floats"); + indexField["numDimensions"].AsInt32.Should().Be(2); + indexField["similarity"].AsString.Should().Be("cosine"); + indexField["quantization"].AsString.Should().Be("scalar"); + indexField["hnswOptions"].AsBsonDocument["maxEdges"].AsInt32.Should().Be(18); + indexField["hnswOptions"].AsBsonDocument["numEdgeCandidates"].AsInt32.Should().Be(102); + + for (var i = 1; i <= 3; i++) + { + var filterField = fields[i].AsBsonDocument; + filterField["type"].AsString.Should().Be("filter"); + filterField["path"].AsString.Should().Be($"Filter{i}"); + } + } + + [Theory(Timeout = Timeout)] + [ParameterAttributeData] + public async Task Can_create_Atlas_vector_index_for_required_only_options_using_typed_API_with_filters( + [Values(false, true)] bool async) + { + var indexName = async ? "test-index-untyped-filters-async" : "test-index-untyped-filters"; + + var indexModel = new CreateAtlasVectorIndexModel( + "vectors", + indexName, + VectorSimilarity.Euclidean, + dimensions: 4, + "f1", "f2", "f3"); + + var createdName = async + ? 
await _collection.SearchIndexes.CreateOneAsync(indexModel) + : _collection.SearchIndexes.CreateOne(indexModel); + + createdName.Should().Be(indexName); + + var index = (await GetIndexes(async, indexName))[0]; + index["type"].AsString.Should().Be("vectorSearch"); + + var fields = index["latestDefinition"].AsBsonDocument["fields"].AsBsonArray; + fields.Count.Should().Be(4); + + var indexField = fields[0].AsBsonDocument; + indexField["type"].AsString.Should().Be("vector"); + indexField["path"].AsString.Should().Be("vectors"); + indexField["numDimensions"].AsInt32.Should().Be(4); + indexField["similarity"].AsString.Should().Be("euclidean"); + + indexField.Contains("quantization").Should().Be(false); + indexField.Contains("hnswOptions").Should().Be(false); + + for (var i = 1; i <= 3; i++) + { + var filterField = fields[i].AsBsonDocument; + filterField["type"].AsString.Should().Be("filter"); + filterField["path"].AsString.Should().Be($"f{i}"); + } + } + + private class EntityWithVector + { + public ObjectId Id { get; set; } + public float[] Floats { get; set; } + public bool Filter1 { get; set; } + public string Filter2 { get; set; } + public int Filter3 { get; set; } } private async Task CreateIndexAndValidate(string indexName, BsonDocument indexDefinition, bool async)