Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@
<dependency groupId="org.apache.lucene" artifactId="lucene-core" version="9.8.0" />
<dependency groupId="org.apache.lucene" artifactId="lucene-analysis-common" version="9.8.0" />
<dependency groupId="org.apache.lucene" artifactId="lucene-backward-codecs" version="9.8.0" />
<dependency groupId="io.github.jbellis" artifactId="jvector" version="4.0.0-rc.3" />
<dependency groupId="io.github.jbellis" artifactId="jvector" version="4.0.0-rc.4-SNAPSHOT" />
<dependency groupId="com.bpodgursky" artifactId="jbool_expressions" version="1.14" scope="test"/>

<dependency groupId="com.carrotsearch.randomizedtesting" artifactId="randomizedtesting-runner" version="2.1.2" scope="test">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,8 @@ public enum CassandraRelevantProperties
SAI_VECTOR_FLUSH_THRESHOLD_MAX_ROWS("cassandra.sai.vector_flush_threshold_max_rows", "-1"),
// Use non-positive value to disable it. Period in millis to trigger a flush for SAI vector memtable index.
SAI_VECTOR_FLUSH_PERIOD_IN_MILLIS("cassandra.sai.vector_flush_period_in_millis", "-1"),
// Whether compaction should build vector indexes using fused ADC
SAI_VECTOR_ENABLE_FUSED("cassandra.sai.vector.enable_fused", "true"),
/**
* Whether to disable auto-compaction
*/
Expand Down
1 change: 0 additions & 1 deletion src/java/org/apache/cassandra/index/sai/IndexContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import org.slf4j.LoggerFactory;

import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import org.apache.cassandra.config.CassandraRelevantProperties;
import org.apache.cassandra.cql3.Operator;
import org.apache.cassandra.cql3.statements.schema.IndexTarget;
import org.apache.cassandra.db.ClusteringComparator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.cassandra.index.sai.disk.v5.V5OnDiskFormat;
import org.apache.cassandra.index.sai.disk.v6.V6OnDiskFormat;
import org.apache.cassandra.index.sai.disk.v7.V7OnDiskFormat;
import org.apache.cassandra.index.sai.disk.v8.V8OnDiskFormat;
import org.apache.cassandra.index.sai.utils.TypeUtil;
import org.apache.cassandra.io.sstable.format.SSTableFormat;
import org.apache.cassandra.schema.SchemaConstants;
Expand Down Expand Up @@ -69,10 +70,12 @@ public class Version implements Comparable<Version>
public static final Version EC = new Version("ec", V7OnDiskFormat.instance, (c, i, g) -> stargazerFileNameFormat(c, i, g, "ec"));
// total terms count serialization in index metadata, enables ANN_USE_SYNTHETIC_SCORE by default
public static final Version ED = new Version("ed", V7OnDiskFormat.instance, (c, i, g) -> stargazerFileNameFormat(c, i, g, "ed"));
// jvector file format version 6 (skipped 5)
public static final Version FA = new Version("fa", V8OnDiskFormat.instance, (c, i, g) -> stargazerFileNameFormat(c, i, g, "fa"));

// These are in reverse-chronological order so that the latest version is first. Version matching tests
// are more likely to match the latest version, so we want to test that one first.
public static final List<Version> ALL = Lists.newArrayList(ED, EC, EB, DC, DB, CA, BA, AA);
public static final List<Version> ALL = Lists.newArrayList(FA, ED, EC, EB, DC, DB, CA, BA, AA);

public static final Version EARLIEST = AA;
public static final Version VECTOR_EARLIEST = BA;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.cassandra.index.sai.disk.v8;

import org.apache.cassandra.index.sai.disk.v7.V7OnDiskFormat;

/**
 * SAI on-disk format V8.
 * <p>
 * Inherits everything from {@link V7OnDiskFormat}; the only thing overridden here is the
 * jvector file format version that this format reports.
 */
public class V8OnDiskFormat extends V7OnDiskFormat
{
    /** jvector file format version reported by this on-disk format. */
    private static final int JVECTOR_FILE_FORMAT_VERSION = 6;

    /** Shared instance of this format. */
    public static final V8OnDiskFormat instance = new V8OnDiskFormat();

    /**
     * @return the jvector file format version for this on-disk format, which is {@code 6}
     */
    @Override
    public int jvectorFileFormatVersion()
    {
        return JVECTOR_FILE_FORMAT_VERSION;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.github.jbellis.jvector.graph.GraphIndex;
import io.github.jbellis.jvector.graph.ImmutableGraphIndex;
import io.github.jbellis.jvector.graph.GraphSearcher;
import io.github.jbellis.jvector.graph.disk.feature.FeatureId;
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
Expand Down Expand Up @@ -72,7 +72,7 @@ public class CassandraDiskAnn
private final FileHandle graphHandle;
private final OnDiskOrdinalsMap ordinalsMap;
private final Set<FeatureId> features;
private final GraphIndex graph;
private final ImmutableGraphIndex graph;
private final VectorSimilarityFunction similarityFunction;
@Nullable
private final CompressedVectors compressedVectors;
Expand All @@ -94,7 +94,7 @@ public CassandraDiskAnn(SSTableContext sstableContext, SegmentMetadata.Component

SegmentMetadata.ComponentMetadata termsMetadata = this.componentMetadatas.get(IndexComponentType.TERMS_DATA);
graphHandle = indexFiles.termsData();
var rawGraph = OnDiskGraphIndex.load(graphHandle::createReader, termsMetadata.offset);
var rawGraph = OnDiskGraphIndex.load(graphHandle::createReader, termsMetadata.offset, false);
features = rawGraph.getFeatureSet();
graph = rawGraph;

Expand All @@ -117,7 +117,7 @@ public CassandraDiskAnn(SSTableContext sstableContext, SegmentMetadata.Component
}

VectorCompression.CompressionType compressionType = VectorCompression.CompressionType.values()[reader.readByte()];
if (features.contains(FeatureId.FUSED_ADC))
if (features.contains(FeatureId.FUSED_PQ))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@marianotepper - I just noticed that the features map already has logic that loads the ProductQuantization, meaning this branch currently keeps two identical maps in memory. I think it'd make sense to possibly expose the features map in the OnDiskGraphIndex so we can remove the duplicate cost. Any reason we can't do that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we actually already use the one from the header, I just didn't catch it. I think we'll be able to get rid of it with a little extra work in CC.

{
assert compressionType == VectorCompression.CompressionType.PRODUCT_QUANTIZATION;
compressedVectors = null;
Expand Down Expand Up @@ -231,11 +231,9 @@ public CloseableIterator<RowIdWithScore> search(VectorFloat<?> queryVector,
searcher.usePruning(usePruning);
try
{
var view = (GraphIndex.ScoringView) searcher.getView();
var view = (ImmutableGraphIndex.ScoringView) searcher.getView();
SearchScoreProvider ssp;
// FusedADC can no longer be written due to jvector upgrade. However, it's possible these index files
// still exist, so we have to support them.
if (features.contains(FeatureId.FUSED_ADC))
if (features.contains(FeatureId.FUSED_PQ))
{
var asf = view.approximateScoreFunctionFor(queryVector, similarityFunction);
var rr = isRerankless ? null : view.rerankerFor(queryVector, similarityFunction);
Expand Down Expand Up @@ -311,9 +309,9 @@ public OrdinalsView getOrdinalsView()
return ordinalsMap.getOrdinalsView();
}

public GraphIndex.ScoringView getView()
public ImmutableGraphIndex.ScoringView getView()
{
return (GraphIndex.ScoringView) graph.getView();
return (ImmutableGraphIndex.ScoringView) graph.getView();
}

public boolean containsUnitVectors()
Expand Down
Loading