Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions benchmarks-jmh/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,21 @@
</execution>
</executions>
</plugin>
<!-- Javadoc generation settings for this benchmark module. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<configuration>
<!-- Extra command-line option handed to the javadoc run: adds the jdk.incubator.vector module.
     NOTE(review): presumably required because the documented sources use the incubating Vector API; confirm. -->
<additionalJOptions>
<additionalJOption>--add-modules=jdk.incubator.vector</additionalJOption>
</additionalJOptions>
<!-- Java release the documentation is generated against. -->
<release>22</release>
<!-- Disables the plugin's automatic offline-link detection. -->
<detectOfflineLinks>false</detectOfflineLinks>
<!-- Also document dependency sources, restricted to artifacts in the io.github.jbellis group. -->
<includeDependencySources>true</includeDependencySources>
<dependencySourceIncludes>
<dependencySourceInclude>io.github.jbellis:*</dependencySourceInclude>
</dependencySourceIncludes>
</configuration>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,23 @@

import io.github.jbellis.jvector.vector.types.VectorTypeSupport;


/**
* JMH benchmark for measuring graph index construction performance using randomly generated vectors.
* This benchmark evaluates the time required to build a graph index with configurable parameters
* including vector dimensionality, dataset size, and optional Product Quantization (PQ) compression.
*
* <p>The benchmark tests various configurations to assess how different factors affect index
* construction time, including the impact of using PQ compression during the build process.</p>
*
* <p>Key parameters:</p>
* <ul>
* <li>Vector dimensionality: 768 or 1536 dimensions</li>
* <li>Dataset size: 100,000 vectors</li>
* <li>PQ subspaces: 0 (no compression) or 16 subspaces</li>
* <li>Graph degree (M): 32 neighbors per node</li>
* <li>Beam width: 100 for construction search</li>
* </ul>
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
Expand All @@ -48,17 +64,45 @@
public class IndexConstructionWithRandomSetBenchmark {
private static final Logger log = LoggerFactory.getLogger(IndexConstructionWithRandomSetBenchmark.class);
/** Vector type support obtained from {@link VectorizationProvider#getInstance()}; used to allocate the float vectors created by this benchmark. */
private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport();

/** The vector values to be indexed, initialized during setup. */
private RandomAccessVectorValues ravv;

/** The score provider used during graph construction, either exact or PQ-based. */
private BuildScoreProvider buildScoreProvider;
private int M = 32; // graph degree

/** The maximum degree of the graph (number of neighbors per node). */
private int M = 32;

/** The beam width used during graph construction searches. */
private int beamWidth = 100;

/** The dimensionality of vectors being indexed. */
@Param({"768", "1536"})
private int originalDimension;

/** The number of vectors in the dataset to be indexed. */
@Param({/*"10000",*/ "100000"/*, "1000000"*/})
int numBaseVectors;

/** The number of PQ subspaces to use, or 0 for no compression. */
@Param({"0", "16"})
private int numberOfPQSubspaces;

/**
 * Creates the benchmark state object.
 *
 * <p>Instances are created by the JMH harness, which fills in the {@code @Param}
 * fields before any setup method runs, so there is nothing to initialize here.</p>
 */
public IndexConstructionWithRandomSetBenchmark() {
    // Intentionally empty: JMH owns the lifecycle of this object.
}

/**
* Initializes the benchmark state by generating random vectors and configuring
* the appropriate score provider based on whether PQ compression is enabled.
*
* @throws IOException if an error occurs during setup
*/
@Setup(Level.Trial)
public void setup() throws IOException {

Expand Down Expand Up @@ -86,11 +130,25 @@ public void setup() throws IOException {

}

/**
 * Per-invocation teardown hook.
 *
 * <p>The body is empty: nothing is released here at present. The hook is kept
 * as a placeholder for future resource cleanup.</p>
 *
 * @throws IOException declared for future cleanup code; the current empty body never throws it
 */
@TearDown(Level.Invocation)
public void tearDown() throws IOException {
    // No resources to release yet.
}

/**
* The main benchmark method that measures the time to build a graph index.
* Constructs a complete graph index from the configured vectors using the
* specified parameters and score provider.
*
* @param blackhole JMH blackhole to prevent dead code elimination
* @throws IOException if an error occurs during index construction
*/
@Benchmark
public void buildIndexBenchmark(Blackhole blackhole) throws IOException {
// score provider using the raw, in-memory vectors
Expand All @@ -100,6 +158,13 @@ public void buildIndexBenchmark(Blackhole blackhole) throws IOException {
}
}

/**
* Creates a random vector with the specified dimensionality.
* Each component is randomly generated using {@link Math#random()}.
*
* @param dimension the number of dimensions in the vector
* @return a newly created random vector
*/
private VectorFloat<?> createRandomVector(int dimension) {
VectorFloat<?> vector = VECTOR_TYPE_SUPPORT.createFloatVector(dimension);
for (int i = 0; i < dimension; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,23 @@
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
* JMH benchmark for measuring graph index construction performance using the SIFT dataset.
* This benchmark evaluates index construction time with a fixed, real-world dataset,
* testing various combinations of graph degree (M) and beam width parameters.
*
* <p>Unlike {@link IndexConstructionWithRandomSetBenchmark}, this benchmark uses the
* actual SIFT dataset loaded from disk, providing more realistic performance measurements
* that account for real data characteristics.</p>
*
* <p>Key parameters:</p>
* <ul>
* <li>Graph degree (M): 16, 32, or 64 neighbors per node</li>
* <li>Beam width: 10 or 100 for construction search</li>
* <li>Dataset: SIFT small dataset (10,000 vectors, 128 dimensions)</li>
* <li>Similarity function: Euclidean distance</li>
* </ul>
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
Expand All @@ -40,17 +57,47 @@
@Threads(1)
public class IndexConstructionWithStaticSetBenchmark {
private static final Logger log = LoggerFactory.getLogger(IndexConstructionWithStaticSetBenchmark.class);

/** The vector values to be indexed, loaded from the SIFT dataset. */
private RandomAccessVectorValues ravv;

/** The base vectors from the SIFT dataset. */
private List<VectorFloat<?>> baseVectors;

/** The query vectors from the SIFT dataset (loaded but not used in this benchmark). */
private List<VectorFloat<?>> queryVectors;

/** The ground truth nearest neighbors (loaded but not used in this benchmark). */
private List<List<Integer>> groundTruth;

/** The score provider used during graph construction. */
private BuildScoreProvider bsp;

/** The maximum degree of the graph (number of neighbors per node). */
@Param({"16", "32", "64"})
private int M; // graph degree
private int M;

/** The beam width used during graph construction searches. */
@Param({"10", "100"})
private int beamWidth;

/** The dimensionality of vectors in the dataset. */
int originalDimension;

/**
 * Creates the benchmark state object.
 *
 * <p>Instances are created by the JMH harness, which fills in the {@code @Param}
 * fields before any setup method runs, so there is nothing to initialize here.</p>
 */
public IndexConstructionWithStaticSetBenchmark() {
    // Intentionally empty: JMH owns the lifecycle of this object.
}

/**
* Initializes the benchmark state by loading the SIFT dataset from disk
* and configuring the score provider.
*
* @throws IOException if an error occurs loading the dataset files
*/
@Setup
public void setup() throws IOException {
var siftPath = "siftsmall";
Expand All @@ -67,13 +114,25 @@ public void setup() throws IOException {
bsp = BuildScoreProvider.randomAccessScoreProvider(ravv, VectorSimilarityFunction.EUCLIDEAN);
}

/**
 * Releases the loaded dataset once the benchmark is done.
 *
 * <p>Empties, in order, the base-vector list, the query-vector list and the
 * ground-truth list. The lists themselves are left in place.</p>
 *
 * @throws IOException declared by the signature; the visible body does not throw it
 */
@TearDown
public void tearDown() throws IOException {
    // Drop the dataset contents so they can be reclaimed.
    baseVectors.clear();
    queryVectors.clear();
    groundTruth.clear();
}

/**
* The main benchmark method that measures the time to build a graph index
* from the loaded SIFT dataset using the configured parameters.
*
* @param blackhole JMH blackhole to prevent dead code elimination
* @throws IOException if an error occurs during index construction
*/
@Benchmark
public void buildIndexBenchmark(Blackhole blackhole) throws IOException {
// score provider using the raw, in-memory vectors
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,28 +50,76 @@
@Threads(1)
public class PQDistanceCalculationBenchmark {
private static final Logger log = LoggerFactory.getLogger(PQDistanceCalculationBenchmark.class);

/**
 * Creates the benchmark state object.
 *
 * <p>Intended to be called by the JMH harness only; benchmark code should not
 * construct instances directly.</p>
 */
public PQDistanceCalculationBenchmark() {
    // Intentionally empty: JMH owns the lifecycle of this object.
}
/** Vector type support obtained from {@link VectorizationProvider#getInstance()}; used to allocate the float vectors created by this benchmark. */
private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport();
/**
 * Similarity function for this benchmark, fixed to Euclidean.
 * NOTE(review): its use sites are outside the visible part of this file; presumably it is
 * handed to the score providers built in setup(). Confirm.
 */
private final VectorSimilarityFunction vsf = VectorSimilarityFunction.EUCLIDEAN;

/** The base vectors used for distance calculations. */
private List<VectorFloat<?>> vectors;

/** Product-quantized versions of the base vectors, or null if M=0. */
private PQVectors pqVectors;

/** Query vectors used to test distance calculations. */
private List<VectorFloat<?>> queryVectors;

/** The Product Quantization model, or null if M=0. */
private ProductQuantization pq;

/** Score provider configured for either full precision or PQ-based scoring. */
private BuildScoreProvider buildScoreProvider;


/**
* The dimensionality of the vectors.
* <p>
* Default value: 1536 (typical for modern embedding models).
*/
@Param({"1536"})
private int dimension;


/**
* The number of base vectors to create for the dataset.
* <p>
* Default value: 10000
*/
@Param({"10000"})
private int vectorCount;


/**
* The number of query vectors to test against the dataset.
* <p>
* Default value: 100
*/
@Param({"100"})
private int queryCount;


/**
* The number of subspaces for Product Quantization.
* <p>
* When M=0, uses full precision vectors without quantization.
* When M&gt;0, splits each vector into M subspaces for compression.
* Values: 0 (no PQ), 16, 64, 192
*/
@Param({"0", "16", "64", "192"})
private int M; // Number of subspaces for PQ
private int M;


/**
* Sets up the benchmark by creating random vectors and configuring score providers.
* <p>
* This method creates the specified number of base vectors and query vectors with random
* values. If M&gt;0, it also computes Product Quantization and creates PQ-encoded vectors.
* The appropriate score provider is then configured based on whether PQ is used.
*
* @throws IOException if there is an error during setup
*/
@Setup
public void setup() throws IOException {
log.info("Creating dataset with dimension: {}, vector count: {}, query count: {}", dimension, vectorCount, queryCount);
Expand Down Expand Up @@ -100,6 +148,16 @@ public void setup() throws IOException {
log.info("Created dataset with dimension: {}, vector count: {}, query count: {}", dimension, vectorCount, queryCount);
}

/**
* Benchmarks distance calculation using cached search score providers.
* <p>
* This benchmark measures the performance of calculating distances between query vectors
* and all base vectors using a search score provider that caches precomputed values for
* the query vector. This represents the typical search scenario where a query is compared
* against many candidates.
*
* @param blackhole JMH blackhole to prevent dead code elimination
*/
@Benchmark
public void cachedDistanceCalculation(Blackhole blackhole) {
float totalSimilarity = 0;
Expand All @@ -115,6 +173,16 @@ public void cachedDistanceCalculation(Blackhole blackhole) {
blackhole.consume(totalSimilarity);
}

/**
* Benchmarks distance calculation for diversity scoring.
* <p>
* This benchmark measures the performance of calculating distances between base vectors
* using diversity score providers. This represents the scenario where vectors in the
* dataset are compared against each other to assess diversity, such as during graph
* construction or result reranking.
*
* @param blackhole JMH blackhole to prevent dead code elimination
*/
@Benchmark
public void diversityCalculation(Blackhole blackhole) {
float totalSimilarity = 0;
Expand All @@ -130,6 +198,15 @@ public void diversityCalculation(Blackhole blackhole) {
blackhole.consume(totalSimilarity);
}

/**
* Creates a random vector with the specified dimension.
* <p>
* Each component of the vector is assigned a random floating-point value
* between 0.0 (inclusive) and 1.0 (exclusive).
*
* @param dimension the number of dimensions for the vector
* @return a new random vector
*/
private VectorFloat<?> createRandomVector(int dimension) {
VectorFloat<?> vector = VECTOR_TYPE_SUPPORT.createFloatVector(dimension);
for (int i = 0; i < dimension; i++) {
Expand Down
Loading