Skip to content
5 changes: 5 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ http://s.apache.org/luceneversions

API Changes
---------------------
* GITHUB#15113: Move long[] group varint methods to backward-codecs. The deprecated long[] group
varint methods have been removed from the core GroupVIntUtil and DataOutput classes and moved to the
backward-codecs module to prevent accidental usage in benchmarks while maintaining compatibility for
legacy codecs. (Sakshi Chourasia)

* GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski)

* GITHUB#14165: TieredMergePolicy's maxMergeAtOnce parameter was removed. (Adrien Grand)
Expand Down
1 change: 1 addition & 0 deletions lucene/backward-codecs/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
exports org.apache.lucene.backward_codecs.lucene103;
exports org.apache.lucene.backward_codecs.packed;
exports org.apache.lucene.backward_codecs.store;
exports org.apache.lucene.backward_codecs.util;

provides org.apache.lucene.codecs.DocValuesFormat with
org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
package org.apache.lucene.backward_codecs.lucene912;

import java.io.IOException;
import org.apache.lucene.backward_codecs.store.DataOutputUtil;
import org.apache.lucene.backward_codecs.util.GroupVIntUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.GroupVIntUtil;

/** Utility class to encode/decode postings block. */
final class PostingsUtil {
Expand Down Expand Up @@ -61,7 +62,7 @@ static void writeVIntBlock(
docBuffer[i] = (docBuffer[i] << 1) | (freqBuffer[i] == 1 ? 1 : 0);
}
}
docOut.writeGroupVInts(docBuffer, num);
DataOutputUtil.writeGroupVInts(docOut, docBuffer, num);
if (writeFreqs) {
for (int i = 0; i < num; i++) {
final int freq = (int) freqBuffer[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
package org.apache.lucene.backward_codecs.lucene99;

import java.io.IOException;
import org.apache.lucene.backward_codecs.store.DataOutputUtil;
import org.apache.lucene.backward_codecs.util.GroupVIntUtil;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.GroupVIntUtil;

/** Utility class to encode/decode postings block. */
final class PostingsUtil {
Expand Down Expand Up @@ -61,7 +62,7 @@ static void writeVIntBlock(
docBuffer[i] = (docBuffer[i] << 1) | (freqBuffer[i] == 1 ? 1 : 0);
}
}
docOut.writeGroupVInts(docBuffer, num);
DataOutputUtil.writeGroupVInts(docOut, docBuffer, num);
if (writeFreqs) {
for (int i = 0; i < num; i++) {
final int freq = (int) freqBuffer[i];
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.store;

import java.io.IOException;
import org.apache.lucene.backward_codecs.util.GroupVIntUtil;
import org.apache.lucene.store.DataOutput;

/**
 * Static helpers layered on top of {@link DataOutput} whose only callers are the backward codecs.
 *
 * @lucene.internal
 */
public final class DataOutputUtil {

  private DataOutputUtil() {} // no instance

  /**
   * Writes {@code limit} values from {@code values} to {@code out} in group-varint form. Values that
   * do not fill a complete group are written with {@link DataOutput#writeVInt VInt} instead. The
   * input is a long[] because that is what postings use, but every long must actually fit in an
   * integer.
   *
   * <p>A fresh scratch buffer of {@link GroupVIntUtil#MAX_LENGTH_PER_GROUP} bytes is allocated on
   * every call.
   *
   * @param out the output to write the encoded values to
   * @param values the values to write
   * @param limit the number of values to write.
   * @lucene.experimental
   */
  public static void writeGroupVInts(DataOutput out, long[] values, int limit) throws IOException {
    GroupVIntUtil.writeGroupVInts(
        out, new byte[GroupVIntUtil.MAX_LENGTH_PER_GROUP], values, limit);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.util;

import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BitUtil;

/**
 * Group-varint encoding and decoding helpers that operate on long[] arrays. Only backward codecs
 * use these methods; they were relocated here from the main GroupVIntUtil class.
 *
 * @lucene.internal
 */
public final class GroupVIntUtil {
  /** Largest encoded size of one group: a 1-byte flag followed by 4 integers. */
  public static final int MAX_LENGTH_PER_GROUP = Byte.BYTES + 4 * Integer.BYTES;

  private GroupVIntUtil() {} // no instance

  /**
   * Decodes {@code limit} values into a long[]: first every complete group of four, then the
   * remaining tail values, which are stored as vints.
   *
   * @param in the input to read data from.
   * @param dst the array to read ints into.
   * @param limit the number of int values to read.
   * @lucene.experimental
   */
  public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException {
    int pos = 0;
    // complete groups of four values
    while (pos <= limit - 4) {
      readGroupVInt(in, dst, pos);
      pos += 4;
    }
    // leftover values, each one a plain vint widened without sign extension
    while (pos < limit) {
      dst[pos] = in.readVInt() & 0xFFFFFFFFL;
      pos++;
    }
  }

  /**
   * Decodes exactly one group of four values. This is the default single-group implementation; for
   * optimal performance, you should use {@link GroupVIntUtil#readGroupVInts(DataInput, long[], int)}
   * instead.
   *
   * @param in the input to use to read data.
   * @param dst the array to read ints into.
   * @param offset the offset in the array to start storing ints.
   */
  public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException {
    final int flag = in.readByte() & 0xFF;

    // The flag packs four 2-bit fields, most significant first; each holds (byte length - 1).
    for (int slot = 0; slot < 4; slot++) {
      final int lengthMinus1 = (flag >> (6 - 2 * slot)) & 0x03;
      dst[offset + slot] = readIntInGroup(in, lengthMinus1) & 0xFFFFFFFFL;
    }
  }

  /** Reads one little-endian value that occupies {@code numBytesMinus1 + 1} bytes. */
  private static int readIntInGroup(DataInput in, int numBytesMinus1) throws IOException {
    if (numBytesMinus1 == 0) {
      return in.readByte() & 0xFF;
    }
    if (numBytesMinus1 == 1) {
      return in.readShort() & 0xFFFF;
    }
    if (numBytesMinus1 == 2) {
      // three bytes: the low 16 bits come first, then the top byte
      final int low = in.readShort() & 0xFFFF;
      final int high = in.readByte() & 0xFF;
      return low | (high << 16);
    }
    return in.readInt();
  }

  /** Returns how many bytes (1 to 4) are needed to hold {@code v}. */
  private static int numBytes(int v) {
    // OR-ing with 1 makes zero count as a one-byte value
    final int leadingZeroBytes = Integer.numberOfLeadingZeros(v | 1) >> 3;
    return Integer.BYTES - leadingZeroBytes;
  }

  /** Narrows {@code value} to an int, rejecting anything with bits set above the low 32. */
  private static int toInt(long value) {
    if ((value >>> 32) != 0L) {
      throw new ArithmeticException("integer overflow");
    }
    return (int) value;
  }

  /**
   * Group-varint encoder. The supplied scratch buffer is used to assemble each group and needs to
   * hold at most {@link #MAX_LENGTH_PER_GROUP} bytes.
   */
  public static void writeGroupVInts(DataOutput out, byte[] scratch, long[] values, int limit)
      throws IOException {
    int next = 0;

    // encode each complete group of four values
    for (; (limit - next) >= 4; next += 4) {
      // range-check and size all four values before touching the scratch buffer
      final int len1 = numBytes(toInt(values[next]));
      final int len2 = numBytes(toInt(values[next + 1]));
      final int len3 = numBytes(toInt(values[next + 2]));
      final int len4 = numBytes(toInt(values[next + 3]));

      scratch[0] = (byte) (((len1 - 1) << 6) | ((len2 - 1) << 4) | ((len3 - 1) << 2) | (len4 - 1));
      int writePos = 1;

      // Each store writes a full 4-byte int; the following store overwrites the unused high bytes.
      BitUtil.VH_LE_INT.set(scratch, writePos, (int) values[next]);
      writePos += len1;
      BitUtil.VH_LE_INT.set(scratch, writePos, (int) values[next + 1]);
      writePos += len2;
      BitUtil.VH_LE_INT.set(scratch, writePos, (int) values[next + 2]);
      writePos += len3;
      BitUtil.VH_LE_INT.set(scratch, writePos, (int) values[next + 3]);
      writePos += len4;

      out.writeBytes(scratch, writePos);
    }

    // tail vints
    while (next < limit) {
      out.writeVInt(toInt(values[next]));
      next++;
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/** Utility classes for backward compatibility codecs. */
package org.apache.lucene.backward_codecs.util;
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.util;

import java.io.IOException;
import org.apache.lucene.backward_codecs.store.DataOutputUtil;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.tests.util.LuceneTestCase;

/** Round-trip tests for the long[] group-varint helpers in backward-codecs. */
public class TestGroupVIntUtil extends LuceneTestCase {

  /** Encodes eight values through {@link DataOutputUtil} and decodes them in bulk. */
  public void testLongArrayRoundTrip() throws IOException {
    final long[] expected = {1L, 127L, 128L, 16383L, 16384L, 2097151L, 2097152L, 268435455L};

    // Encode with the backward-codecs writer
    final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
    DataOutputUtil.writeGroupVInts(buffer, expected, expected.length);

    // Decode with the backward-codecs reader
    final long[] decoded = new long[expected.length];
    GroupVIntUtil.readGroupVInts(buffer.toDataInput(), decoded, expected.length);

    assertArrayEquals(expected, decoded);
  }

  /** Encodes exactly one group with an explicit scratch buffer and decodes it as a single group. */
  public void testSingleGroupVInt() throws IOException {
    final long[] expected = {1L, 2L, 3L, 4L};

    final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
    GroupVIntUtil.writeGroupVInts(
        buffer, new byte[GroupVIntUtil.MAX_LENGTH_PER_GROUP], expected, expected.length);

    final long[] decoded = new long[expected.length];
    GroupVIntUtil.readGroupVInt(buffer.toDataInput(), decoded, 0);

    assertArrayEquals(expected, decoded);
  }
}
18 changes: 0 additions & 18 deletions lucene/core/src/java/org/apache/lucene/store/DataOutput.java
Original file line number Diff line number Diff line change
Expand Up @@ -325,24 +325,6 @@ public void writeSetOfStrings(Set<String> set) throws IOException {
}
}

/**
 * Writes {@code limit} values in group-varint form, falling back to {@link DataOutput#writeVInt
 * VInt} for trailing values that do not fill a whole group. The input is a long[] because that is
 * what postings use, but every long must actually fit in an integer.
 *
 * @param values the values to write
 * @param limit the number of values to write.
 * @lucene.experimental
 * @deprecated This method is preserved only for backwards codecs
 */
@Deprecated
public void writeGroupVInts(long[] values, int limit) throws IOException {
  // The scratch buffer is created on first use and kept in the groupVIntBytes field afterwards.
  byte[] scratch = groupVIntBytes;
  if (scratch == null) {
    scratch = new byte[GroupVIntUtil.MAX_LENGTH_PER_GROUP];
    groupVIntBytes = scratch;
  }
  GroupVIntUtil.writeGroupVInts(this, scratch, values, limit);
}

/**
* Encode integers using group-varint. It uses {@link DataOutput#writeVInt VInt} to encode tail
* values that are not enough for a group.
Expand Down
Loading
Loading