diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3ea1326b4608..815a9edf6a70 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -7,6 +7,11 @@ http://s.apache.org/luceneversions API Changes --------------------- +* GITHUB#15113: Move long[] group varint methods to backward-codecs. Deprecated long[] group varint + methods have been removed from core GroupVIntUtil and DataOutput classes and moved to backward-codecs + module to prevent accidental usage in benchmarks while maintaining compatibility for legacy codecs. + (Sakshi Chourasia) + * GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski) * GITHUB#14165: TieredMergePolicy's maxMergeAtOnce parameter was removed. (Adrien Grand) diff --git a/lucene/backward-codecs/src/java/module-info.java b/lucene/backward-codecs/src/java/module-info.java index 5c8cf801669b..24a7d083bd79 100644 --- a/lucene/backward-codecs/src/java/module-info.java +++ b/lucene/backward-codecs/src/java/module-info.java @@ -43,6 +43,7 @@ exports org.apache.lucene.backward_codecs.lucene103; exports org.apache.lucene.backward_codecs.packed; exports org.apache.lucene.backward_codecs.store; + exports org.apache.lucene.backward_codecs.util; provides org.apache.lucene.codecs.DocValuesFormat with org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/PostingsUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/PostingsUtil.java index 8f526f7ef104..3eb4e722bc4c 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/PostingsUtil.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene912/PostingsUtil.java @@ -17,9 +17,10 @@ package org.apache.lucene.backward_codecs.lucene912; import java.io.IOException; +import org.apache.lucene.backward_codecs.store.DataOutputUtil; +import 
org.apache.lucene.backward_codecs.util.GroupVIntUtil; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.GroupVIntUtil; /** Utility class to encode/decode postings block. */ final class PostingsUtil { @@ -61,7 +62,7 @@ static void writeVIntBlock( docBuffer[i] = (docBuffer[i] << 1) | (freqBuffer[i] == 1 ? 1 : 0); } } - docOut.writeGroupVInts(docBuffer, num); + DataOutputUtil.writeGroupVInts(docOut, docBuffer, num); if (writeFreqs) { for (int i = 0; i < num; i++) { final int freq = (int) freqBuffer[i]; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/PostingsUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/PostingsUtil.java index dce8c2b145d5..3a817dba579f 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/PostingsUtil.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/PostingsUtil.java @@ -17,9 +17,10 @@ package org.apache.lucene.backward_codecs.lucene99; import java.io.IOException; +import org.apache.lucene.backward_codecs.store.DataOutputUtil; +import org.apache.lucene.backward_codecs.util.GroupVIntUtil; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.GroupVIntUtil; /** Utility class to encode/decode postings block. */ final class PostingsUtil { @@ -61,7 +62,7 @@ static void writeVIntBlock( docBuffer[i] = (docBuffer[i] << 1) | (freqBuffer[i] == 1 ? 
1 : 0); } } - docOut.writeGroupVInts(docBuffer, num); + DataOutputUtil.writeGroupVInts(docOut, docBuffer, num); if (writeFreqs) { for (int i = 0; i < num; i++) { final int freq = (int) freqBuffer[i]; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/DataOutputUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/DataOutputUtil.java new file mode 100644 index 000000000000..5da06bca6508 --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/DataOutputUtil.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_codecs.store; + +import java.io.IOException; +import org.apache.lucene.backward_codecs.util.GroupVIntUtil; +import org.apache.lucene.store.DataOutput; + +/** + * Utility methods for DataOutput operations that are only used by backward codecs. + * + * @lucene.internal + */ +public final class DataOutputUtil { + + private DataOutputUtil() {} // no instance + + /** + * Encode integers using group-varint. It uses {@link DataOutput#writeVInt VInt} to encode tail + * values that are not enough for a group. 
We need a long[] because this is what postings are + * using; all longs are actually required to be integers. + * + * @param values the values to write + * @param limit the number of values to write. + * @lucene.experimental + */ + public static void writeGroupVInts(DataOutput out, long[] values, int limit) throws IOException { + byte[] groupVIntBytes = new byte[GroupVIntUtil.MAX_LENGTH_PER_GROUP]; + GroupVIntUtil.writeGroupVInts(out, groupVIntBytes, values, limit); + } +} diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/util/GroupVIntUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/util/GroupVIntUtil.java new file mode 100644 index 000000000000..786eaf4fa0d9 --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/util/GroupVIntUtil.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_codecs.util; + +import java.io.IOException; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.BitUtil; + +/** + * This class contains utility methods for group varint encoding/decoding of long[] arrays. 
These + * methods are only used by backward codecs and have been moved here from the main GroupVIntUtil + * class. + * + * @lucene.internal + */ +public final class GroupVIntUtil { + /** The maximum length of a single group-varint is 1 byte flag and 4 integers. */ + public static final int MAX_LENGTH_PER_GROUP = Byte.BYTES + 4 * Integer.BYTES; + + private GroupVIntUtil() {} // no instance + + /** + * Read all the group varints, including the tail vints to a long[]. + * + * @param dst the array to read ints into. + * @param limit the number of int values to read. + * @lucene.experimental + */ + public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException { + int i; + for (i = 0; i <= limit - 4; i += 4) { + readGroupVInt(in, dst, i); + } + for (; i < limit; ++i) { + dst[i] = in.readVInt() & 0xFFFFFFFFL; + } + } + + /** + * Default implementation for reading a single group. For optimal performance, use {@link + * GroupVIntUtil#readGroupVInts(DataInput, long[], int)} instead. + * + * @param in the input to use to read data. + * @param dst the array to read ints into. + * @param offset the offset in the array to start storing ints. 
+ */ + public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException { + final int flag = in.readByte() & 0xFF; + + final int n1Minus1 = flag >> 6; + final int n2Minus1 = (flag >> 4) & 0x03; + final int n3Minus1 = (flag >> 2) & 0x03; + final int n4Minus1 = flag & 0x03; + + dst[offset] = readIntInGroup(in, n1Minus1) & 0xFFFFFFFFL; + dst[offset + 1] = readIntInGroup(in, n2Minus1) & 0xFFFFFFFFL; + dst[offset + 2] = readIntInGroup(in, n3Minus1) & 0xFFFFFFFFL; + dst[offset + 3] = readIntInGroup(in, n4Minus1) & 0xFFFFFFFFL; + } + + private static int readIntInGroup(DataInput in, int numBytesMinus1) throws IOException { + switch (numBytesMinus1) { + case 0: + return in.readByte() & 0xFF; + case 1: + return in.readShort() & 0xFFFF; + case 2: + return (in.readShort() & 0xFFFF) | ((in.readByte() & 0xFF) << 16); + default: + return in.readInt(); + } + } + + private static int numBytes(int v) { + // | 1 to return 1 when v = 0 + return Integer.BYTES - (Integer.numberOfLeadingZeros(v | 1) >> 3); + } + + private static int toInt(long value) { + if ((Long.compareUnsigned(value, 0xFFFFFFFFL) > 0)) { + throw new ArithmeticException("integer overflow"); + } + return (int) value; + } + + /** + * The implementation for group-varint encoding. It uses a maximum of {@link + * #MAX_LENGTH_PER_GROUP} bytes scratch buffer. 
+ */ + public static void writeGroupVInts(DataOutput out, byte[] scratch, long[] values, int limit) + throws IOException { + int readPos = 0; + + // encode each group + while ((limit - readPos) >= 4) { + int writePos = 0; + final int n1Minus1 = numBytes(toInt(values[readPos])) - 1; + final int n2Minus1 = numBytes(toInt(values[readPos + 1])) - 1; + final int n3Minus1 = numBytes(toInt(values[readPos + 2])) - 1; + final int n4Minus1 = numBytes(toInt(values[readPos + 3])) - 1; + int flag = (n1Minus1 << 6) | (n2Minus1 << 4) | (n3Minus1 << 2) | (n4Minus1); + scratch[writePos++] = (byte) flag; + BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++])); + writePos += n1Minus1 + 1; + BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++])); + writePos += n2Minus1 + 1; + BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++])); + writePos += n3Minus1 + 1; + BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++])); + writePos += n4Minus1 + 1; + + out.writeBytes(scratch, writePos); + } + + // tail vints + for (; readPos < limit; readPos++) { + out.writeVInt(toInt(values[readPos])); + } + } +} diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/util/package-info.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/util/package-info.java new file mode 100644 index 000000000000..d5bd2fdf31fd --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/util/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Utility classes for backward compatibility codecs. */ +package org.apache.lucene.backward_codecs.util; diff --git a/lucene/backward-codecs/src/test/java/org/apache/lucene/backward_codecs/util/TestGroupVIntUtil.java b/lucene/backward-codecs/src/test/java/org/apache/lucene/backward_codecs/util/TestGroupVIntUtil.java new file mode 100644 index 000000000000..7e100c7bd697 --- /dev/null +++ b/lucene/backward-codecs/src/test/java/org/apache/lucene/backward_codecs/util/TestGroupVIntUtil.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.backward_codecs.util; + +import java.io.IOException; +import org.apache.lucene.backward_codecs.store.DataOutputUtil; +import org.apache.lucene.store.ByteBuffersDataInput; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.tests.util.LuceneTestCase; + +public class TestGroupVIntUtil extends LuceneTestCase { + + public void testLongArrayRoundTrip() throws IOException { + long[] original = {1L, 127L, 128L, 16383L, 16384L, 2097151L, 2097152L, 268435455L}; + + // Write using the backward-codecs utility + ByteBuffersDataOutput out = new ByteBuffersDataOutput(); + DataOutputUtil.writeGroupVInts(out, original, original.length); + + // Read back using the backward-codecs utility + ByteBuffersDataInput in = out.toDataInput(); + long[] result = new long[original.length]; + GroupVIntUtil.readGroupVInts(in, result, original.length); + + assertArrayEquals(original, result); + } + + public void testSingleGroupVInt() throws IOException { + long[] original = {1L, 2L, 3L, 4L}; + + ByteBuffersDataOutput out = new ByteBuffersDataOutput(); + byte[] scratch = new byte[GroupVIntUtil.MAX_LENGTH_PER_GROUP]; + GroupVIntUtil.writeGroupVInts(out, scratch, original, original.length); + + ByteBuffersDataInput in = out.toDataInput(); + long[] result = new long[original.length]; + GroupVIntUtil.readGroupVInt(in, result, 0); + + assertArrayEquals(original, result); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/store/DataOutput.java b/lucene/core/src/java/org/apache/lucene/store/DataOutput.java index 3d856cbc5f60..9085f165ecfa 100644 --- a/lucene/core/src/java/org/apache/lucene/store/DataOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/DataOutput.java @@ -325,24 +325,6 @@ public void writeSetOfStrings(Set set) throws IOException { } } - /** - * Encode integers using group-varint. It uses {@link DataOutput#writeVInt VInt} to encode tail - * values that are not enough for a group. 
we need a long[] because this is what postings are - * using, all longs are actually required to be integers. - * - * @param values the values to write - * @param limit the number of values to write. - * @lucene.experimental - * @deprecated This method is preserved only for backwards codecs - */ - @Deprecated - public void writeGroupVInts(long[] values, int limit) throws IOException { - if (groupVIntBytes == null) { - groupVIntBytes = new byte[GroupVIntUtil.MAX_LENGTH_PER_GROUP]; - } - GroupVIntUtil.writeGroupVInts(this, groupVIntBytes, values, limit); - } - /** * Encode integers using group-varint. It uses {@link DataOutput#writeVInt VInt} to encode tail * values that are not enough for a group. diff --git a/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java b/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java index f3b19c519b1e..5ad0ed6479ac 100644 --- a/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java @@ -137,13 +137,6 @@ private static int numBytes(int v) { return Integer.BYTES - (Integer.numberOfLeadingZeros(v | 1) >> 3); } - private static int toInt(long value) { - if ((Long.compareUnsigned(value, 0xFFFFFFFFL) > 0)) { - throw new ArithmeticException("integer overflow"); - } - return (int) value; - } - /** * The implementation for group-varint encoding, It uses a maximum of {@link * #MAX_LENGTH_PER_GROUP} bytes scratch buffer. @@ -178,83 +171,4 @@ public static void writeGroupVInts(DataOutput out, byte[] scratch, int[] values, out.writeVInt(values[readPos]); } } - - /** - * Read all the group varints, including the tail vints to a long[]. - * - * @param dst the array to read ints into. - * @param limit the number of int values to read. 
- * @lucene.experimental - * @deprecated Only for backwards codecs - */ - @Deprecated - public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException { - int i; - for (i = 0; i <= limit - 4; i += 4) { - readGroupVInt(in, dst, i); - } - for (; i < limit; ++i) { - dst[i] = in.readVInt() & 0xFFFFFFFFL; - } - } - - /** - * Default implementation of read single group, for optimal performance, you should use {@link - * GroupVIntUtil#readGroupVInts(DataInput, long[], int)} instead. - * - * @param in the input to use to read data. - * @param dst the array to read ints into. - * @param offset the offset in the array to start storing ints. - * @deprecated Only for backwards codecs - */ - @Deprecated - public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException { - final int flag = in.readByte() & 0xFF; - - final int n1Minus1 = flag >> 6; - final int n2Minus1 = (flag >> 4) & 0x03; - final int n3Minus1 = (flag >> 2) & 0x03; - final int n4Minus1 = flag & 0x03; - - dst[offset] = readIntInGroup(in, n1Minus1) & 0xFFFFFFFFL; - dst[offset + 1] = readIntInGroup(in, n2Minus1) & 0xFFFFFFFFL; - dst[offset + 2] = readIntInGroup(in, n3Minus1) & 0xFFFFFFFFL; - dst[offset + 3] = readIntInGroup(in, n4Minus1) & 0xFFFFFFFFL; - } - - /** - * The implementation for group-varint encoding, It uses a maximum of {@link - * #MAX_LENGTH_PER_GROUP} bytes scratch buffer. 
- */ - @Deprecated - public static void writeGroupVInts(DataOutput out, byte[] scratch, long[] values, int limit) - throws IOException { - int readPos = 0; - - // encode each group - while ((limit - readPos) >= 4) { - int writePos = 0; - final int n1Minus1 = numBytes(toInt(values[readPos])) - 1; - final int n2Minus1 = numBytes(toInt(values[readPos + 1])) - 1; - final int n3Minus1 = numBytes(toInt(values[readPos + 2])) - 1; - final int n4Minus1 = numBytes(toInt(values[readPos + 3])) - 1; - int flag = (n1Minus1 << 6) | (n2Minus1 << 4) | (n3Minus1 << 2) | (n4Minus1); - scratch[writePos++] = (byte) flag; - BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++])); - writePos += n1Minus1 + 1; - BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++])); - writePos += n2Minus1 + 1; - BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++])); - writePos += n3Minus1 + 1; - BitUtil.VH_LE_INT.set(scratch, writePos, (int) (values[readPos++])); - writePos += n4Minus1 + 1; - - out.writeBytes(scratch, writePos); - } - - // tail vints - for (; readPos < limit; readPos++) { - out.writeVInt(toInt(values[readPos])); - } - } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java index 27bfa3c72f11..07b5bd439ce1 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java @@ -1480,38 +1480,6 @@ public void testDataTypes() throws IOException { } } - @Deprecated - public void testGroupVIntOverflow() throws IOException { - try (Directory dir = getDirectory(createTempDir("testGroupVIntOverflow"))) { - final int size = 32; - final long[] values = new long[size]; - final long[] restore = new long[size]; - values[0] = 1L << 31; // values[0] = 2147483648 as long, but as int it is -2147483648 - - 
for (int i = 0; i < size; i++) { - if (random().nextBoolean()) { - values[i] = values[0]; - } - } - - // a smaller limit value cover default implementation of readGroupVInts - // and a bigger limit value cover the faster implementation. - final int limit = random().nextInt(1, size); - IndexOutput out = dir.createOutput("test", IOContext.DEFAULT); - out.writeGroupVInts(values, limit); - out.close(); - try (IndexInput in = dir.openInput("test", IOContext.DEFAULT)) { - GroupVIntUtil.readGroupVInts(in, restore, limit); - for (int i = 0; i < limit; i++) { - assertEquals(values[i], restore[i]); - } - } - - values[0] = 0xFFFFFFFFL + 1; - assertThrows(ArithmeticException.class, () -> out.writeGroupVInts(values, 4)); - } - } - public void testGroupVInt() throws IOException { try (Directory dir = getDirectory(createTempDir("testGroupVInt"))) { // test fallback to default implementation of readGroupVInt