Make sure we don't allocate VMBigInteger objects for values fitting into the small int range (#75)

smarr · web-flow · commit 6b807e1bb4ab · 2025-08-17T11:31:06.000+01:00
This is the follow-up to #74 and makes sure that we don't have VMBigInteger objects that could just be tagged ints. The PR also fixes a typing issue with the relevant macros and has a few other minor improvements.
diff --git a/src/lib/InfInt.h b/src/lib/InfInt.h
@@ -19,7 +19,7 @@
  *      toString:       converts it to a string
  *
  *   There are also conversion methods which allow conversion to primitive
- * types: toLong, toLongLong, toUnsignedLong,
+ * types: toInt64, toUnsignedLong,
  * toUnsignedLongLong.
  *
  *   You may define INFINT_USE_EXCEPTIONS and library methods will start raising
@@ -151,6 +151,7 @@ class InfInt {
 
     /* basic properties */
     [[nodiscard]] bool isZero() const;
+    [[nodiscard]] bool isWithinSmallIntRange() const;
 
     /* integer square root */
     [[nodiscard]] InfInt intSqrt() const;  // throw
@@ -166,8 +167,7 @@ class InfInt {
     [[nodiscard]] std::string toString() const;
 
     /* conversion to primitive types */
-    [[nodiscard]] int64_t toLong() const;               // throw
-    [[nodiscard]] int64_t toLongLong() const;           // throw
+    [[nodiscard]] int64_t toInt64() const;              // throw
     [[nodiscard]] uint64_t toUnsignedLong() const;      // throw
     [[nodiscard]] uint64_t toUnsignedLongLong() const;  // throw
 
@@ -740,6 +740,81 @@ inline bool InfInt::isZero() const {
     return val.size() == 1 && val[0] == 0;
 }
 
+inline bool InfInt::isWithinSmallIntRange() const {
+    // this is implemented based on the <= and >= operators
+    // and the known encoding of VMTAGGEDINTEGER_MAX and VMTAGGEDINTEGER_MIN as
+    // an InfInt
+    if (pos) {
+        // if the value is positive, we only need to check it against
+        // VMTAGGEDINTEGER_MAX
+
+        // if it has more than 3 parts, we know it's out of range,
+        // but if it has fewer, then we know it's within range.
+        size_t const size = val.size();
+        if (size > 3) {
+            return false;
+        }
+        if (size < 3) {
+            return true;
+        }
+
+        // now we look at the individual parts
+        // starting at the most significant
+        if (val[2] < 4) {
+            return true;
+        }
+        if (val[2] > 4) {
+            return false;
+        }
+
+        if (val[1] < 611686018) {
+            return true;
+        }
+        if (val[1] > 611686018) {
+            return false;
+        }
+
+        if (val[0] <= 427387903) {
+            return true;
+        }
+        return false;
+    }
+
+    // if the value is negative, we only need to check it against
+    // VMTAGGEDINTEGER_MIN
+
+    // if it has more than 3 parts, we know it's out of range,
+    // but if it has fewer, then we know it's within range.
+    size_t const size = val.size();
+    if (size > 3) {
+        return false;
+    }
+    if (size < 3) {
+        return true;
+    }
+
+    // now we look at the individual parts
+    // starting at the most significant
+    if (val[2] < 4) {
+        return true;
+    }
+    if (val[2] > 4) {
+        return false;
+    }
+
+    if (val[1] < 611686018) {
+        return true;
+    }
+    if (val[1] > 611686018) {
+        return false;
+    }
+
+    if (val[0] <= 427387904) {
+        return true;
+    }
+    return false;
+}
+
 inline InfInt InfInt::intSqrt() const {
     // PROFINY_SCOPE
     if (*this <= InfInt()) {  // TODO(smarr): replace by a more specific check
@@ -837,7 +912,7 @@ inline int InfInt::truncateToInt() const {
     return pos ? result : -result;
 }
 
-inline int64_t InfInt::toLong() const {
+inline int64_t InfInt::toInt64() const {
     // PROFINY_SCOPE
     if (*this > InfInt(INT64_MAX) || *this < InfInt(INT64_MIN)) {
 #ifdef INFINT_USE_EXCEPTIONS
@@ -862,22 +937,6 @@ inline int64_t InfInt::truncateToInt64() const {
     return pos ? result : -result;
 }
 
-inline int64_t InfInt::toLongLong() const {
-    // PROFINY_SCOPE
-    if (*this > InfInt(INT64_MAX) || *this < InfInt(INT64_MIN)) {
-#ifdef INFINT_USE_EXCEPTIONS
-        throw InfIntException("out of bounds");
-#else
-        std::cerr << "Out of LLONG bounds: " << *this << '\n';
-#endif
-    }
-    int64_t result = 0;
-    for (int i = (int)val.size() - 1; i >= 0; --i) {
-        result = result * BASE + val[i];
-    }
-    return pos ? result : -result;
-}
-
 inline int64_t InfInt::toLongLongForHash() const {
     int64_t result = 0;
     for (int i = (int)val.size() - 1; i >= 0; --i) {
diff --git a/src/primitives/Integer.cpp b/src/primitives/Integer.cpp
@@ -69,7 +69,7 @@ static vm_oop_t intPlus(vm_oop_t leftObj, vm_oop_t rightObj) {
             if (unlikely(__builtin_add_overflow(left, right, &result))) {
                 InfInt const l(left);
                 InfInt const r(right);
-                return Universe::NewBigInteger(l + r);
+                return Universe::NewInt(l + r);
             }
             return NEW_INT(result);
         }
@@ -107,7 +107,7 @@ static vm_oop_t intLeftShift(vm_oop_t leftObj, vm_oop_t rightObj) {
         auto const numberOfLeadingZeros = __builtin_clzll((uint64_t)left);
 
         if (64 - numberOfLeadingZeros + right > 63) {
-            return Universe::NewBigInteger(InfInt(left) << right);
+            return Universe::NewInt(InfInt(left) << right);
         }
 
         // NOLINTNEXTLINE(hicpp-signed-bitwise)
@@ -143,7 +143,7 @@ static vm_oop_t intMinus(vm_oop_t leftObj, vm_oop_t rightObj) {
             if (unlikely(__builtin_sub_overflow(left, right, &result))) {
                 InfInt const l(left);
                 InfInt const r(right);
-                return Universe::NewBigInteger(l - r);
+                return Universe::NewInt(l - r);
             }
             return NEW_INT(result);
         }
@@ -172,7 +172,7 @@ static vm_oop_t intStar(vm_oop_t leftObj, vm_oop_t rightObj) {
             if (unlikely(__builtin_mul_overflow(left, right, &result))) {
                 InfInt const l(left);
                 InfInt const r(right);
-                return Universe::NewBigInteger(l * r);
+                return Universe::NewInt(l * r);
             }
             return NEW_INT(result);
         }
diff --git a/src/unitTests/InfIntTests.cpp b/src/unitTests/InfIntTests.cpp
@@ -4,42 +4,72 @@
 #include <cstdint>
 
 #include "../lib/InfInt.h"
+#include "../vmobjects/ObjectFormats.h"
 
 void InfIntTest::testBasicNumbers() {
     InfInt const zero(int64_t(0LL));
-    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), zero.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), zero.toInt64());
 
     InfInt const one(int64_t(1LL));
-    CPPUNIT_ASSERT_EQUAL(int64_t(1LL), one.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(1LL), one.toInt64());
 
     InfInt const a500(int64_t(500LL));
-    CPPUNIT_ASSERT_EQUAL(int64_t(500LL), a500.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(500LL), a500.toInt64());
 
     InfInt const a32bitNum(int64_t(3221258751LL));
-    CPPUNIT_ASSERT_EQUAL(int64_t(3221258751LL), a32bitNum.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(3221258751LL), a32bitNum.toInt64());
 
     InfInt const a48bitNum(int64_t(211109453791743LL));
-    CPPUNIT_ASSERT_EQUAL(int64_t(211109453791743LL), a48bitNum.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(211109453791743LL), a48bitNum.toInt64());
 
     InfInt const a63bitNum(int64_t(8070661641701720575LL));
-    CPPUNIT_ASSERT_EQUAL(int64_t(8070661641701720575LL),
-                         a63bitNum.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(8070661641701720575LL), a63bitNum.toInt64());
 }
 
 void InfIntTest::testIsZero() {
     InfInt const zero{};
-    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), zero.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), zero.toInt64());
     CPPUNIT_ASSERT(zero.isZero());
 
     InfInt const zeroInt64(int64_t(0LL));
-    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), zeroInt64.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), zeroInt64.toInt64());
     CPPUNIT_ASSERT(zeroInt64.isZero());
 
     InfInt const zeroStr("0");
-    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), zeroStr.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), zeroStr.toInt64());
     CPPUNIT_ASSERT(zeroStr.isZero());
 
     InfInt const negZeroStr("-0");
-    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), negZeroStr.toLongLong());
+    CPPUNIT_ASSERT_EQUAL(int64_t(0LL), negZeroStr.toInt64());
     CPPUNIT_ASSERT(negZeroStr.isZero());
 }
+
+void InfIntTest::testIsWithinSmallIntRange() {
+    InfInt const smallIntMax(VMTAGGEDINTEGER_MAX);
+    CPPUNIT_ASSERT_EQUAL(int64_t(VMTAGGEDINTEGER_MAX), smallIntMax.toInt64());
+    CPPUNIT_ASSERT(smallIntMax.isWithinSmallIntRange());
+
+    InfInt const smallIntMin(VMTAGGEDINTEGER_MIN);
+    CPPUNIT_ASSERT_EQUAL(int64_t(VMTAGGEDINTEGER_MIN), smallIntMin.toInt64());
+    CPPUNIT_ASSERT(smallIntMin.isWithinSmallIntRange());
+
+    InfInt const smallIntMaxPlusOne(VMTAGGEDINTEGER_MAX + 1LL);
+    CPPUNIT_ASSERT_EQUAL(int64_t(VMTAGGEDINTEGER_MAX + 1LL),
+                         smallIntMaxPlusOne.toInt64());
+    CPPUNIT_ASSERT(!smallIntMaxPlusOne.isWithinSmallIntRange());
+
+    InfInt const smallIntMinMinusOne(VMTAGGEDINTEGER_MIN - 1LL);
+    CPPUNIT_ASSERT_EQUAL(int64_t(VMTAGGEDINTEGER_MIN - 1LL),
+                         smallIntMinMinusOne.toInt64());
+    CPPUNIT_ASSERT(!smallIntMinMinusOne.isWithinSmallIntRange());
+
+    InfInt const smallIntMaxMinusOne(VMTAGGEDINTEGER_MAX - 1LL);
+    CPPUNIT_ASSERT_EQUAL(int64_t(VMTAGGEDINTEGER_MAX - 1LL),
+                         smallIntMaxMinusOne.toInt64());
+    CPPUNIT_ASSERT(smallIntMaxMinusOne.isWithinSmallIntRange());
+
+    InfInt const smallIntMinPlusOne(VMTAGGEDINTEGER_MIN + 1LL);
+    CPPUNIT_ASSERT_EQUAL(int64_t(VMTAGGEDINTEGER_MIN + 1LL),
+                         smallIntMinPlusOne.toInt64());
+    CPPUNIT_ASSERT(smallIntMinPlusOne.isWithinSmallIntRange());
+}
diff --git a/src/unitTests/InfIntTests.h b/src/unitTests/InfIntTests.h
@@ -10,6 +10,7 @@ class InfIntTest : public CPPUNIT_NS::TestCase {
     CPPUNIT_TEST_SUITE(InfIntTest);  // NOLINT(misc-const-correctness)
     CPPUNIT_TEST(testBasicNumbers);
     CPPUNIT_TEST(testIsZero);
+    CPPUNIT_TEST(testIsWithinSmallIntRange);
     CPPUNIT_TEST_SUITE_END();
 
 public:
@@ -19,4 +20,5 @@ class InfIntTest : public CPPUNIT_NS::TestCase {
 private:
     static void testBasicNumbers();
     static void testIsZero();
+    static void testIsWithinSmallIntRange();
 };
diff --git a/src/vm/IsValidObject.cpp b/src/vm/IsValidObject.cpp
@@ -198,7 +198,8 @@ void obtain_vtables_of_known_classes(VMSymbol* someValidSymbol) {
     auto* i = new (GetHeap<HEAP_CLS>(), 0) VMInteger(0);
     vt_integer = get_vtable(i);
 
-    auto* bi = new (GetHeap<HEAP_CLS>(), 0) VMBigInteger("0", false);
+    auto* bi =
+        new (GetHeap<HEAP_CLS>(), 0) VMBigInteger("4611686018427387904", false);
     vt_big_integer = get_vtable(bi);
 
     auto* mth = new (GetHeap<HEAP_CLS>(), 0)
diff --git a/src/vm/Universe.cpp b/src/vm/Universe.cpp
@@ -790,7 +790,10 @@ VMInteger* Universe::NewInteger(int64_t value) {
     return new (GetHeap<HEAP_CLS>(), 0) VMInteger(value);
 }
 
-VMBigInteger* Universe::NewBigInteger(InfInt&& value) {
+vm_oop_t Universe::NewInt(InfInt&& value) {
+    if (value.isWithinSmallIntRange()) {
+        return NEW_INT(value.toInt64());
+    }
     return new (GetHeap<HEAP_CLS>(), 0) VMBigInteger(std::move(value));
 }
 
diff --git a/src/vm/Universe.h b/src/vm/Universe.h
@@ -82,7 +82,9 @@ class Universe {
     static VMObject* NewInstance(VMClass* /*classOfInstance*/);
     static VMObject* NewInstanceWithoutFields();
     static VMInteger* NewInteger(int64_t /*value*/);
-    static VMBigInteger* NewBigInteger(InfInt&& /*value*/);
+
+    static vm_oop_t NewInt(InfInt&& /*value*/);
+
     static VMBigInteger* NewBigIntegerFromInt(int64_t /*value*/);
     static VMBigInteger* NewBigIntegerFromStr(const char* /*value*/,
                                               bool /* negateValue */);
diff --git a/src/vmobjects/ObjectFormats.h b/src/vmobjects/ObjectFormats.h
@@ -50,7 +50,8 @@
 
 #ifdef __GNUC__
   #define VMTAGGED_INTEGER_WITHIN_RANGE_CHECK(X) \
-      ((X) >= VMTAGGEDINTEGER_MIN && (X) <= VMTAGGEDINTEGER_MAX)
+      ((int64_t(X) >= VMTAGGEDINTEGER_MIN) &&    \
+       (int64_t(X) <= VMTAGGEDINTEGER_MAX))
 #else
 __attribute__((always_inline)) inline bool VMTAGGED_INTEGER_WITHIN_RANGE_CHECK(
     int64_t X) {
@@ -65,7 +66,7 @@ __attribute__((always_inline)) inline bool VMTAGGED_INTEGER_WITHIN_RANGE_CHECK(
 #if ADDITIONAL_ALLOCATION
   #define TAG_INTEGER(X)                                                   \
       ((VMTAGGED_INTEGER_WITHIN_RANGE_CHECK(X) && Universe::NewInteger(0)) \
-           ? ((vm_oop_t)(((X) << 1U) | 1U))                                \
+           ? ((vm_oop_t)((((uintptr_t)(X)) << 1U) | 1U))                   \
            : (Universe::NewBigIntegerFromInt(X)))
 #else
   #define TAG_INTEGER(X)                                 \
@@ -75,9 +76,6 @@ __attribute__((always_inline)) inline bool VMTAGGED_INTEGER_WITHIN_RANGE_CHECK(
 #endif
 
 #if USE_TAGGING
-  #define INT_VAL(X)                                                           \
-      (IS_TAGGED(X) ? ((int64_t)(X) >> 1U) /* NOLINT (hicpp-signed-bitwise) */ \
-                    : (((VMInteger*)(X))->GetEmbeddedInteger()))
   #define SMALL_INT_VAL(X) \
       ((int64_t)(X) >> 1U) /* NOLINT (hicpp-signed-bitwise) */
   #define NEW_INT(X) (TAG_INTEGER((X)))
diff --git a/src/vmobjects/VMBigInteger.h b/src/vmobjects/VMBigInteger.h
@@ -9,9 +9,15 @@ class VMBigInteger : public AbstractVMObject {
     typedef GCBigInteger Stored;
 
     explicit VMBigInteger(const char* value, bool negate)
-        : embeddedInteger(negate ? -InfInt(value) : InfInt(value)) {}
-    explicit VMBigInteger(int64_t value) : embeddedInteger(InfInt(value)) {}
-    explicit VMBigInteger(const InfInt&& value) : embeddedInteger(value) {}
+        : embeddedInteger(negate ? -InfInt(value) : InfInt(value)) {
+        assert(!embeddedInteger.isWithinSmallIntRange());
+    }
+    explicit VMBigInteger(int64_t value) : embeddedInteger(InfInt(value)) {
+        assert(!embeddedInteger.isWithinSmallIntRange());
+    }
+    explicit VMBigInteger(const InfInt&& value) : embeddedInteger(value) {
+        assert(!embeddedInteger.isWithinSmallIntRange());
+    }
 
     ~VMBigInteger() override = default;
 
@@ -27,7 +33,7 @@ class VMBigInteger : public AbstractVMObject {
     }
 
     [[nodiscard]] inline int64_t GetHash() const override {
-        return embeddedInteger.toLongLong();
+        return embeddedInteger.toInt64();
     }
 
     void MarkObjectAsInvalid() override;

Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,7 @@ static vm_oop_t intPlus(vm_oop_t leftObj, vm_oop_t rightObj) {`
`69`	`69`	`if (unlikely(__builtin_add_overflow(left, right, &result))) {`
`70`	`70`	`InfInt const l(left);`
`71`	`71`	`InfInt const r(right);`
`72`		`- return Universe::NewBigInteger(l + r);`
	`72`	`+ return Universe::NewInt(l + r);`
`73`	`73`	`}`
`74`	`74`	`return NEW_INT(result);`
`75`	`75`	`}`
`@@ -107,7 +107,7 @@ static vm_oop_t intLeftShift(vm_oop_t leftObj, vm_oop_t rightObj) {`
`107`	`107`	`auto const numberOfLeadingZeros = __builtin_clzll((uint64_t)left);`
`108`	`108`
`109`	`109`	`if (64 - numberOfLeadingZeros + right > 63) {`
`110`		`- return Universe::NewBigInteger(InfInt(left) << right);`
	`110`	`+ return Universe::NewInt(InfInt(left) << right);`
`111`	`111`	`}`
`112`	`112`
`113`	`113`	`// NOLINTNEXTLINE(hicpp-signed-bitwise)`
`@@ -143,7 +143,7 @@ static vm_oop_t intMinus(vm_oop_t leftObj, vm_oop_t rightObj) {`
`143`	`143`	`if (unlikely(__builtin_sub_overflow(left, right, &result))) {`
`144`	`144`	`InfInt const l(left);`
`145`	`145`	`InfInt const r(right);`
`146`		`- return Universe::NewBigInteger(l - r);`
	`146`	`+ return Universe::NewInt(l - r);`
`147`	`147`	`}`
`148`	`148`	`return NEW_INT(result);`
`149`	`149`	`}`
`@@ -172,7 +172,7 @@ static vm_oop_t intStar(vm_oop_t leftObj, vm_oop_t rightObj) {`
`172`	`172`	`if (unlikely(__builtin_mul_overflow(left, right, &result))) {`
`173`	`173`	`InfInt const l(left);`
`174`	`174`	`InfInt const r(right);`
`175`		`- return Universe::NewBigInteger(l * r);`
	`175`	`+ return Universe::NewInt(l * r);`
`176`	`176`	`}`
`177`	`177`	`return NEW_INT(result);`
`178`	`178`	`}`
Original file line number	Diff line number	Diff line change
`@@ -790,7 +790,10 @@ VMInteger* Universe::NewInteger(int64_t value) {`
`790`	`790`	`return new (GetHeap<HEAP_CLS>(), 0) VMInteger(value);`
`791`	`791`	`}`
`792`	`792`
`793`		`-VMBigInteger* Universe::NewBigInteger(InfInt&& value) {`
	`793`	`+vm_oop_t Universe::NewInt(InfInt&& value) {`
	`794`	`+ if (value.isWithinSmallIntRange()) {`
	`795`	`+ return NEW_INT(value.toInt64());`
	`796`	`+ }`
`794`	`797`	`return new (GetHeap<HEAP_CLS>(), 0) VMBigInteger(std::move(value));`
`795`	`798`	`}`
`796`	`799`