EfficientMoE · drunkcoding · Jun 5, 2025 · Jul 6, 2025 · Jul 7, 2025 · Jul 7, 2025
diff --git a/CITATIONS.md b/CITATIONS.md
@@ -3,6 +3,7 @@
   author       = {Leyang Xue and
                   Yao Fu and
                   Zhan Lu and
+                  Chuanhao Sun and
                   Luo Mai and
                   Mahesh Marina},
   title        = {MoE-Infinity: Efficient MoE Inference on Personal Machines with Sparsity-Aware Expert Cache},

diff --git a/README.md b/README.md
@@ -204,6 +204,7 @@ If you use MoE-Inifity for your research, please cite our [paper](https://arxiv.
   author       = {Leyang Xue and
                   Yao Fu and
                   Zhan Lu and
+                  Chuanhao Sun and
                   Luo Mai and
                   Mahesh Marina},
   title        = {MoE{-}Infinity: Efficient MoE Inference on Personal Machines with Sparsity-Aware Expert Cache},

diff --git a/core/common/constant.h b/core/common/constant.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <stdint.h>
+
+constexpr int64_t KB = int64_t{1} << 10;  // 2^10
+constexpr int64_t MB = KB << 10;          // 2^20
+constexpr int64_t GB = MB << 10;          // 2^30
+// NOTE(review): presumably the GPU warp size ("Wrap" looks like a typo).
+constexpr int kWrapSize = 32;
diff --git a/core/common/context.h b/core/common/context.h
@@ -0,0 +1,69 @@
+// Copyright (c) EfficientMoE.
+// SPDX-License-Identifier: Apache-2.0
+
+// EfficientMoE Team
+
+#pragma once
+
+#include <cstdint>
+#include <mutex>
+#include <unordered_map>
+
+// Numeric values double as the "dtype" codes accepted by SetFromDict below.
+enum class DataType { BFLOAT16 = 0, FLOAT32 = 1, FLOAT16 = 2, FP8_E4M3FN = 3 };
+
+// Plain bundle of configuration values. Every field has a default and can be
+// overridden by name through SetFromDict().
+struct Context {
+  int64_t max_expert_tokens = 128;  // Default maximum expert tokens
+  int64_t max_tokens = 4096;        // Default maximum tokens
+  int num_experts = 8;              // Default number of experts
+  int topk = 2;                     // Default top-k value
+  int64_t hidden_dim = 1024;        // Default hidden dimension
+  int64_t intermediate_dim =
+      4096;  // Default intermediate dimension for experts
+  DataType dtype = DataType::FLOAT32;  // Default data type
+
+  // Overwrites each field whose name is a key of `dict`. Fields without a
+  // matching key keep their current value; unrecognized keys are ignored.
+  //
+  // dict: field name -> integer value. "dtype" carries the numeric value of
+  //       a DataType enumerator (0..3).
+  // Throws std::invalid_argument if "dtype" is present but not in 0..3; the
+  // other fields named in `dict` have already been updated at that point.
+  void SetFromDict(const std::unordered_map<std::string, int64_t>& dict) {
+    // One hash lookup per key; nullptr means the key is absent.
+    const auto lookup = [&dict](const char* key) -> const int64_t* {
+      const auto it = dict.find(key);
+      return it == dict.end() ? nullptr : &it->second;
+    };
+
+    if (const int64_t* v = lookup("max_expert_tokens")) max_expert_tokens = *v;
+    if (const int64_t* v = lookup("max_tokens")) max_tokens = *v;
+    if (const int64_t* v = lookup("hidden_dim")) hidden_dim = *v;
+    if (const int64_t* v = lookup("intermediate_dim")) intermediate_dim = *v;
+
+    // num_experts and topk are stored as int, so the 64-bit value is narrowed.
+    if (const int64_t* v = lookup("num_experts")) {
+      num_experts = static_cast<int>(*v);
+    }
+    if (const int64_t* v = lookup("topk")) {
+      topk = static_cast<int>(*v);
+    }
+
+    if (const int64_t* v = lookup("dtype")) {
+      // Validate the full 64-bit value before converting it. Narrowing to int
+      // first would let an out-of-range code such as 1LL << 32 wrap to 0 and
+      // be accepted as BFLOAT16.
+      if (*v < 0 || *v > 3) {
+        throw std::invalid_argument("Invalid dtype value");
+      }
+      dtype = static_cast<DataType>(*v);
+    }
+  }
+};
+
+inline Context& getContext() {  // inline: header-defined, one per program
+  static Context instance;      // created on first call, shared afterwards
+  return instance;
+}
diff --git a/core/common/generator.h b/core/common/generator.h
@@ -0,0 +1,88 @@
+// Copyright (c) EfficientMoE.
+// SPDX-License-Identifier: Apache-2.0
+
+// EfficientMoE Team
+
+#pragma once
+
+#include <uuid/uuid.h>
+
+#include <bitset>
+#include <chrono>
+#include <iomanip>
+#include <mutex>
+#include <random>
+#include <sstream>
+#include <string>
+#include <atomic>
+
+// Hands out numeric ids. The id counter and its mutex are inline static
+// members (C++17) so this header can be included from several translation units.
+class NumGenerator {
+ public:
+  // Returns the next context id while holding mutex_. 0 is a special id and
+  // is never returned, even after the 32-bit counter wraps around.
+  static uint32_t ctx_id() {
+    std::lock_guard<std::mutex> g(mutex_);
+    uint32_t ret = ctx_id_++;
+    if (ret == 0) ret = ctx_id_++;
+    return ret;
+  }
+  // Returns the next flow number; the first call yields 1024.
+  static uint32_t flowno() {
+    static std::atomic<uint32_t> flowno(1024);
+    return flowno++;
+  }
+
+ private:
+  static inline std::mutex mutex_;
+  static inline uint32_t ctx_id_ = 1;  // Start from 1 to avoid 0
+};
+
+inline std::string GenUUID() {  // new UUID in its textual form
+  uuid_t raw;
+  uuid_generate(raw);
+  char text[37];  // 36 characters plus the terminating NUL
+  uuid_unparse(raw, text);
+  return text;
+}
+
+// Returns a 64-bit id: the high-resolution clock tick count XOR-ed with a
+// pseudo-random 64-bit draw. Uniqueness is not guaranteed. The generator state
+// is thread_local so concurrent callers never share (and race on) one engine.
+inline uint64_t GenUUID64() {
+  thread_local std::mt19937_64 eng(std::random_device{}());
+  thread_local std::uniform_int_distribution<uint64_t> distr;
+
+  const auto ticks = static_cast<uint64_t>(
+      std::chrono::high_resolution_clock::now().time_since_epoch().count());
+  return ticks ^ distr(eng);
+}
+
+// Returns the current local time formatted as "YYYY-MM-DD HH:MM:SS.mmm".
+inline std::string CurrentTimeString() {
+  using std::chrono::system_clock;
+
+  const auto tp = system_clock::now();
+
+  // Calendar fields come from the whole-second part via time_t / std::tm.
+  const auto secs = system_clock::to_time_t(tp);
+  const auto local = *std::localtime(&secs);
+
+  // Millisecond remainder of the time since the clock's epoch.
+  const auto millis = std::chrono::duration_cast<std::chrono::milliseconds>(
+                          tp.time_since_epoch()) %
+                      1000;
+
+  std::ostringstream out;
+  out << std::put_time(&local, "%Y-%m-%d %H:%M:%S") << '.'
+      << std::setfill('0') << std::setw(3) << millis.count();
+  return out.str();
+}
+
+// Microseconds since the system_clock epoch, read at run time (not constexpr)
+inline uint64_t CurrentTimeMicros() {
+  using namespace std::chrono;
+  const auto since_epoch = system_clock::now().time_since_epoch();
+  return duration_cast<microseconds>(since_epoch).count();
+}
diff --git a/core/common/pytorch.h b/core/common/pytorch.h
@@ -7,6 +7,7 @@
 
 #include <torch/extension.h>
 #include "aio/archer_prio_aio_handle.h"
+#include "types.h"
 #include "base/noncopyable.h"
 
 #define CPU_DEVICE torch::Device(torch::kCPU)
@@ -27,6 +28,15 @@
 #define INT64_TENSOR_OPTIONS(target) TENSOR_OPTIONS(torch::kInt64, target)
 #define BFLOAT16_TENSOR_OPTIONS(target) TENSOR_OPTIONS(torch::kBFloat16, target)
 
+#define TENSOR_FROM_BLOB(blob, shape, dtype, target)      \
+  torch::from_blob(blob, shape, DoNothingDeleter<void>{}, \
+                   TENSOR_OPTIONS(dtype, target))  // no-op deleter: blob is not freed
+
+// Same as TENSOR_FROM_BLOB, except dtype is wrapped in torch::ScalarType(...)
+#define TENSOR_FROM_BLOB_CPP(blob, shape, dtype, target)  \
+  torch::from_blob(blob, shape, DoNothingDeleter<void>{}, \
+                   TENSOR_OPTIONS(torch::ScalarType(dtype), target))
+
 #define FAKE_TENSOR_SIZES torch::IntArrayRef({1})
 
 inline std::vector<uint32_t> list_to_vector(py::list list) {

diff --git a/core/common/types.h b/core/common/types.h
@@ -6,6 +6,9 @@
 #pragma once
 
 #include <cstdint>
+#include <utility>
+#include <string>
+#include <type_traits>
 
 typedef std::uint32_t TensorID;
 typedef std::size_t HashID;
@@ -35,3 +38,99 @@ template <typename T>
 struct DoNothingDeleter {
   void operator()(T* ptr) const {}
 };
+
+// GetNthType<N, Ts...>::type is the N-th (zero-based) type of the pack Ts.
+// An index past the end of the pack has no definition and fails to compile.
+template <size_t N, typename... Args>
+struct GetNthType;
+
+template <typename First, typename... Rest>
+struct GetNthType<0, First, Rest...> {
+  using type = First;  // index 0 selects the head of the pack
+};
+
+// Any other index: drop the head and look up N - 1 in the remaining types.
+template <size_t N, typename First, typename... Rest>
+struct GetNthType<N, First, Rest...> : GetNthType<N - 1, Rest...> {};
+
+template <size_t N, typename... Args>
+using GetNthType_t = typename GetNthType<N, Args...>::type;
+
+template <int N>  // value = floor(sqrt(N)); all arithmetic stays within int
+struct ConstexprSqrt {
+  static_assert(N >= 0, "ConstexprSqrt requires a non-negative N");
+  static constexpr int compute(int low = 1, int high = N) {
+    if (low >= high) return high;  // also covers N == 0 (low > high)
+    const int mid = low + (high - low + 1) / 2;  // no-overflow upper middle
+    return (mid > N / mid) ? compute(low, mid - 1) : compute(mid, high);
+  }
+  static constexpr int value = compute();
+};
+
+// Rounds N up to a multiple of Multiple (holds for N >= 0, Multiple > 0).
+template <int N, int Multiple>
+struct RoundToMultiple {
+  static constexpr int value = (N + Multiple - 1) / Multiple * Multiple;
+};
+
+// Converts any `const T*` to `void*`, dropping the const qualifier.
+template <typename T>
+constexpr void* pointer_to_void(const T* ptr) {
+  // static_cast, unlike reinterpret_cast, is valid in constant expressions.
+  return const_cast<void*>(static_cast<const void*>(ptr));
+}
+
+// Per-enumerator callbacks that DEFINE_ENUM_CLASS passes to an ENUM_VALUES list
+#define ENUM_ENTRY_COMMA(value, EnumType) value,  // enumerator list entry
+#define ENUM_CASE(value, EnumType) \
+  case EnumType::value:            \
+    return #value;  // switch case returning the enumerator's name
+#define STRING_CASE(value, EnumType) \
+  if (s == #value) return EnumType::value;  // needs a string `s` in scope
+
+// Generic enum-to-string fallback; only participates when E is an enum type.
+template <typename E>
+constexpr auto enum_to_string(E e) noexcept
+    -> std::enable_if_t<std::is_enum_v<E>, const char*> {
+  // Fallback result; DEFINE_ENUM_CLASS adds a specialization per enum type
+  return "Unknown";
+}
+
+// Generic string-to-enum fallback; only participates when E is an enum type.
+template <typename E>
+constexpr auto string_to_enum(const std::string& s) noexcept
+    -> std::enable_if_t<std::is_enum_v<E>, E> {
+  // Fallback result; DEFINE_ENUM_CLASS adds a specialization per enum type
+  return static_cast<E>(0);  // value 0, i.e. the first enumerator by default
+}
+
+// Defines `enum class EnumType` (the listed values plus a trailing `Unknown`),
+// EnumTypeToString / StringToEnumType and the enum_to_string / string_to_enum
+// specializations. ENUM_VALUES(X, E) must expand to X(value, E) per enumerator.
+#define DEFINE_ENUM_CLASS(EnumType, ENUM_VALUES)                             \
+  enum class EnumType { ENUM_VALUES(ENUM_ENTRY_COMMA, EnumType) Unknown };   \
+  constexpr const char* EnumType##ToString(EnumType v) {                     \
+    switch (v) {                                                             \
+      ENUM_VALUES(ENUM_CASE, EnumType)                                       \
+      default:                                                               \
+        return "Unknown";                                                    \
+    }                                                                        \
+  }                                                                          \
+                                                                             \
+  /* inline: may be expanded in headers shared by several TUs */             \
+  inline EnumType StringTo##EnumType(const std::string& s) {                 \
+    ENUM_VALUES(STRING_CASE, EnumType)                                       \
+    return EnumType::Unknown;                                                \
+  }                                                                          \
+                                                                             \
+  template <>                                                                \
+  constexpr auto enum_to_string<EnumType>(                                   \
+      EnumType e) noexcept -> const char* {                                  \
+    return EnumType##ToString(e);                                            \
+  }                                                                          \
+                                                                             \
+  template <>                                                                \
+  inline auto string_to_enum<EnumType>(const std::string& s) noexcept        \
+      -> EnumType {                                                          \
+    return StringTo##EnumType(s);                                            \
+  }