diff --git a/src/codec/RowReaderV2.cpp b/src/codec/RowReaderV2.cpp index 47101dd923..15d7531c5f 100644 --- a/src/codec/RowReaderV2.cpp +++ b/src/codec/RowReaderV2.cpp @@ -301,6 +301,9 @@ Value RowReaderV2::getValueByIndex(const int64_t index) const { return nebula::extractIntOrFloat(data_, offset); case PropertyType::SET_FLOAT: return nebula::extractIntOrFloat(data_, offset); + case PropertyType::VECTOR: + // TODO(LZY) + break; case PropertyType::UNKNOWN: break; } diff --git a/src/common/datatypes/CMakeLists.txt b/src/common/datatypes/CMakeLists.txt index 61827f7579..07f3095de3 100644 --- a/src/common/datatypes/CMakeLists.txt +++ b/src/common/datatypes/CMakeLists.txt @@ -15,6 +15,7 @@ nebula_add_library( Set.cpp Geography.cpp Duration.cpp + Vector.cpp ) nebula_add_subdirectory(test) diff --git a/src/common/datatypes/CommonCpp2Ops.h b/src/common/datatypes/CommonCpp2Ops.h index bc773472d2..0883025ed6 100644 --- a/src/common/datatypes/CommonCpp2Ops.h +++ b/src/common/datatypes/CommonCpp2Ops.h @@ -29,6 +29,7 @@ struct LineString; struct Polygon; struct Geography; struct Duration; +struct Vector; } // namespace nebula namespace apache::thrift { @@ -54,6 +55,7 @@ SPECIALIZE_CPP2OPS(nebula::LineString); SPECIALIZE_CPP2OPS(nebula::Polygon); SPECIALIZE_CPP2OPS(nebula::Geography); SPECIALIZE_CPP2OPS(nebula::Duration); +SPECIALIZE_CPP2OPS(nebula::Vector); } // namespace apache::thrift diff --git a/src/common/datatypes/Value.cpp b/src/common/datatypes/Value.cpp index 7a6b5e4fb1..3a943c841e 100644 --- a/src/common/datatypes/Value.cpp +++ b/src/common/datatypes/Value.cpp @@ -19,6 +19,7 @@ #include "common/datatypes/Map.h" #include "common/datatypes/Path.h" #include "common/datatypes/Set.h" +#include "common/datatypes/Vector.h" #include "common/datatypes/Vertex.h" namespace nebula { @@ -32,6 +33,7 @@ const Value Value::kNullOverflow(NullType::ERR_OVERFLOW); const Value Value::kNullUnknownProp(NullType::UNKNOWN_PROP); const Value Value::kNullDivByZero(NullType::DIV_BY_ZERO); const Value Value::kNullOutOfRange(NullType::OUT_OF_RANGE); +const Value Value::kVectorDimNotMatch(NullType::VEC_DIM_NOT_MATCH); const uint64_t Value::kEmptyNullType = Value::Type::__EMPTY__ | Value::Type::NULLVALUE; const uint64_t Value::kNumericType = Value::Type::INT | Value::Type::FLOAT; @@ -117,6 +119,10 @@ Value::Value(const Value& rhs) : type_(Value::Type::__EMPTY__) { setDU(rhs.value_.duVal); break; } + case Type::VECTOR: { + setVec(rhs.value_.vecVal); + break; + } default: { assert(false); break; @@ -300,6 +306,14 @@ Value::Value(std::unordered_map&& map) { setM(std::make_unique(std::move(map))); } +Value::Value(const Vector& v) { + setVec(std::make_unique(v)); +} + +Value::Value(Vector&& v) { + setVec(std::make_unique(std::move(v))); +} + const std::string& Value::typeName() const { static const std::unordered_map typeNames = { {Type::__EMPTY__, "__EMPTY__"}, @@ -320,6 +334,7 @@ const std::string& Value::typeName() const { {Type::DATASET, "dataset"}, {Type::GEOGRAPHY, "geography"}, {Type::DURATION, "duration"}, + {Type::VECTOR, "vector"}, }; static const std::unordered_map nullTypes = { @@ -330,6 +345,7 @@ const std::string& Value::typeName() const { {NullType::ERR_OVERFLOW, "ERR_OVERFLOW"}, {NullType::UNKNOWN_PROP, "UNKNOWN_PROP"}, {NullType::DIV_BY_ZERO, "DIV_BY_ZERO"}, + {NullType::VEC_DIM_NOT_MATCH, "VEC_DIM_NOT_MATCH"}, }; static const std::string unknownType = "__UNKNOWN__"; @@ -598,6 +614,21 @@ void Value::setDuration(std::unique_ptr&& v) { setDU(std::move(v)); } +void Value::setVector(const Vector& v) { + clear(); + setVec(v); +} + +void Value::setVector(Vector&& v) { + clear(); + setVec(std::move(v)); +} + +void Value::setVector(std::unique_ptr&& v) { + clear(); + setVec(std::move(v)); +} + const double& Value::getFloat() const { CHECK_EQ(type_, Type::FLOAT); return value_.fVal; @@ -713,6 +744,16 @@ const Duration* Value::getDurationPtr() const { return value_.duVal.get(); } +const Vector& Value::getVector() const { + CHECK_EQ(type_, Type::VECTOR); + return *(value_.vecVal); +} + +const Vector* Value::getVectorPtr() const { + CHECK_EQ(type_, Type::VECTOR); + return value_.vecVal.get(); +} + NullType& Value::mutableNull() { CHECK_EQ(type_, Type::NULLVALUE); return value_.nVal; @@ -798,6 +839,11 @@ Duration& Value::mutableDuration() { return *value_.duVal; } +Vector& Value::mutableVector() { + CHECK_EQ(type_, Type::VECTOR); + return *(value_.vecVal); +} + NullType Value::moveNull() { CHECK_EQ(type_, Type::NULLVALUE); NullType v = std::move(value_.nVal); @@ -917,6 +963,13 @@ Duration Value::moveDuration() { return v; } +Vector Value::moveVector() { + CHECK_EQ(type_, Type::VECTOR); + Vector v = std::move(*(value_.vecVal)); + clear(); + return v; +} + void Value::clearSlow() { switch (type_) { case Type::__EMPTY__: { @@ -1002,6 +1055,10 @@ void Value::clearSlow() { destruct(value_.duVal); break; } + case Type::VECTOR: { + destruct(value_.vecVal); + break; + } } type_ = Type::__EMPTY__; } @@ -1089,6 +1146,10 @@ Value& Value::operator=(Value&& rhs) noexcept { setDU(std::move(rhs.value_.duVal)); break; } + case Type::VECTOR: { + setVec(std::move(rhs.value_.vecVal)); + break; + } default: { assert(false); break; @@ -1175,6 +1236,10 @@ Value& Value::operator=(const Value& rhs) { setDU(rhs.value_.duVal); break; } + case Type::VECTOR: { + setVec(rhs.value_.vecVal); + break; + } default: { assert(false); break; @@ -1446,6 +1511,26 @@ void Value::setDU(Duration&& v) { type_ = Type::DURATION; } +void Value::setVec(const Vector& v) { + new (std::addressof(value_.vecVal)) std::unique_ptr(new Vector(v)); + type_ = Type::VECTOR; +} + +void Value::setVec(Vector&& v) { + new (std::addressof(value_.vecVal)) std::unique_ptr(new Vector(std::move(v))); + type_ = Type::VECTOR; +} + +void Value::setVec(const std::unique_ptr& v) { + new (std::addressof(value_.vecVal)) std::unique_ptr(new Vector(*v)); + type_ = Type::VECTOR; +} + +void Value::setVec(std::unique_ptr&& v) { + new (std::addressof(value_.vecVal)) std::unique_ptr(std::move(v)); + type_ = Type::VECTOR; +} + // Convert Nebula::Value to a value compatible with Json standard // DATE, TIME, DATETIME will be converted to strings in UTC // VERTEX, EDGES, PATH will be converted to objects @@ -1511,6 +1596,9 @@ folly::dynamic Value::toJson() const { } case Value::Type::DURATION: { return getDuration().toJson(); + } + case Value::Type::VECTOR: { + return getVector().toJson(); } // no default so the compiler will warning when lack } @@ -1588,6 +1676,8 @@ std::string Value::toString() const { return "__NULL_UNKNOWN_PROP__"; case NullType::OUT_OF_RANGE: return "__NULL_OUT_OF_RANGE__"; + case NullType::VEC_DIM_NOT_MATCH: + return "__NULL_VEC_DIM_NOT_MATCH__"; } DLOG(FATAL) << "Unknown Null type " << static_cast(getNull()); return "__NULL_BAD_TYPE__"; @@ -1639,6 +1729,9 @@ std::string Value::toString() const { } case Value::Type::DURATION: { return getDuration().toString(); + } + case Value::Type::VECTOR: { + return getVector().toString(); } // no default so the compiler will warning when lack } @@ -1861,6 +1954,7 @@ Value Value::lessThan(const Value& v) const { // e.g. What is the result of `duration('P1M') < duration('P30D')`? return kNullBadType; } + case Value::Type::VECTOR: case Value::Type::NULLVALUE: case Value::Type::__EMPTY__: { return kNullBadType; @@ -1960,6 +2054,9 @@ Value Value::equal(const Value& v) const { case Value::Type::DURATION: { return getDuration() == v.getDuration(); } + case Value::Type::VECTOR: { + return getVector() == v.getVector(); + } case Value::Type::NULLVALUE: case Value::Type::__EMPTY__: { return false; @@ -2054,6 +2151,9 @@ void swap(Value& a, Value& b) { case Value::Type::DURATION: { return "DURATION"; } + case Value::Type::VECTOR: { + return "VECTOR"; + } default: { return "__UNKNOWN__"; } @@ -2269,6 +2369,9 @@ Value operator+(const Value& lhs, const Value& rhs) { case Value::Type::NULLVALUE: { return Value::kNullValue; } + case Value::Type::VECTOR: { + return Value::kNullBadType; + } } DLOG(FATAL) << "Unknown type: " << rhs.type(); return Value::kNullBadType; @@ -2343,6 +2446,7 @@ Value operator+(const Value& lhs, const Value& rhs) { } } } + case Value::Type::VECTOR: default: { return Value::kNullBadType; } @@ -2436,6 +2540,7 @@ Value operator-(const Value& lhs, const Value& rhs) { } } } + case Value::Type::VECTOR: default: { return Value::kNullBadType; } @@ -2707,6 +2812,10 @@ bool operator<(const Value& lhs, const Value& rhs) { DLOG(FATAL) << "Duration is not comparable."; return false; } + case Value::Type::VECTOR: { + DLOG(FATAL) << "Vector is not comparable."; + return false; + } case Value::Type::NULLVALUE: case Value::Type::__EMPTY__: { return false; @@ -2803,6 +2912,9 @@ bool Value::equals(const Value& rhs) const { case Value::Type::DURATION: { return getDuration() == rhs.getDuration(); } + case Value::Type::VECTOR: { + return getVector() == rhs.getVector(); + } case Value::Type::NULLVALUE: case Value::Type::__EMPTY__: { return false; @@ -2865,6 +2977,9 @@ std::size_t Value::hash() const { case Type::DURATION: { return std::hash()(getDuration()); } + case Type::VECTOR: { + return std::hash()(getVector()); + } case Type::DATASET: { DLOG(FATAL) << "Hash for DATASET has not been implemented"; break; diff --git a/src/common/datatypes/Value.h b/src/common/datatypes/Value.h index 595ef6b58a..3216305fc5 100644 --- a/src/common/datatypes/Value.h +++ b/src/common/datatypes/Value.h @@ -12,6 +12,7 @@ #include "common/datatypes/Date.h" #include "common/datatypes/Duration.h" +#include "common/datatypes/Vector.h" #include "common/thrift/ThriftTypes.h" namespace apache { @@ -33,6 +34,7 @@ struct List; struct Set; struct DataSet; struct Geography; +struct Vector; enum class NullType { __NULL__ = 0, @@ -43,6 +45,7 @@ enum class NullType { UNKNOWN_PROP = 5, DIV_BY_ZERO = 6, OUT_OF_RANGE = 7, + VEC_DIM_NOT_MATCH = 8, }; struct Value { @@ -55,6 +58,7 @@ struct Value { static const Value kNullUnknownProp; static const Value kNullDivByZero; static const Value kNullOutOfRange; + static const Value kVectorDimNotMatch; static const uint64_t kEmptyNullType; static const uint64_t kNumericType; @@ -79,6 +83,7 @@ struct Value { DATASET = 1UL << 14, GEOGRAPHY = 1UL << 15, DURATION = 1UL << 16, + VECTOR = 1UL << 17, NULLVALUE = 1UL << 63, }; @@ -136,6 +141,8 @@ struct Value { Value(Duration&& v); // NOLINT Value(const std::unordered_map& map); // NOLINT Value(std::unordered_map&& map); // NOLINT + Value(Vector&& v); // NOLINT + Value(const Vector& v); // NOLINT ~Value() { clear(); } @@ -213,6 +220,9 @@ struct Value { bool isDuration() const { return type_ == Type::DURATION; } + bool isVector() const { + return type_ == Type::VECTOR; + } void clear() { if (isNumeric()) { @@ -285,6 +295,9 @@ struct Value { void setDuration(const Duration& v); void setDuration(Duration&& v); void setDuration(std::unique_ptr&& v); + void setVector(const Vector& v); + void setVector(Vector&& v); + void setVector(std::unique_ptr&& v); const NullType& getNull() const { return value_.nVal; @@ -318,6 +331,8 @@ struct Value { const Geography* getGeographyPtr() const; const Duration& getDuration() const; const Duration* getDurationPtr() const; + const Vector& getVector() const; + const Vector* getVectorPtr() const; NullType moveNull(); bool moveBool(); @@ -336,6 +351,7 @@ struct Value { DataSet moveDataSet(); Geography moveGeography(); Duration moveDuration(); + Vector moveVector(); NullType& mutableNull(); bool& mutableBool(); @@ -354,6 +370,7 @@ struct Value { DataSet& mutableDataSet(); Geography& mutableGeography(); Duration& mutableDuration(); + Vector& mutableVector(); static const Value& null() noexcept { return kNullValue; @@ -404,6 +421,7 @@ struct Value { std::unique_ptr gVal; std::unique_ptr ggVal; std::unique_ptr duVal; + std::unique_ptr vecVal; Storage() {} ~Storage() {} @@ -483,6 +501,11 @@ struct Value { void setDU(std::unique_ptr&& v); void setDU(const Duration& v); void setDU(Duration&& v); + // Vector value + void setVec(const std::unique_ptr& v); + void setVec(std::unique_ptr&& v); + void setVec(const Vector& v); + void setVec(Vector&& v); }; static_assert(sizeof(Value) == 16UL, "The size of Value should be 16UL"); diff --git a/src/common/datatypes/ValueOps-inl.h b/src/common/datatypes/ValueOps-inl.h index 08ff4205f2..1d18e3fb04 100644 --- a/src/common/datatypes/ValueOps-inl.h +++ b/src/common/datatypes/ValueOps-inl.h @@ -26,6 +26,7 @@ #include "common/datatypes/PathOps-inl.h" #include "common/datatypes/SetOps-inl.h" #include "common/datatypes/Value.h" +#include "common/datatypes/VectorOps-inl.h" #include "common/datatypes/VertexOps-inl.h" namespace apache { @@ -89,6 +90,9 @@ struct TccStructTraits { } else if (_fname == "duVal") { fid = 17; _ftype = apache::thrift::protocol::T_STRUCT; + } else if (_fname == "vecVal") { + fid = 18; + _ftype = apache::thrift::protocol::T_STRUCT; } } }; @@ -264,6 +268,19 @@ uint32_t Cpp2Ops::write(Protocol* proto, nebula::Value const* obj xfer += proto->writeFieldEnd(); break; } + case nebula::Value::Type::VECTOR: { + xfer += proto->writeFieldBegin("vecVal", protocol::T_STRUCT, 18); + if (obj->getVectorPtr()) { + xfer += Cpp2Ops::write(proto, obj->getVectorPtr()); + } else { + xfer += proto->writeStructBegin("Vector"); + xfer += proto->writeStructEnd(); + xfer += proto->writeFieldStop(); + } + xfer += proto->writeFieldEnd(); + break; + } + case nebula::Value::Type::__EMPTY__: { break; } @@ -466,6 +483,18 @@ void Cpp2Ops::read(Protocol* proto, nebula::Value* obj) { } break; } + case 18: { + // Vector type + if (readState.fieldType == apache::thrift::protocol::T_STRUCT) { + obj->setVector(nebula::Vector()); + auto ptr = std::make_unique(); + Cpp2Ops::read(proto, ptr.get()); + obj->setVector(std::move(ptr)); + } else { + proto->skip(readState.fieldType); + } + break; + } default: { proto->skip(readState.fieldType); break; @@ -620,6 +649,16 @@ uint32_t Cpp2Ops::serializedSize(Protocol const* proto, nebula::V } break; } + case nebula::Value::Type::VECTOR: { + xfer += proto->serializedFieldSize("vecVal", protocol::T_STRUCT, 18); + if (obj->getVectorPtr()) { + xfer += Cpp2Ops::serializedSize(proto, obj->getVectorPtr()); + } else { + xfer += proto->serializedStructSize("Vector"); + xfer += proto->serializedSizeStop(); + } + break; + } case nebula::Value::Type::__EMPTY__: { break; } @@ -768,6 +807,16 @@ uint32_t Cpp2Ops::serializedSizeZC(Protocol const* proto, nebula: } break; } + case nebula::Value::Type::VECTOR: { + xfer += proto->serializedFieldSize("vecVal", protocol::T_STRUCT, 18); + if (obj->getVectorPtr()) { + xfer += Cpp2Ops::serializedSizeZC(proto, obj->getVectorPtr()); + } else { + xfer += proto->serializedStructSize("Vector"); + xfer += proto->serializedSizeStop(); + } + break; + } case nebula::Value::Type::__EMPTY__: { break; } diff --git a/src/common/datatypes/Vector.cpp b/src/common/datatypes/Vector.cpp new file mode 100644 index 0000000000..e074d226a5 --- /dev/null +++ b/src/common/datatypes/Vector.cpp @@ -0,0 +1,34 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "common/datatypes/Vector.h" + +#include + +#include + +namespace nebula { + +std::string Vector::toString() const { + std::vector value(values.size()); + std::transform(values.begin(), values.end(), value.begin(), [](const auto& v) -> std::string { + return std::to_string(v); + }); + std::stringstream os; + os << "[" << folly::join(",", value) << "]"; + return os.str(); +} + +folly::dynamic Vector::toJson() const { + auto listJsonObj = folly::dynamic::array(); + + for (const auto& val : values) { + listJsonObj.push_back(val); + } + + return listJsonObj; +} + +} // namespace nebula diff --git a/src/common/datatypes/Vector.h b/src/common/datatypes/Vector.h new file mode 100644 index 0000000000..f00158fab9 --- /dev/null +++ b/src/common/datatypes/Vector.h @@ -0,0 +1,131 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef COMMON_DATATYPES_VECTOR_H_ +#define COMMON_DATATYPES_VECTOR_H_ +#include + +#include +#include +#include +#include +#include +namespace nebula { +/* + * Vector Type is consist of a vector of values and dimension of vector. + * The element type in vector is float. + */ +struct Vector { + std::vector values; + + Vector() = default; + Vector(const Vector& rhs) = default; + Vector(Vector&& rhs) noexcept = default; + explicit Vector(std::vector vals) : values(std::move(vals)) {} + + Vector& operator=(const Vector& rhs) { + if (this == &rhs) { + return *this; + } + values = rhs.values; + return *this; + } + + Vector& operator=(Vector&& rhs) noexcept { + if (this == &rhs) { + return *this; + } + values = std::move(rhs.values); + return *this; + } + + size_t dim() const { + return values.size(); + } + + std::vector data() const { + return values; + } + + inline static bool floatcmp(float a, + float b, + double relative_tolerance = 1e-6, + double absolute_tolerance = 1e-6) { + if (a == b) { + return true; + } + double diff = std::fabs(a - b); + if (diff <= absolute_tolerance) { + return true; + } + return diff <= relative_tolerance * std::max(std::fabs(a), std::fabs(b)); + } + + bool operator==(const Vector& rhs) const { + size_t dimension = values.size(); + for (size_t i = 0; i < static_cast(dimension); ++i) { + if (!floatcmp(values[i], rhs.values[i])) { + return false; + } + } + return true; + } + + std::string toString() const; + folly::dynamic toJson() const; +}; + +inline std::ostream& operator<<(std::ostream& os, const Vector& s) { + return os << s.toString(); +} + +} // namespace nebula + +namespace std { +template <> +struct hash { + std::size_t operator()(const nebula::Vector& h) const { + if (h.values.size() == 1) { + return std::hash()(h.values[0]); + } + size_t seed = 0; + for (auto& v : h.values) { + seed ^= hash()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } +}; + +template <> +struct equal_to { + bool operator()(const nebula::Vector* lhs, const nebula::Vector* rhs) const { + return lhs == rhs ? true : (lhs != nullptr) && (rhs != nullptr) && (*lhs == *rhs); + } +}; + +template <> +struct equal_to { + bool operator()(const nebula::Vector* lhs, const nebula::Vector* rhs) const { + return lhs == rhs ? true : (lhs != nullptr) && (rhs != nullptr) && (*lhs == *rhs); + } +}; + +template <> +struct hash { + size_t operator()(const nebula::Vector* row) const { + return !row ? 0 : hash()(*row); + } +}; + +template <> +struct hash { + size_t operator()(const nebula::Vector* row) const { + return !row ? 0 : hash()(*row); + } +}; + +} // namespace std + +#endif // COMMON_DATATYPES_VECTOR_H_ diff --git a/src/common/datatypes/VectorOps-inl.h b/src/common/datatypes/VectorOps-inl.h new file mode 100644 index 0000000000..ce4535e27f --- /dev/null +++ b/src/common/datatypes/VectorOps-inl.h @@ -0,0 +1,136 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef COMMON_DATATYPES_VectorOPS_H_ +#define COMMON_DATATYPES_VectorOPS_H_ + +#include +#include +#include +#include +#include + +#include "common/base/Base.h" +#include "common/datatypes/CommonCpp2Ops.h" +#include "common/datatypes/Vector.h" + +namespace apache { +namespace thrift { + +namespace detail { + +template <> +struct TccStructTraits { + static void translateFieldName(MAYBE_UNUSED folly::StringPiece _fname, + MAYBE_UNUSED int16_t& fid, + MAYBE_UNUSED apache::thrift::protocol::TType& _ftype) { + if (_fname == "values") { + fid = 1; + _ftype = apache::thrift::protocol::T_LIST; + } + } +}; + +} // namespace detail + +inline constexpr protocol::TType Cpp2Ops::thriftType() { + return apache::thrift::protocol::T_STRUCT; +} + +template +uint32_t Cpp2Ops::write(Protocol* proto, nebula::Vector const* obj) { + uint32_t xfer = 0; + xfer += proto->writeStructBegin("Vector"); + + xfer += proto->writeFieldBegin("values", apache::thrift::protocol::T_LIST, 1); + xfer += detail::pm::protocol_methods, + std::vector>::write(*proto, obj->values); + xfer += proto->writeFieldEnd(); + + xfer += proto->writeFieldStop(); + xfer += proto->writeStructEnd(); + return xfer; +} + +template +void Cpp2Ops::read(Protocol* proto, nebula::Vector* obj) { + apache::thrift::detail::ProtocolReaderStructReadState readState; + + readState.readStructBegin(proto); + + using apache::thrift::protocol::TProtocolException; + + if (UNLIKELY(!readState.advanceToNextField(proto, 0, 1, protocol::T_I64))) { + goto _loop; + } + +_readField_values : { + obj->values = std::vector(); + detail::pm::protocol_methods, + std::vector>::read(*proto, obj->values); +} + + if (UNLIKELY(!readState.advanceToNextField(proto, 1, 0, protocol::T_STOP))) { + goto _loop; + } + +_end: + readState.readStructEnd(proto); + + return; + +_loop: + if (readState.fieldType == apache::thrift::protocol::T_STOP) { + goto _end; + } + + switch (readState.fieldId) { + case 1: { + if (LIKELY(readState.fieldType == apache::thrift::protocol::T_LIST)) { + goto _readField_values; + } else { + goto _skip; + } + } + default: { +_skip: + proto->skip(readState.fieldType); + readState.readFieldEnd(proto); + readState.readFieldBeginNoInline(proto); + goto _loop; + } + } +} + +template +uint32_t Cpp2Ops::serializedSize(Protocol const* proto, nebula::Vector const* obj) { + uint32_t xfer = 0; + xfer += proto->serializedStructSize("Vector"); + + xfer += proto->serializedFieldSize("values", apache::thrift::protocol::T_LIST, 2); + xfer += + detail::pm::protocol_methods, + std::vector>::serializedSize(*proto, obj->values); + xfer += proto->serializedSizeStop(); + return xfer; +} + +template +uint32_t Cpp2Ops::serializedSizeZC(Protocol const* proto, + nebula::Vector const* obj) { + uint32_t xfer = 0; + xfer += proto->serializedStructSize("Vector"); + + xfer += proto->serializedFieldSize("values", apache::thrift::protocol::T_LIST, 2); + xfer += + detail::pm::protocol_methods, + std::vector>::serializedSize(*proto, obj->values); + xfer += proto->serializedSizeStop(); + return xfer; +} + +} // namespace thrift +} // namespace apache +#endif // COMMON_DATATYPES_VectorOPS_H_ diff --git a/src/common/datatypes/test/CMakeLists.txt b/src/common/datatypes/test/CMakeLists.txt index 557f51edf5..01eed2e9af 100644 --- a/src/common/datatypes/test/CMakeLists.txt +++ b/src/common/datatypes/test/CMakeLists.txt @@ -103,6 +103,19 @@ nebula_add_test( gtest ) +nebula_add_test( + NAME + vector_test + SOURCES + VectorTest.cpp + OBJECTS + $ + $ + $ + LIBRARIES + gtest +) + nebula_add_test( NAME value_to_json_test diff --git a/src/common/datatypes/test/ValueTest.cpp b/src/common/datatypes/test/ValueTest.cpp index 73abce8464..c6983d2201 100644 --- a/src/common/datatypes/test/ValueTest.cpp +++ b/src/common/datatypes/test/ValueTest.cpp @@ -1560,6 +1560,7 @@ TEST(Value, typeName) { EXPECT_EQ("dataset", Value(DataSet()).typeName()); EXPECT_EQ("geography", Value(Geography()).typeName()); EXPECT_EQ("duration", Value(Duration()).typeName()); + EXPECT_EQ("vector", Value(Vector()).typeName()); EXPECT_EQ("__NULL__", Value::kNullValue.typeName()); EXPECT_EQ("NaN", Value::kNullNaN.typeName()); EXPECT_EQ("BAD_DATA", Value::kNullBadData.typeName()); @@ -1567,6 +1568,7 @@ TEST(Value, typeName) { EXPECT_EQ("ERR_OVERFLOW", Value::kNullOverflow.typeName()); EXPECT_EQ("UNKNOWN_PROP", Value::kNullUnknownProp.typeName()); EXPECT_EQ("DIV_BY_ZERO", Value::kNullDivByZero.typeName()); + EXPECT_EQ("VEC_DIM_NOT_MATCH", Value::kVectorDimNotMatch.typeName()); } using serializer = apache::thrift::CompactSerializer; @@ -1672,6 +1674,10 @@ TEST(Value, DecodeEncode) { Value(Duration()), Value(Duration(1, 2, 3)), Value(Duration(-1, -2, -3)), + + // Vector + Value(Vector({1, 2, 3})), + Value(Vector({1, 2, 3, 4})), }; for (const auto& val : values) { std::string buf; @@ -1727,6 +1733,10 @@ TEST(Value, Ctor) { Value vD{Duration()}; Value vD2{Duration(1, 2, 3)}; + // Vector + Value vVec1{Vector({1, 2, 3})}; + EXPECT_TRUE(vVec1.isVector()); + // Disabled // Lead to compile error // Value v(nullptr); @@ -1753,6 +1763,11 @@ TEST(Value, ToString) { d.addMicroseconds(20000000); EXPECT_EQ(d.toString(), "P14MT43200030.000000000S"); } + // Vector + { + Value vVec1{Vector({1, 2, 3})}; + EXPECT_EQ(vVec1.toString(), "[1.000000,2.000000,3.000000]"); + } } TEST(Value, DedupByValuePointer) { diff --git a/src/common/datatypes/test/VectorTest.cpp b/src/common/datatypes/test/VectorTest.cpp new file mode 100644 index 0000000000..00d1773492 --- /dev/null +++ b/src/common/datatypes/test/VectorTest.cpp @@ -0,0 +1,28 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include + +#include "common/base/Base.h" +#include "common/datatypes/Vector.h" + +TEST(VectorTest, Basic) { + nebula::Vector shortVec1({1.11, 2.22, 3.33}); + nebula::Vector emptyVec; + // we will test dimension in ValueTest.cpp + // so here we assume the dimensions of two vectors are equal + + EXPECT_EQ(shortVec1.dim(), 3); + EXPECT_EQ(emptyVec.dim(), 0); + EXPECT_EQ(shortVec1, nebula::Vector({1.11, 2.22, 3.33})); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + folly::init(&argc, &argv, true); + google::SetStderrLogging(google::INFO); + + return RUN_ALL_TESTS(); +} diff --git a/src/common/meta/NebulaSchemaProvider.cpp b/src/common/meta/NebulaSchemaProvider.cpp index 40475a1f67..c2a375d7b0 100644 --- a/src/common/meta/NebulaSchemaProvider.cpp +++ b/src/common/meta/NebulaSchemaProvider.cpp @@ -175,6 +175,9 @@ std::size_t NebulaSchemaProvider::fieldSize(PropertyType type, std::size_t fixed case PropertyType::SET_INT: case PropertyType::SET_FLOAT: return 8; + case PropertyType::VECTOR: { + return 8; + } case PropertyType::UNKNOWN: break; } diff --git a/src/common/utils/IndexKeyUtils.cpp b/src/common/utils/IndexKeyUtils.cpp index 0889e0b183..4ef28faaf1 100644 --- a/src/common/utils/IndexKeyUtils.cpp +++ b/src/common/utils/IndexKeyUtils.cpp @@ -258,6 +258,9 @@ Status IndexKeyUtils::checkValue(const Value& v, bool isNullable) { case nebula::NullType::OUT_OF_RANGE: { return Status::Error("Out of range"); } + case nebula::NullType::VEC_DIM_NOT_MATCH: { + return Status::Error("Vector dimension not match"); + } } DLOG(FATAL) << "Unknown Null type " << static_cast(v.getNull()); return Status::Error("Unknown Null type"); diff --git a/src/common/utils/IndexKeyUtils.h b/src/common/utils/IndexKeyUtils.h index ff4d058a82..381f454584 100644 --- a/src/common/utils/IndexKeyUtils.h +++ b/src/common/utils/IndexKeyUtils.h @@ -62,6 +62,8 @@ class IndexKeyUtils final { case PropertyType::SET_INT: case PropertyType::SET_FLOAT: return Value::Type::SET; + case PropertyType::VECTOR: + return Value::Type::VECTOR; case PropertyType::UNKNOWN: return Value::Type::__EMPTY__; } @@ -103,6 +105,8 @@ class IndexKeyUtils final { len = sizeof(uint64_t); // S2CellId break; } + case Value::Type::VECTOR: { + } default: LOG(ERROR) << "Unsupported default value type"; } diff --git a/src/graph/util/SchemaUtil.cpp b/src/graph/util/SchemaUtil.cpp index f6a62feaaf..e3d1689d4b 100644 --- a/src/graph/util/SchemaUtil.cpp +++ b/src/graph/util/SchemaUtil.cpp @@ -313,6 +313,8 @@ Value::Type SchemaUtil::propTypeToValueType(nebula::cpp2::PropertyType propType) case nebula::cpp2::PropertyType::SET_INT: case nebula::cpp2::PropertyType::SET_FLOAT: return Value::Type::SET; + case nebula::cpp2::PropertyType::VECTOR: + return Value::Type::VECTOR; case nebula::cpp2::PropertyType::UNKNOWN: return Value::Type::__EMPTY__; } diff --git a/src/graph/util/ToJson.cpp b/src/graph/util/ToJson.cpp index 5c6b574690..c93663907d 100644 --- a/src/graph/util/ToJson.cpp +++ b/src/graph/util/ToJson.cpp @@ -79,6 +79,7 @@ folly::dynamic toJson(const Value &value) { case Value::Type::SET: case Value::Type::DATASET: case Value::Type::GEOGRAPHY: + case Value::Type::VECTOR: // TODO store to object or array return value.toString(); } diff --git a/src/interface/common.thrift b/src/interface/common.thrift index 2acbb1ff61..91c9e6e47d 100644 --- a/src/interface/common.thrift +++ b/src/interface/common.thrift @@ -26,6 +26,7 @@ cpp_include "common/datatypes/KeyValueOps-inl.h" cpp_include "common/datatypes/HostAddrOps-inl.h" cpp_include "common/datatypes/GeographyOps-inl.h" cpp_include "common/datatypes/DurationOps-inl.h" +cpp_include "common/datatypes/VectorOps-inl.h" /* * @@ -98,6 +99,7 @@ enum NullType { UNKNOWN_PROP = 5, DIV_BY_ZERO = 6, OUT_OF_RANGE = 7, + VEC_DIM_NOT_MATCH = 8, } (cpp.enum_strict, cpp.type = "nebula::NullType") @@ -120,8 +122,13 @@ union Value { 15: DataSet (cpp.type = "nebula::DataSet") gVal (cpp.ref_type = "unique"); 16: Geography (cpp.type = "nebula::Geography") ggVal (cpp.ref_type = "unique"); 17: Duration (cpp.type = "nebula::Duration") duVal (cpp.ref_type = "unique"); + 18: Vector (cpp.type = "nebula::Vector") vecVal (cpp.ref_type = "unique"); } (cpp.type = "nebula::Value") +// Vector type +struct Vector { + 1: list values; +} (cpp.type = "nebula::Vector") // Ordered list struct NList { @@ -302,6 +309,9 @@ enum PropertyType { SET_INT = 36, SET_FLOAT = 37, + // Vector type + VECTOR = 38, + } (cpp.enum_strict) /* diff --git a/src/meta/processors/schema/SchemaUtil.cpp b/src/meta/processors/schema/SchemaUtil.cpp index 0bdcc38e7f..3785e10bd2 100644 --- a/src/meta/processors/schema/SchemaUtil.cpp +++ b/src/meta/processors/schema/SchemaUtil.cpp @@ -276,6 +276,12 @@ bool SchemaUtil::checkType(std::vector& columns) { case PropertyType::SET_FLOAT: { return extractIntOrFloat(value, name); } + case PropertyType::VECTOR: { + // detect column dim and value dim + return column.get_type().type_length_ref().has_value() && value.isVector() && + value.getVector().dim() == + static_cast(column.get_type().type_length_ref().value()); + } case PropertyType::UNKNOWN: case PropertyType::VID: DLOG(INFO) << "Don't supported type "