diff --git a/.proj.toml b/.proj.toml index b14d763339..359d96f580 100644 --- a/.proj.toml +++ b/.proj.toml @@ -70,6 +70,13 @@ has-cpu-only-benchmarks = false has-cuda-tests = true has-cuda-benchmarks = false +[targets.local-pcg-execution] +type = "lib" +has-cpu-only-tests = true +has-cpu-only-benchmarks = false +has-cuda-tests = false +has-cuda-benchmarks = false + [targets.models] type = "lib" has-cpu-only-tests = true diff --git a/CMakeLists.txt b/CMakeLists.txt index a5f5a6fa11..8b313f5d4f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,7 +102,6 @@ include(spdlog) include(doctestlib) # named doctestlib to avoid a name collision with doctest.cmake in rapidcheck include(gbenchmark) include(libassert) -include(visit_struct) include(CTest) include(fmt) include(legion) diff --git a/cmake/visit_struct.cmake b/cmake/visit_struct.cmake deleted file mode 100644 index 108745fc14..0000000000 --- a/cmake/visit_struct.cmake +++ /dev/null @@ -1,16 +0,0 @@ -add_library( - visit_struct - INTERFACE -) -target_include_directories( - visit_struct - INTERFACE - ${CMAKE_CURRENT_SOURCE_DIR}/deps/visit_struct/include/ -) -set_target_properties( - visit_struct - PROPERTIES - CXX_STANDARD 11 - CXX_STANDARD_REQUIRED YES - CXX_EXTENSIONS NO -) diff --git a/flake.lock b/flake.lock index f016c47f45..1a35c5f043 100644 --- a/flake.lock +++ b/flake.lock @@ -66,11 +66,11 @@ ] }, "locked": { - "lastModified": 1752259929, - "narHash": "sha256-GkMRIi6Xk3qswrbekWtO1sQYz61mw25+62boDk1Gd7s=", + "lastModified": 1759555675, + "narHash": "sha256-B+L87KggnpcYpHas6fEoqqG7gfXIyQpcA1SfXi5y5c8=", "ref": "refs/heads/master", - "rev": "669773600c781ab8b29ac2379d0c070721417f9d", - "revCount": 117, + "rev": "950f2e78aa913e1a1dfaffbbb0867bc3560a43c9", + "revCount": 121, "type": "git", "url": "https://git.sr.ht/~lockshaw/proj" }, diff --git a/flake.nix b/flake.nix index 474a22f385..6201cc935e 100644 --- a/flake.nix +++ b/flake.nix @@ -93,9 +93,6 @@ -DFF_USE_EXTERNAL_EXPECTED=ON \ -DFF_USE_EXTERNAL_GBENCHMARK=ON \ -DFF_USE_EXTERNAL_LIBASSERT=ON \ - -DFF_USE_EXTERNAL_RANGEV3=ON \ - -DFF_USE_EXTERNAL_BOOST_PREPROCESSOR=ON \ - -DFF_USE_EXTERNAL_TYPE_INDEX=ON" ''; buildInputs = builtins.concatLists [ diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index e2e561c384..2e71e577c0 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(runtime) add_subdirectory(op-attrs) add_subdirectory(kernels) add_subdirectory(local-execution) +add_subdirectory(local-pcg-execution) add_subdirectory(task-spec) add_subdirectory(utils) add_subdirectory(ffi) diff --git a/lib/compiler/include/compiler/allowed_machine_views.h b/lib/compiler/include/compiler/allowed_machine_views.h index 9bb73fd1a9..eb5b057879 100644 --- a/lib/compiler/include/compiler/allowed_machine_views.h +++ b/lib/compiler/include/compiler/allowed_machine_views.h @@ -1,18 +1,18 @@ #ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H #define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H -#include "pcg/machine_specification.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" #include "pcg/machine_view.dtg.h" -#include "pcg/operator_task_space.dtg.h" namespace FlexFlow { bool is_valid_machine_view(MachineView const &mv, OperatorTaskSpace const &task, - MachineSpecification const &ms); + MachineComputeSpecification const &ms); std::unordered_set - get_allowed_machine_views(MachineSpecification const &machine_spec, + get_allowed_machine_views(MachineComputeSpecification const &machine_spec, 
OperatorTaskSpace const &task, DeviceType device_type); diff --git a/lib/compiler/include/compiler/cost_estimator/communication_edge.h b/lib/compiler/include/compiler/cost_estimator/communication_edge.h new file mode 100644 index 0000000000..1452ee38b0 --- /dev/null +++ b/lib/compiler/include/compiler/cost_estimator/communication_edge.h @@ -0,0 +1,50 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_COMMUNICATION_EDGE_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_COMMUNICATION_EDGE_H + +#include "pcg/machine_space_coordinate.dtg.h" + +namespace FlexFlow { + +struct CommunicationEdge { + CommunicationEdge() = delete; + + CommunicationEdge(MachineSpaceCoordinate const &src, + MachineSpaceCoordinate const &dst); + + bool operator==(CommunicationEdge const &) const; + bool operator!=(CommunicationEdge const &) const; + + bool operator<(CommunicationEdge const &) const; + bool operator>(CommunicationEdge const &) const; + bool operator<=(CommunicationEdge const &) const; + bool operator>=(CommunicationEdge const &) const; + + MachineSpaceCoordinate const &get_src() const; + MachineSpaceCoordinate const &get_dst() const; +private: + MachineSpaceCoordinate src; + MachineSpaceCoordinate dst; +private: + std::tuple< + decltype(src) const &, + decltype(dst) const & + > tie() const; + + friend struct ::std::hash; +}; + +std::string format_as(CommunicationEdge const &); +std::ostream &operator<<(std::ostream &, CommunicationEdge const &); + +} // namespace FlexFlow + +namespace std { + +template <> +struct hash<::FlexFlow::CommunicationEdge> { + size_t operator()(::FlexFlow::CommunicationEdge const &) const; +}; + +} + +#endif diff --git a/lib/compiler/include/compiler/cost_estimator/network_cost_model.h b/lib/compiler/include/compiler/cost_estimator/network_cost_model.h new file mode 100644 index 0000000000..3772acd54a --- /dev/null +++ b/lib/compiler/include/compiler/cost_estimator/network_cost_model.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_NETWORK_COST_MODEL_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_NETWORK_COST_MODEL_H + +#include "compiler/cost_estimator/tensor_set_movement.dtg.h" +#include "pcg/machine_specification.dtg.h" + +namespace FlexFlow { + +float estimate_communication_cost(MachineSpecification const &, + TensorSetMovement const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/cost_estimator/op_cost_metrics.h b/lib/compiler/include/compiler/cost_estimator/op_cost_metrics.h index f2d12aff71..aa638f7287 100644 --- a/lib/compiler/include/compiler/cost_estimator/op_cost_metrics.h +++ b/lib/compiler/include/compiler/cost_estimator/op_cost_metrics.h @@ -6,6 +6,9 @@ namespace FlexFlow { +bool is_pareto_optimal_in(OpCostMetrics const &, + std::unordered_set const &); + OpCostMetrics make_op_cost_metrics_from_runtime_only( RuntimeOnlyOpCostMetrics const &runtime_only, num_bytes_t const &memory_usage); diff --git a/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.cc b/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.cc new file mode 100644 index 0000000000..da4d7b1894 --- /dev/null +++ b/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.cc @@ -0,0 +1,45 @@ +#include "compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include 
"op-attrs/parallel_tensor_dim_degrees.h" +#include "utils/bidict/algorithms/exhaustive_relational_join.h" +#include +#include "op-attrs/task_space_coordinate.h" +#include "op-attrs/parallel_tensor_space_coordinate.h" +#include "utils/bidict/algorithms/transform_keys.h" +#include "utils/bidict/algorithms/transform_values.h" + +namespace FlexFlow { + +ParallelTensorSpaceToMachineSpaceMapping + ptensor_machine_map_from_composition( + OperatorSpaceToMachineSpaceMapping const &op_task_to_machine_space_mapping, + OperatorSpaceToParallelTensorSpaceMapping const &op_task_to_parallel_tensor_space_mapping) { + ASSERT( + op_task_to_machine_space_mapping.operator_task_space + == + get_operator_task_space_for_mapping(op_task_to_parallel_tensor_space_mapping) + ); + + bidict + pt_to_op_coord_map = + transform_keys( + transform_values( + op_task_to_parallel_tensor_space_mapping.raw_mapping.coord_mapping.reversed(), + task_space_coordinate_from_dim_coord), + parallel_tensor_space_coord_from_dim_coord); + + bidict + op_to_ms_coord_map = + op_task_to_machine_space_mapping.raw_mapping; + + return ParallelTensorSpaceToMachineSpaceMapping{ + /*raw_mapping=*/exhaustive_relational_join( + pt_to_op_coord_map, + op_to_ms_coord_map), + /*parallel_tensor_space=*/ + parallel_tensor_dim_degrees_from_dim_domain( + op_task_to_parallel_tensor_space_mapping.raw_mapping.r_domain), + }; +}; + +} // namespace FlexFlow diff --git a/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.h b/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.h new file mode 100644 index 0000000000..caaad8755a --- /dev/null +++ b/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_PARALLEL_TENSOR_SPACE_TO_MACHINE_SPACE_MAPPING_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_PARALLEL_TENSOR_SPACE_TO_MACHINE_SPACE_MAPPING_H + +#include "compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.dtg.h" +#include "pcg/operator_space_to_machine_space_mapping.dtg.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" + +namespace FlexFlow { + +ParallelTensorSpaceToMachineSpaceMapping + ptensor_machine_map_from_composition( + OperatorSpaceToMachineSpaceMapping const &op_task_to_machine_space_mapping, + OperatorSpaceToParallelTensorSpaceMapping const &op_task_to_parallel); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.struct.toml b/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.struct.toml new file mode 100644 index 0000000000..f64e405757 --- /dev/null +++ b/lib/compiler/include/compiler/cost_estimator/parallel_tensor_space_to_machine_space_mapping.struct.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "ParallelTensorSpaceToMachineSpaceMapping" +features = [ + "eq", + "hash", + "fmt", +] + +includes = [ + "op-attrs/parallel_tensor_space_coordinate.dtg.h", + "pcg/machine_space_coordinate.dtg.h", + "op-attrs/parallel_tensor_dim_degrees.dtg.h", + "utils/bidict/bidict.h", +] + +[[fields]] +name = "raw_mapping" +type = "::FlexFlow::bidict<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::MachineSpaceCoordinate>" + +[[fields]] +name = "parallel_tensor_space" +type = "::FlexFlow::ParallelTensorDimDegrees" diff --git 
a/lib/compiler/include/compiler/cost_estimator/single_communication.struct.toml b/lib/compiler/include/compiler/cost_estimator/single_communication.struct.toml new file mode 100644 index 0000000000..696bbd990f --- /dev/null +++ b/lib/compiler/include/compiler/cost_estimator/single_communication.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "SingleCommunication" +features = [ + "eq", + "ord", + "hash", + "fmt", + "json", +] + +includes = [ + "pcg/machine_space_coordinate.dtg.h", +] + +[[fields]] +name = "src_machine_coord" +type = "::FlexFlow::MachineSpaceCoordinate" + +[[fields]] +name = "dst_machine_coord" +type = "::FlexFlow::MachineSpaceCoordinate" diff --git a/lib/compiler/include/compiler/cost_estimator/single_tensor_movement.struct.toml b/lib/compiler/include/compiler/cost_estimator/single_tensor_movement.struct.toml index 70f73ebe51..8344ccbd75 100644 --- a/lib/compiler/include/compiler/cost_estimator/single_tensor_movement.struct.toml +++ b/lib/compiler/include/compiler/cost_estimator/single_tensor_movement.struct.toml @@ -7,24 +7,16 @@ features = [ ] includes = [ - "op-attrs/parallel_tensor_shape.dtg.h", - "pcg/machine_view.dtg.h", - "", + "compiler/cost_estimator/communication_edge.h", + "utils/units/num_bytes_t.h", + "", ] src_includes = [ - "utils/hash/unordered_set.h", - "utils/fmt/unordered_set.h", + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", ] [[fields]] -name = "parallel_tensor_shape" -type = "::FlexFlow::ParallelTensorShape" - -[[fields]] -name = "src_machine_views" -type = "std::unordered_set<::FlexFlow::MachineView>" - -[[fields]] -name = "dst_machine_views" -type = "std::unordered_set<::FlexFlow::MachineView>" +name = "edge_to_size" +type = "std::unordered_map<::FlexFlow::CommunicationEdge, ::FlexFlow::num_bytes_t>" diff --git a/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.h b/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.h index 34188ff97c..cdc2c73173 100644 --- a/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.h +++ b/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.h @@ -8,7 +8,11 @@ namespace FlexFlow { -TensorSetMovement get_tensor_set_movement_from_pcg_edge( +TensorSetMovement + empty_tensor_set_movement(); + +TensorSetMovement + get_tensor_set_movement_from_pcg_edge( ParallelComputationGraphEdge const &edge, ParallelComputationGraph const &pcg, MachineView const &src_mv, diff --git a/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.struct.toml b/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.struct.toml index 3625605239..90b88b3dd3 100644 --- a/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.struct.toml +++ b/lib/compiler/include/compiler/cost_estimator/tensor_set_movement.struct.toml @@ -7,15 +7,16 @@ features = [ ] includes = [ - "compiler/cost_estimator/single_tensor_movement.dtg.h", - "", + "compiler/cost_estimator/communication_edge.h", + "utils/units/num_bytes_t.h", + "", ] src_includes = [ - "utils/fmt/unordered_multiset.h", - "utils/hash/unordered_multiset.h", + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", ] [[fields]] -name = "single_tensor_movements" -type = "std::unordered_multiset<::FlexFlow::SingleTensorMovement>" +name = "edge_to_size" +type = "std::unordered_map<::FlexFlow::CommunicationEdge, ::FlexFlow::num_bytes_t>" diff --git a/lib/compiler/include/compiler/graph_optimize_result.h b/lib/compiler/include/compiler/graph_optimize_result.h deleted file mode 100644 index 
f3843e2a93..0000000000 --- a/lib/compiler/include/compiler/graph_optimize_result.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_GRAPH_OPTIMIZE_RESULT_H -#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_GRAPH_OPTIMIZE_RESULT_H - -#include "compiler/graph_optimize_result.dtg.h" - -namespace FlexFlow { - -std::string format_as(GraphOptimizeResult const &); -std::ostream &operator<<(std::ostream &, GraphOptimizeResult const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/compiler/include/compiler/graph_optimize_result.struct.toml b/lib/compiler/include/compiler/graph_optimize_result.struct.toml index 22f29cbd59..4753713c99 100644 --- a/lib/compiler/include/compiler/graph_optimize_result.struct.toml +++ b/lib/compiler/include/compiler/graph_optimize_result.struct.toml @@ -1,16 +1,17 @@ namespace = "FlexFlow" name = "GraphOptimizeResult" -features = [ ] +features = [ + "fmt", +] includes = [ - "compiler/machine_mapping/machine_mapping.dtg.h", - "pcg/parallel_computation_graph/parallel_computation_graph.h" + "compiler/mapped_parallel_computation_graph.dtg.h", ] -[[fields]] -name = "pcg" -type = "::FlexFlow::ParallelComputationGraph" +src_includes = [ + "compiler/mapped_parallel_computation_graph.h", +] [[fields]] -name = "machine_mapping" -type = "::FlexFlow::MachineMapping" +name = "mapped_pcg" +type = "::FlexFlow::MappedParallelComputationGraph" diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_communication_edge.struct.toml b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_communication_edge.struct.toml new file mode 100644 index 0000000000..6ba1f3139b --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_communication_edge.struct.toml @@ -0,0 +1,20 @@ +namespace = "FlexFlow" +name = "AbstractedCommunicationEdge" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.dtg.h", +] + +[[fields]] +name = "src" +type = "::FlexFlow::AbstractedDevice" + +[[fields]] +name = "dst" +type = "::FlexFlow::AbstractedDevice" diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.h b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.h new file mode 100644 index 0000000000..7f5313eba6 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_ABSTRACTED_TENSOR_SET_MOVEMENT_ABSTRACTED_DEVICE_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_ABSTRACTED_TENSOR_SET_MOVEMENT_ABSTRACTED_DEVICE_H + +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.dtg.h" +#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.dtg.h" + +namespace FlexFlow { + +MachineSpaceCoordinate + concretize_abstracted_device( + AbstractedDevice const &abstracted_device, + std::unordered_map const &machine_space_stencils); + +} 
// namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.struct.toml b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.struct.toml new file mode 100644 index 0000000000..061972c649 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "AbstractedDevice" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "op-attrs/task_space_coordinate.dtg.h", + "utils/full_binary_tree/binary_tree_path.dtg.h", +] + +[[fields]] +name = "operator_tree_path" +type = "::FlexFlow::BinaryTreePath" + +[[fields]] +name = "task_space_coordinate" +type = "::FlexFlow::TaskSpaceCoordinate" diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_communication.struct.toml b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_communication.struct.toml new file mode 100644 index 0000000000..0a5abd6a06 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_communication.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "AbstractedSingleCommunication" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_communication_edge.dtg.h", + "utils/units/num_bytes_t.h", +] + +[[fields]] +name = "edge" +type = "::FlexFlow::AbstractedCommunicationEdge" + +[[fields]] +name = "size" +type = "::FlexFlow::num_bytes_t" diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication.struct.toml b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication.struct.toml new file mode 100644 index 0000000000..0df7d803d6 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "AbstractedSingleTensorCommunication" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.dtg.h", + "utils/units/num_bytes_t.h", +] + +[[fields]] +name = "edge" +type = "::FlexFlow::AbstractedSingleTensorCommunicationEdge" + +[[fields]] +name = "size" +type = "::FlexFlow::num_bytes_t" diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.h b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.h new file mode 100644 index 0000000000..d6e7b49e17 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_ABSTRACTED_TENSOR_SET_MOVEMENT_ABSTRACTED_SINGLE_TENSOR_COMMUNICATION_EDGE_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_ABSTRACTED_TENSOR_SET_MOVEMENT_ABSTRACTED_SINGLE_TENSOR_COMMUNICATION_EDGE_H + +#include "compiler/cost_estimator/communication_edge.h" +#include 
"compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.dtg.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.dtg.h" +#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" +namespace FlexFlow { + +std::optional + concretize_abstracted_single_tensor_communication_edge( + AbstractedSingleTensorCommunicationEdge const &edge, + MachineSpaceStencil const &src_machine_stencil, + std::unordered_map const &dst_machine_stencils); + + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.struct.toml b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.struct.toml new file mode 100644 index 0000000000..f6c264ec59 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.struct.toml @@ -0,0 +1,20 @@ +namespace = "FlexFlow" +name = "AbstractedSingleTensorCommunicationEdge" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.dtg.h", +] + +[[fields]] +name = "src_coord" +type = "::FlexFlow::TaskSpaceCoordinate" + +[[fields]] +name = "dst" +type = "::FlexFlow::AbstractedDevice" diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h new file mode 100644 index 0000000000..7789616ce6 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h @@ -0,0 +1,30 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_ABSTRACTED_TENSOR_SET_MOVEMENT_ABSTRACTED_SINGLE_TENSOR_MOVEMENT_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_ABSTRACTED_TENSOR_SET_MOVEMENT_ABSTRACTED_SINGLE_TENSOR_MOVEMENT_H + +#include "compiler/cost_estimator/tensor_set_movement.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication.dtg.h" + +namespace FlexFlow { + +std::unordered_set + abstracted_single_tensor_movement_get_dst_layers(AbstractedSingleTensorMovement const &); + +AbstractedSingleTensorMovement + merge_abstracted_single_tensor_movements( + std::unordered_multiset const &); + +AbstractedSingleTensorMovement + abstracted_single_tensor_movement_from_communications( + BinaryTreePath const &src_op_tree_path, + std::unordered_set const &communications); + +TensorSetMovement concretize_abstracted_single_tensor_movement( + AbstractedSingleTensorMovement const &, + std::unordered_map const &pre_machine_stencils, + std::unordered_map const &post_machine_stencils); + +} // namespace FlexFlow + +#endif diff --git 
a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.struct.toml b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.struct.toml index 449a448706..62a1d37d57 100644 --- a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.struct.toml @@ -7,24 +7,20 @@ features = [ ] includes = [ - "op-attrs/parallel_tensor_shape.dtg.h", - "utils/full_binary_tree/binary_tree_path.dtg.h", - "", + "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.dtg.h", + "utils/units/num_bytes_t.h", + "", ] src_includes = [ - "utils/hash/unordered_set.h", - "utils/fmt/unordered_set.h", + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", ] [[fields]] -name = "parallel_tensor_shape" -type = "::FlexFlow::ParallelTensorShape" +name = "src_op_tree_path" +type = "::FlexFlow::BinaryTreePath" [[fields]] -name = "src_machine_views" -type = "std::unordered_set<::FlexFlow::BinaryTreePath>" - -[[fields]] -name = "dst_machine_views" -type = "std::unordered_set<::FlexFlow::BinaryTreePath>" +name = "edge_to_size" +type = "std::unordered_map<::FlexFlow::AbstractedSingleTensorCommunicationEdge, ::FlexFlow::num_bytes_t>" diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h index 5b7e2f3613..81977e4ea7 100644 --- a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h @@ -3,13 +3,22 @@ #include "compiler/cost_estimator/tensor_set_movement.dtg.h" #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_communication.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.dtg.h" #include "compiler/machine_mapping/machine_mapping.dtg.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.dtg.h" #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" namespace FlexFlow { AbstractedTensorSetMovement empty_abstracted_tensor_set_movement(); +AbstractedTensorSetMovement + abstracted_tensor_set_movement_from_single_tensor_movement( + AbstractedSingleTensorMovement const &); + std::unordered_set get_src_layers(AbstractedTensorSetMovement const &); std::unordered_set @@ -17,8 +26,8 @@ std::unordered_set TensorSetMovement concretize_abstracted_tensor_set_movement( AbstractedTensorSetMovement const &, - ParallelLayerGuidObliviousMachineMapping const &pre, - ParallelLayerGuidObliviousMachineMapping const &post); + std::unordered_map const &pre_machine_stencils, + std::unordered_map const &post_machine_stencils); } // namespace FlexFlow diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.struct.toml 
b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.struct.toml index 4cf184706b..cc30234d26 100644 --- a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.struct.toml @@ -7,15 +7,15 @@ features = [ ] includes = [ - "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.dtg.h", "", + "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.dtg.h", ] src_includes = [ - "utils/fmt/unordered_multiset.h", - "utils/hash/unordered_multiset.h", + "utils/fmt/unordered_set.h", + "utils/hash/unordered_set.h", ] [[fields]] name = "single_tensor_movements" -type = "std::unordered_multiset<::FlexFlow::AbstractedSingleTensorMovement>" +type = "std::unordered_set<::FlexFlow::AbstractedSingleTensorMovement>" diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.h b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.h index 8567a7a3e6..0025691289 100644 --- a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.h +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.h @@ -1,12 +1,21 @@ #ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_GET_ABSTRACTED_TENSOR_SET_MOVEMENT_ACROSS_SPLIT_H #define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_GET_ABSTRACTED_TENSOR_SET_MOVEMENT_ACROSS_SPLIT_H +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_communication.dtg.h" #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.dtg.h" #include "compiler/machine_mapping/transitive_reduced_pcg.dtg.h" #include "compiler/series_parallel/pcg/pcg_binary_series_split.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h" namespace FlexFlow { +AbstractedSingleTensorMovement + get_abstracted_single_tensor_movement_along_edge( + ParallelComputationGraph const &pcg, + ParallelComputationGraphEdge const &edge, + BinaryTreePath const &src_path, + BinaryTreePath const &dst_path); + AbstractedTensorSetMovement get_abstracted_tensor_set_movement_across_split( TransitiveReducedPCG const &transitive_reduced_pcg, PCGBinarySeriesSplit const &split); diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.h b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.h new file mode 100644 index 0000000000..656b9a2093 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_ABSTRACTED_TENSOR_SET_MOVEMENT_MACHINE_SPACE_STENCIL_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_ABSTRACTED_TENSOR_SET_MOVEMENT_MACHINE_SPACE_STENCIL_H + +#include "op-attrs/task_space_coordinate.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.dtg.h" + +namespace FlexFlow { + 
+MachineSpaceCoordinate + machine_space_stencil_compute_machine_coord( + MachineSpaceStencil const &, + TaskSpaceCoordinate const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.struct.toml b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.struct.toml new file mode 100644 index 0000000000..ea9f61c71f --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.struct.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "MachineSpaceStencil" +features = [ + "eq", + "ord", + "hash", + "fmt", + "json", +] + +includes = [ + "op-attrs/operator_task_space.dtg.h", + "pcg/machine_view.dtg.h", +] + +[[fields]] +name = "operator_task_space" +type = "::FlexFlow::OperatorTaskSpace" + +[[fields]] +name = "machine_view" +type = "::FlexFlow::MachineView" diff --git a/lib/compiler/include/compiler/machine_mapping/get_machine_resource_splits.h b/lib/compiler/include/compiler/machine_mapping/get_machine_resource_splits.h deleted file mode 100644 index 990c1c8205..0000000000 --- a/lib/compiler/include/compiler/machine_mapping/get_machine_resource_splits.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_GET_MACHINE_RESOURCE_SPLITS_H -#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_GET_MACHINE_RESOURCE_SPLITS_H - -#include "pcg/machine_specification.dtg.h" -#include -#include - -namespace FlexFlow { - -std::unordered_set> - get_machine_resource_splits(MachineSpecification const &resource); - -} // namespace FlexFlow - -#endif diff --git a/lib/compiler/include/compiler/machine_mapping/get_optimal_machine_mapping.h b/lib/compiler/include/compiler/machine_mapping/get_optimal_machine_mapping.h index 2cd3f3e289..3e49899003 100644 --- a/lib/compiler/include/compiler/machine_mapping/get_optimal_machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping/get_optimal_machine_mapping.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_COMPILER_MACHINE_MAPPING_GET_OPTIMAL_MACHINE_MAPPING_H #define _FLEXFLOW_COMPILER_MACHINE_MAPPING_GET_OPTIMAL_MACHINE_MAPPING_H +#include "compiler/machine_mapping/machine_compute_resource_slice.dtg.h" #include "compiler/machine_mapping/machine_mapping_cache.dtg.h" #include "compiler/machine_mapping/machine_mapping_constraints.dtg.h" #include "compiler/machine_mapping/machine_mapping_context.dtg.h" @@ -8,7 +9,6 @@ #include "compiler/machine_mapping/machine_mapping_problem_tree/mm_problem_tree_parallel_split.dtg.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/mm_problem_tree_series_split.dtg.h" #include "compiler/machine_mapping/parallel_split_transformation.dtg.h" -#include "pcg/machine_specification.dtg.h" namespace FlexFlow { @@ -16,14 +16,14 @@ MachineMappingResult get_optimal_machine_mapping(MachineMappingCache &result_cache, MachineMappingContext const &context, MachineMappingProblemTree const &problem_tree, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints); MachineMappingResult get_optimal_machine_mapping(MachineMappingCache &result_cache, MachineMappingContext const &context, MMProblemTreeSeriesSplit const &series_split, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints, std::optional const ¶llel_split_transformation); @@ -32,14 
+32,14 @@ MachineMappingResult get_optimal_machine_mapping( MachineMappingCache &result_cache, MachineMappingContext const &context, MMProblemTreeParallelSplit const ¶llel_split, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints); MachineMappingResult get_optimal_machine_mapping( MachineMappingCache &result_cache, MachineMappingContext const &, UnmappedRuntimeOnlyOpCostEstimateKey const &leaf, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints); } // namespace FlexFlow diff --git a/lib/compiler/include/compiler/machine_mapping/get_tensor_set_movement_across_split.h b/lib/compiler/include/compiler/machine_mapping/get_tensor_set_movement_across_split.h index 2aed9a20e4..48eaf59592 100644 --- a/lib/compiler/include/compiler/machine_mapping/get_tensor_set_movement_across_split.h +++ b/lib/compiler/include/compiler/machine_mapping/get_tensor_set_movement_across_split.h @@ -5,6 +5,7 @@ #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h" #include "compiler/machine_mapping/transitive_reduced_pcg.dtg.h" #include "compiler/series_parallel/pcg/pcg_binary_series_split.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" namespace FlexFlow { diff --git a/lib/compiler/include/compiler/machine_mapping/machine_compute_resource_slice.h b/lib/compiler/include/compiler/machine_mapping/machine_compute_resource_slice.h new file mode 100644 index 0000000000..79e757ee03 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/machine_compute_resource_slice.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MACHINE_COMPUTE_RESOURCE_SLICE_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MACHINE_COMPUTE_RESOURCE_SLICE_H + +#include "pcg/machine_compute_specification.dtg.h" +#include "compiler/machine_mapping/machine_compute_resource_slice.dtg.h" + +namespace FlexFlow { + +MachineComputeResourceSlice + compute_slice_from_specification(MachineComputeSpecification const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/machine_mapping/machine_compute_resource_slice.struct.toml b/lib/compiler/include/compiler/machine_mapping/machine_compute_resource_slice.struct.toml new file mode 100644 index 0000000000..dfa1716b9f --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/machine_compute_resource_slice.struct.toml @@ -0,0 +1,20 @@ +namespace = "FlexFlow" +name = "MachineComputeResourceSlice" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "utils/positive_int/positive_int.h", +] + +[[fields]] +name = "num_nodes" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "num_gpus_per_node" +type = "::FlexFlow::positive_int" diff --git a/lib/compiler/include/compiler/machine_mapping/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping/machine_mapping.h index 7375cde985..088e9fd2b0 100644 --- a/lib/compiler/include/compiler/machine_mapping/machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping/machine_mapping.h @@ -1,10 +1,9 @@ -#ifndef _FLEXFLOW_COMPILER_MACHINE_MAPPING_H -#define _FLEXFLOW_COMPILER_MACHINE_MAPPING_H +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MACHINE_MAPPING_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MACHINE_MAPPING_H #include "compiler/machine_mapping/machine_mapping.dtg.h" #include 
"pcg/device_id_t.dtg.h" #include "pcg/machine_specification.dtg.h" -#include "pcg/operator_task_space.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" namespace FlexFlow { diff --git a/lib/compiler/include/compiler/machine_mapping/machine_mapping.struct.toml b/lib/compiler/include/compiler/machine_mapping/machine_mapping.struct.toml index 92517c1110..5f9d302007 100644 --- a/lib/compiler/include/compiler/machine_mapping/machine_mapping.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/machine_mapping.struct.toml @@ -2,7 +2,7 @@ namespace = "FlexFlow" name = "MachineMapping" features = [ "eq", - # "ord", + "ord", "hash", # "json", # "rapidcheck", @@ -17,6 +17,7 @@ includes = [ src_includes = [ "utils/hash/unordered_map.h", "utils/fmt/unordered_map.h", + "utils/ord/unordered_map.h", ] [[fields]] diff --git a/lib/compiler/include/compiler/machine_mapping/machine_mapping_context.struct.toml b/lib/compiler/include/compiler/machine_mapping/machine_mapping_context.struct.toml index dd49aaa98a..9f17005b8b 100644 --- a/lib/compiler/include/compiler/machine_mapping/machine_mapping_context.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/machine_mapping_context.struct.toml @@ -5,7 +5,7 @@ features = [] includes = [ "compiler/cost_estimator/runtime_only_cost_estimator.h", "pcg/machine_view.dtg.h", - "pcg/machine_specification.dtg.h", + "compiler/machine_mapping/machine_compute_resource_slice.dtg.h", "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_runtime_only_op_cost_estimate_key.dtg.h", ] @@ -15,4 +15,4 @@ type = "::FlexFlow::RuntimeOnlyCostEstimator" [[fields]] name = "allowed_machine_views" -type = "std::function(::FlexFlow::UnmappedRuntimeOnlyOpCostEstimateKey const &, ::FlexFlow::MachineSpecification const &)>" +type = "std::function(::FlexFlow::UnmappedRuntimeOnlyOpCostEstimateKey const &, ::FlexFlow::MachineComputeResourceSlice const &)>" diff --git a/lib/compiler/include/compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h b/lib/compiler/include/compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h index 65f7006b21..abd77bfa7b 100644 --- a/lib/compiler/include/compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h +++ b/lib/compiler/include/compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MACHINE_MAPPING_PROBLEM_TREE_H #define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MACHINE_MAPPING_PROBLEM_TREE_H +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.dtg.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.dtg.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/mm_problem_tree_parallel_split.dtg.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/mm_problem_tree_series_split.dtg.h" @@ -28,6 +29,9 @@ std::optional mm_problem_tree_get_subtree_at_path(MachineMappingProblemTree const &, BinaryTreePath const &); +std::unordered_map + mm_problem_tree_get_path_to_leaf_map(MachineMappingProblemTree const &); + } // namespace FlexFlow #endif diff --git a/lib/compiler/include/compiler/machine_mapping/machine_mapping_result.h b/lib/compiler/include/compiler/machine_mapping/machine_mapping_result.h index 8924b1c110..f7b52ec574 100644 --- 
a/lib/compiler/include/compiler/machine_mapping/machine_mapping_result.h +++ b/lib/compiler/include/compiler/machine_mapping/machine_mapping_result.h @@ -2,6 +2,7 @@ #define _FLEXFLOW_COMPILER_MACHINE_MAPPING_MACHINE_MAPPING_RESULT_H #include "compiler/machine_mapping/machine_mapping_result.dtg.h" +#include "compiler/machine_mapping/machine_resource_split.dtg.h" #include "compiler/machine_mapping/parallel_split_transformation.dtg.h" #include "utils/units/milliseconds_t.h" @@ -20,8 +21,10 @@ FeasibleMachineMappingResult require_feasible(MachineMappingResult const &); MachineMappingResult const &post_result, std::optional const ¶llel_split_transformation); + [[nodiscard]] MachineMappingResult - parallel_combine(MachineMappingResult const &lhs_result, + parallel_combine(MachineResourceSplit const &split, + MachineMappingResult const &lhs_result, MachineMappingResult const &rhs_result); [[nodiscard]] MachineMappingResult diff --git a/lib/compiler/include/compiler/machine_mapping/machine_mapping_state.struct.toml b/lib/compiler/include/compiler/machine_mapping/machine_mapping_state.struct.toml index 1346f6ebe7..71b3045bf1 100644 --- a/lib/compiler/include/compiler/machine_mapping/machine_mapping_state.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/machine_mapping_state.struct.toml @@ -7,8 +7,8 @@ features = [ ] includes = [ - "pcg/machine_specification.dtg.h", "compiler/machine_mapping/machine_mapping_constraints.dtg.h", + "compiler/machine_mapping/machine_compute_resource_slice.dtg.h", "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.dtg.h", ] @@ -18,7 +18,7 @@ type = "::FlexFlow::MachineMappingProblemTree" [[fields]] name = "resources" -type = "::FlexFlow::MachineSpecification" +type = "::FlexFlow::MachineComputeResourceSlice" [[fields]] name = "constraints" diff --git a/lib/compiler/include/compiler/machine_mapping/machine_resource_split.h b/lib/compiler/include/compiler/machine_mapping/machine_resource_split.h new file mode 100644 index 0000000000..d19ccd5857 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/machine_resource_split.h @@ -0,0 +1,32 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MACHINE_RESOURCE_SPLIT_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MACHINE_RESOURCE_SPLIT_H + +#include "compiler/machine_mapping/machine_compute_resource_slice.dtg.h" +#include "compiler/machine_mapping/machine_resource_split.dtg.h" +#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h" +#include "pcg/machine_view.dtg.h" + +namespace FlexFlow { + +std::pair + apply_resource_split(MachineResourceSplit const &split, + MachineComputeResourceSlice const &resources); + +std::unordered_set + get_machine_resource_splits(MachineComputeResourceSlice const &); + +MachineSpaceCoordinate + offset_machine_space_coordinate_by(MachineSpaceCoordinate const &, + MachineResourceSplit const &); + +MachineView + offset_machine_view_by(MachineView const &, + MachineResourceSplit const &); + +ParallelLayerGuidObliviousMachineMapping + offset_layer_oblivious_mapping_by(ParallelLayerGuidObliviousMachineMapping const &mapping, + MachineResourceSplit const &split); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/machine_mapping/machine_resource_split.struct.toml b/lib/compiler/include/compiler/machine_mapping/machine_resource_split.struct.toml new file mode 100644 index 0000000000..a49e8c47c3 --- /dev/null +++ 
b/lib/compiler/include/compiler/machine_mapping/machine_resource_split.struct.toml @@ -0,0 +1,23 @@ +namespace = "FlexFlow" +name = "MachineResourceSplit" +features = [ + "eq", + "ord", + "hash", + "fmt", + "rapidcheck", + "json", +] + +includes = [ + "utils/positive_int/positive_int.h", + "pcg/machine_specification_dimension.dtg.h", +] + +[[fields]] +name = "offset" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "dimension" +type = "::FlexFlow::MachineSpecificationDimension" diff --git a/lib/compiler/include/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.h b/lib/compiler/include/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.h index 74c6aee851..3c1dc5f9fb 100644 --- a/lib/compiler/include/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.h +++ b/lib/compiler/include/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.h @@ -10,7 +10,7 @@ #include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.dtg.h" #include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_context.dtg.h" #include "compiler/machine_mapping/parallel_split_transformation.dtg.h" -#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" namespace FlexFlow { @@ -18,14 +18,14 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( MachineMappingWithMemoryCache &result_cache, MachineMappingWithMemoryContext const &context, MachineMappingProblemTree const &problem_tree, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints); MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( MachineMappingWithMemoryCache &result_cache, MachineMappingWithMemoryContext const &context, MMProblemTreeSeriesSplit const &series_split, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints, std::optional const ¶llel_split_transformation); @@ -34,14 +34,14 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( MachineMappingWithMemoryCache &result_cache, MachineMappingWithMemoryContext const &context, MMProblemTreeParallelSplit const ¶llel_split, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints); MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( MachineMappingWithMemoryCache &result_cache, MachineMappingWithMemoryContext const &context, UnmappedRuntimeOnlyOpCostEstimateKey const &leaf, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints); } // namespace FlexFlow diff --git a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.struct.toml b/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.struct.toml index c2fe393e99..d3b2590fcd 100644 --- a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.struct.toml @@ -9,7 +9,7 @@ features = [ includes = [ "", "compiler/machine_mapping/machine_mapping_state.dtg.h", - 
"compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.dtg.h", + "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.h", ] src_includes = [ diff --git a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_context.struct.toml b/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_context.struct.toml index 9530697632..245876434a 100644 --- a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_context.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_context.struct.toml @@ -5,7 +5,7 @@ features = [] includes = [ "compiler/cost_estimator/cost_estimator.h", "pcg/machine_view.dtg.h", - "pcg/machine_specification.dtg.h", + "compiler/machine_mapping/machine_compute_resource_slice.dtg.h", "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_runtime_only_op_cost_estimate_key.dtg.h", "pcg/optimizer_attrs.dtg.h", ] @@ -20,4 +20,4 @@ type = "::FlexFlow::OptimizerAttrs" [[fields]] name = "allowed_machine_views" -type = "std::function(::FlexFlow::UnmappedRuntimeOnlyOpCostEstimateKey const &, ::FlexFlow::MachineSpecification const &)>" +type = "std::function(::FlexFlow::UnmappedRuntimeOnlyOpCostEstimateKey const &, ::FlexFlow::MachineComputeResourceSlice const &)>" diff --git a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.h b/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.h index 4cb865dece..53eae8ec8c 100644 --- a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.h +++ b/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.h @@ -1,12 +1,36 @@ #ifndef _FLEXFLOW_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_MACHINE_MAPPING_RESULT_WITH_MEMORY_H #define _FLEXFLOW_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_MACHINE_MAPPING_RESULT_WITH_MEMORY_H -#include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.dtg.h" +#include "compiler/machine_mapping/machine_resource_split.dtg.h" #include "compiler/machine_mapping/parallel_split_transformation.dtg.h" #include +#include "compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.dtg.h" namespace FlexFlow { +struct MachineMappingWithMemoryResult { + MachineMappingWithMemoryResult() = delete; + + explicit MachineMappingWithMemoryResult( + std::unordered_set const &); + + bool operator==(MachineMappingWithMemoryResult const &) const; + bool operator!=(MachineMappingWithMemoryResult const &) const; + + std::unordered_set const &get_pareto_frontier() const; +private: + std::unordered_set m_pareto_frontier; +private: + std::tuple< + decltype(m_pareto_frontier) const & + > tie() const; + + friend struct ::std::hash; +}; + +std::string format_as(MachineMappingWithMemoryResult const &); +std::ostream &operator<<(std::ostream &, MachineMappingWithMemoryResult const &); + [[nodiscard]] MachineMappingWithMemoryResult empty_machine_mapping_with_memory_result(); [[nodiscard]] bool is_empty(MachineMappingWithMemoryResult const &); @@ -14,10 +38,6 @@ namespace FlexFlow { [[nodiscard]] MachineMappingWithMemoryResult get_mapping_with_minimal_runtime( std::unordered_set const &); -[[nodiscard]] MachineMappingWithMemoryResult - remove_non_pareto_optimal_machine_mapping_result( - 
MachineMappingWithMemoryResult const &); - [[nodiscard]] MachineMappingWithMemoryResult series_combine(milliseconds_t comm_cost, MachineMappingWithMemoryResult const &pre_result, @@ -25,7 +45,8 @@ namespace FlexFlow { std::optional const ¶llel_split_transformation); [[nodiscard]] MachineMappingWithMemoryResult - parallel_combine(MachineMappingWithMemoryResult const &lhs_result, + parallel_combine(MachineResourceSplit const &split, + MachineMappingWithMemoryResult const &lhs_result, MachineMappingWithMemoryResult const &rhs_result); [[nodiscard]] MachineMappingWithMemoryResult @@ -38,4 +59,13 @@ namespace FlexFlow { } // namespace FlexFlow +namespace std { + +template <> +struct hash<::FlexFlow::MachineMappingWithMemoryResult> { + size_t operator()(::FlexFlow::MachineMappingWithMemoryResult const &) const; +}; + +} + #endif diff --git a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.struct.toml b/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.struct.toml deleted file mode 100644 index c1e1ee1cac..0000000000 --- a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.struct.toml +++ /dev/null @@ -1,20 +0,0 @@ -namespace = "FlexFlow" -name = "MachineMappingWithMemoryResult" -features = [ - "eq", - "hash", - "fmt", -] - -includes = [ - "compiler/machine_mapping/memory_optimization/machine_mapping_for_single_layer.dtg.h", -] - -src_includes = [ - "utils/hash/unordered_set.h", - "utils/fmt/unordered_set.h", -] - -[[fields]] -name = "machine_mappings" -type = "std::unordered_set<::FlexFlow::MachineMappingForSingleLayer>" diff --git a/lib/compiler/include/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.h b/lib/compiler/include/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.h new file mode 100644 index 0000000000..50e3ee1094 --- /dev/null +++ b/lib/compiler/include/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_PARETO_OPTIMAL_MACHINE_MAPPING_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_PARETO_OPTIMAL_MACHINE_MAPPING_H + +#include "compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.dtg.h" + +namespace FlexFlow { + +bool is_pareto_optimal_in(ParetoOptimalMachineMapping const &, + std::unordered_set const &); + + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_for_single_layer.struct.toml b/lib/compiler/include/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.struct.toml similarity index 90% rename from lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_for_single_layer.struct.toml rename to lib/compiler/include/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.struct.toml index b61dd134c0..03f55c4b62 100644 --- a/lib/compiler/include/compiler/machine_mapping/memory_optimization/machine_mapping_for_single_layer.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "MachineMappingForSingleLayer" +name = "ParetoOptimalMachineMapping" features = [ "eq", "hash", diff --git 
a/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h b/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h index cb3af9c689..60d4340440 100644 --- a/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h @@ -1,7 +1,11 @@ #ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_PARALLEL_LAYER_GUID_OBLIVIOUS_MACHINE_MAPPING_H #define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_PARALLEL_LAYER_GUID_OBLIVIOUS_MACHINE_MAPPING_H +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.dtg.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.dtg.h" #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h" +#include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" #include namespace FlexFlow { @@ -19,6 +23,20 @@ std::optional get_machine_view_for_path(ParallelLayerGuidObliviousMachineMapping const &, BinaryTreePath const &); +std::unordered_map + get_machine_stencils_for_decomposition(ParallelComputationGraph const &pcg, + PCGBinarySPDecomposition const &decomposition, + ParallelLayerGuidObliviousMachineMapping const &mapping); + +std::unordered_map> + get_machine_stencils_for_mm_problem_tree(MachineMappingProblemTree const &, + ParallelLayerGuidObliviousMachineMapping const &mapping); + +std::unordered_map + get_machine_stencils_for_partially_mapped_mm_problem_tree( + MachineMappingProblemTree const &, + ParallelLayerGuidObliviousMachineMapping const &); + } // namespace FlexFlow #endif diff --git a/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.struct.toml b/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.struct.toml index f00fcc8490..5548872c40 100644 --- a/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.struct.toml +++ b/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.struct.toml @@ -4,6 +4,7 @@ features = [ "eq", "hash", "fmt", + "rapidcheck", ] includes = [ diff --git a/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.h b/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.h index 0fb31210fd..ef96637b01 100644 --- a/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping/unstructured_device_mapping.h @@ -3,14 +3,14 @@ #include "compiler/machine_mapping/machine_mapping.dtg.h" #include "compiler/machine_mapping/unstructured_device_mapping.dtg.h" -#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" namespace FlexFlow { UnstructuredDeviceMapping get_unstructured_device_mapping(MachineMapping const &machine_mapping, - MachineSpecification const &machine_spec, + MachineComputeSpecification const &machine_spec, ParallelComputationGraph const &pcg); } // namespace FlexFlow diff --git a/lib/compiler/include/compiler/mapped_operator_task_group.h b/lib/compiler/include/compiler/mapped_operator_task_group.h new file mode 100644 index 0000000000..c593e6c412 --- 
/dev/null +++ b/lib/compiler/include/compiler/mapped_operator_task_group.h @@ -0,0 +1,54 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MAPPED_OPERATOR_TASK_GROUP_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MAPPED_OPERATOR_TASK_GROUP_H + +#include "compiler/operator_atomic_task_shard_binding.dtg.h" +#include "op-attrs/computation_graph_op_attrs.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "pcg/machine_view.dtg.h" +#include "utils/bidict/bidict.h" +#include "compiler/mapped_task_signature_tensor_key.dtg.h" + +namespace FlexFlow { + +struct MappedOperatorTaskGroup { + MappedOperatorTaskGroup() = delete; + + explicit MappedOperatorTaskGroup(bidict const &shard_bindings); + + [[nodiscard]] bool operator==(MappedOperatorTaskGroup const &) const; + [[nodiscard]] bool operator!=(MappedOperatorTaskGroup const &) const; + + [[nodiscard]] bidict const &get_shard_bindings() const; + +private: + bidict shard_bindings; + +private: + [[nodiscard]] std::tuple< + decltype(shard_bindings) const & + > tie() const; + + friend struct ::std::hash; +}; + +std::string format_as(::FlexFlow::MappedOperatorTaskGroup const &); +std::ostream &operator<<(std::ostream &, ::FlexFlow::MappedOperatorTaskGroup const &); + +MappedOperatorTaskGroup + mapped_operator_task_group_from_machine_view( + ComputationGraphOpAttrs const &, + std::vector const &, + MachineView const &); + + +} // namespace FlexFlow + +namespace std { + +template <> +struct hash<::FlexFlow::MappedOperatorTaskGroup> { + size_t operator()(::FlexFlow::MappedOperatorTaskGroup const &) const; +}; + +} // namespace std +#endif diff --git a/lib/compiler/include/compiler/mapped_parallel_computation_graph.h b/lib/compiler/include/compiler/mapped_parallel_computation_graph.h new file mode 100644 index 0000000000..c9c4580763 --- /dev/null +++ b/lib/compiler/include/compiler/mapped_parallel_computation_graph.h @@ -0,0 +1,27 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MAPPED_PARALLEL_COMPUTATION_GRAPH_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MAPPED_PARALLEL_COMPUTATION_GRAPH_H + +#include "compiler/machine_mapping/machine_mapping.dtg.h" +#include "compiler/mapped_parallel_computation_graph.dtg.h" +#include "op-attrs/computation_graph_op_attrs.dtg.h" +#include "op-attrs/parallel_tensor_space_coordinate.dtg.h" +#include "pcg/machine_view.dtg.h" + +namespace FlexFlow { + +MappedParallelComputationGraph + mapped_pcg_from_pcg_and_mapping( + ParallelComputationGraph const &, + MachineMapping const &); + +bidict + get_tensor_shard_to_device_coord_mapping(ComputationGraphOpAttrs const &, + MachineView const &); + + +std::string format_as(MappedParallelComputationGraph const &); +std::ostream &operator<<(std::ostream &, MappedParallelComputationGraph const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/mapped_parallel_computation_graph.struct.toml b/lib/compiler/include/compiler/mapped_parallel_computation_graph.struct.toml new file mode 100644 index 0000000000..3b5ba1aebb --- /dev/null +++ b/lib/compiler/include/compiler/mapped_parallel_computation_graph.struct.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "MappedParallelComputationGraph" +features = [] + +includes = [ + "compiler/mapped_operator_task_group.h", + "pcg/parallel_computation_graph/parallel_computation_graph.h", + "", +] + +[[fields]] +name = "pcg" +type = "::FlexFlow::ParallelComputationGraph" + +[[fields]] +name = "mapped_tasks" +type = "std::unordered_map<::FlexFlow::parallel_layer_guid_t, 
::FlexFlow::MappedOperatorTaskGroup>" diff --git a/lib/compiler/include/compiler/mapped_task_signature_tensor_key.struct.toml b/lib/compiler/include/compiler/mapped_task_signature_tensor_key.struct.toml new file mode 100644 index 0000000000..e172e58f57 --- /dev/null +++ b/lib/compiler/include/compiler/mapped_task_signature_tensor_key.struct.toml @@ -0,0 +1,27 @@ +namespace = "FlexFlow" +name = "MappedTaskSignatureTensorKey" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", +] + +includes = [ + "utils/nonnegative_int/nonnegative_int.h", + "op-attrs/tensor_role.dtg.h", + "pcg/gpu_id_t.dtg.h", +] + +[[fields]] +name = "gpu_id" +type = "::FlexFlow::gpu_id_t" + +[[fields]] +name = "tensor_role" +type = "::FlexFlow::TensorRole" + +[[fields]] +name = "idx" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/compiler/include/compiler/operator_atomic_task_shard_binding.h b/lib/compiler/include/compiler/operator_atomic_task_shard_binding.h new file mode 100644 index 0000000000..c03bf2af86 --- /dev/null +++ b/lib/compiler/include/compiler/operator_atomic_task_shard_binding.h @@ -0,0 +1,28 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_OPERATOR_ATOMIC_TASK_SHARD_BINDING_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_OPERATOR_ATOMIC_TASK_SHARD_BINDING_H + +#include "compiler/operator_atomic_task_shard_binding.dtg.h" +#include "compiler/task_signature_tensor_key.dtg.h" +#include "op-attrs/computation_graph_op_attrs.dtg.h" +#include "op-attrs/parallel_tensor_space_coordinate.dtg.h" +#include "pcg/machine_view.dtg.h" + +namespace FlexFlow { + +OperatorAtomicTaskShardBinding + operator_atomic_task_shard_binding_from_machine_view(ComputationGraphOpAttrs const &, + std::vector const &, + MachineView const &, + MachineSpaceCoordinate const &); + +std::vector + ptensor_space_coords_for_role(OperatorAtomicTaskShardBinding const &, + TensorRole); + +ParallelTensorSpaceCoordinate + ptensor_space_coord_for_key(OperatorAtomicTaskShardBinding const &, + TaskSignatureTensorKey const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/operator_atomic_task_shard_binding.struct.toml b/lib/compiler/include/compiler/operator_atomic_task_shard_binding.struct.toml new file mode 100644 index 0000000000..35faa5fc8e --- /dev/null +++ b/lib/compiler/include/compiler/operator_atomic_task_shard_binding.struct.toml @@ -0,0 +1,31 @@ +namespace = "FlexFlow" +name = "OperatorAtomicTaskShardBinding" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", +] + +includes = [ + "op-attrs/parallel_tensor_space_coordinate.dtg.h", +] + +src_includes = [ + "utils/hash/vector.h", + "utils/fmt/vector.h", + "utils/ord/vector.h", +] + +[[fields]] +name = "inputs" +type = "std::vector<::FlexFlow::ParallelTensorSpaceCoordinate>" + +[[fields]] +name = "weights" +type = "std::vector<::FlexFlow::ParallelTensorSpaceCoordinate>" + +[[fields]] +name = "outputs" +type = "std::vector<::FlexFlow::ParallelTensorSpaceCoordinate>" diff --git a/lib/compiler/include/compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h b/lib/compiler/include/compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h index 86fa1a59aa..74ad521884 100644 --- a/lib/compiler/include/compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h +++ b/lib/compiler/include/compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h @@ -29,10 +29,16 @@ std::unordered_multiset SPDecompositionTreeNodeType get_node_type(PCGBinarySPDecomposition const &); +std::unordered_set + pcg_sp_tree_get_all_leaf_paths(PCGBinarySPDecomposition 
const &); + std::unordered_set find_paths_to_leaf(PCGBinarySPDecomposition const &, parallel_layer_guid_t const &); +std::unordered_map + pcg_sp_tree_get_path_to_leaf_map(PCGBinarySPDecomposition const &); + } // namespace FlexFlow #endif diff --git a/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.h b/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.h index 2c6d6514e8..09b08c1c1c 100644 --- a/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.h +++ b/lib/compiler/include/compiler/task_graph_simulator/pcg_task_graph.h @@ -3,14 +3,14 @@ #include "compiler/machine_mapping/machine_mapping.dtg.h" #include "compiler/task_graph_simulator/pcg_task_graph.dtg.h" -#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" namespace FlexFlow { PCGTaskGraph get_pcg_task_graph(ParallelComputationGraph const &pcg, MachineMapping const &machine_mapping, - MachineSpecification const &machine_spec); + MachineComputeSpecification const &machine_spec); } // namespace FlexFlow diff --git a/lib/compiler/include/compiler/task_signature_tensor_key.h b/lib/compiler/include/compiler/task_signature_tensor_key.h new file mode 100644 index 0000000000..4ddb3462c3 --- /dev/null +++ b/lib/compiler/include/compiler/task_signature_tensor_key.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_TASK_SIGNATURE_TENSOR_KEY_H +#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_TASK_SIGNATURE_TENSOR_KEY_H + +#include +#include "compiler/task_signature_tensor_key.dtg.h" + +namespace FlexFlow { + +std::unordered_set + all_keys_for_signature_arities( + nonnegative_int num_inputs, + nonnegative_int num_weights, + nonnegative_int num_outputs); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/task_signature_tensor_key.struct.toml b/lib/compiler/include/compiler/task_signature_tensor_key.struct.toml new file mode 100644 index 0000000000..cc2f711966 --- /dev/null +++ b/lib/compiler/include/compiler/task_signature_tensor_key.struct.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "TaskSignatureTensorKey" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", +] + +includes = [ + "utils/nonnegative_int/nonnegative_int.h", + "op-attrs/tensor_role.dtg.h", +] + +[[fields]] +name = "tensor_role" +type = "::FlexFlow::TensorRole" + +[[fields]] +name = "idx" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index 370cb5a4ec..605c4865ab 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -1,8 +1,8 @@ #include "compiler/allowed_machine_views.h" -#include "pcg/machine_specification.h" +#include "op-attrs/operator_task_space.h" +#include "pcg/machine_compute_specification.h" #include "pcg/machine_view.h" #include "pcg/multi_dimensional_stride.dtg.h" -#include "pcg/operator_task_space.h" #include "utils/containers/all_of.h" #include "utils/containers/cartesian_product.h" #include "utils/containers/extend.h" @@ -25,16 +25,17 @@ namespace FlexFlow { bool is_valid_machine_view(MachineView const &mv, - OperatorTaskSpace const &task, - MachineSpecification const &ms) { - if (num_dims(mv) != num_dims(task)) { + OperatorTaskSpace const &task_space, + MachineComputeSpecification const &ms) { + if (mv_get_expected_task_space_num_dims(mv) != 
op_task_space_num_dims(task_space)) { return false; } - std::optional maximum_device_coord = + MachineSpaceCoordinate maximum_device_coord = get_machine_space_coordinate( - task, mv, get_task_space_maximum_coordinate(task), ms); - return maximum_device_coord.has_value(); + task_space, mv, get_task_space_maximum_coordinate(task_space)); + + return is_valid_machine_space_coordinate(ms, maximum_device_coord); } /* @@ -46,8 +47,8 @@ bool is_valid_machine_view(MachineView const &mv, * the returned `MachineView`s to be invalid) */ static std::unordered_set - get_candidate_machine_views(MachineSpecification const &machine_spec, - OperatorTaskSpace const &task, + get_candidate_machine_views(MachineComputeSpecification const &machine_spec, + OperatorTaskSpace const &task_space, DeviceType const &device_type) { auto get_max_stride_upper_bound = @@ -83,14 +84,13 @@ static std::unordered_set return strides; }; - auto candidate_starts = [](MachineSpecification const &ms, + auto candidate_starts = [](MachineComputeSpecification const &ms, DeviceType const &device_type) { std::unordered_set result; for (nonnegative_int node_idx : - nonnegative_range(ms.num_nodes.nonnegative_int_from_positive_int())) { + nonnegative_range(ms.num_nodes)) { for (nonnegative_int device_idx : - nonnegative_range(get_num_devices_per_node(ms, device_type) - .nonnegative_int_from_positive_int())) { + nonnegative_range(get_num_devices_per_node(ms, device_type))) { result.insert( MachineSpaceCoordinate{node_idx, device_idx, device_type}); } @@ -98,14 +98,18 @@ static std::unordered_set return result; }; - auto candidate_dimensions = [](OperatorTaskSpace const &task) { + auto candidate_dimensions = [](OperatorTaskSpace const &task_space) { std::unordered_set options = { MachineSpecificationDimension::INTER_NODE, MachineSpecificationDimension::INTRA_NODE}; - return get_all_permutations_with_repetition(options, num_dims(task)); + return get_all_permutations_with_repetition(options, op_task_space_num_dims(task_space)); }; - std::vector tensor_dims = task.degrees; + std::vector tensor_dims = transform(task_space.degrees.dims, + [](int_ge_two dim) { + return dim.positive_int_from_int_ge_two(); + }); + positive_int total_devices = get_num_devices(machine_spec, device_type); std::unordered_set machine_views; @@ -115,7 +119,7 @@ static std::unordered_set for (MachineSpaceCoordinate start : candidate_starts(machine_spec, device_type)) { for (std::vector const &dims : - candidate_dimensions(task)) { + candidate_dimensions(task_space)) { machine_views.insert( machine_view_from_strides_and_machine_spec_dimensions( start, strides.raw_strides, dims)); @@ -126,14 +130,14 @@ static std::unordered_set } std::unordered_set - get_allowed_machine_views(MachineSpecification const &machine_spec, - OperatorTaskSpace const &task, + get_allowed_machine_views(MachineComputeSpecification const &machine_spec, + OperatorTaskSpace const &task_space, DeviceType device_type) { std::unordered_set views = - get_candidate_machine_views(machine_spec, task, device_type); + get_candidate_machine_views(machine_spec, task_space, device_type); return filter(views, [&](MachineView const &mv) { - return is_valid_machine_view(mv, task, machine_spec); + return is_valid_machine_view(mv, task_space, machine_spec); }); } diff --git a/lib/compiler/src/compiler/cost_estimator/communication_edge.cc b/lib/compiler/src/compiler/cost_estimator/communication_edge.cc new file mode 100644 index 0000000000..b02841741f --- /dev/null +++ 
b/lib/compiler/src/compiler/cost_estimator/communication_edge.cc @@ -0,0 +1,73 @@ +#include "compiler/cost_estimator/communication_edge.h" +#include +#include "utils/hash-utils.h" +#include "utils/hash/tuple.h" + +namespace FlexFlow { + +CommunicationEdge::CommunicationEdge( + MachineSpaceCoordinate const &src, + MachineSpaceCoordinate const &dst) + : src(src), dst(dst) +{ + ASSERT(src != dst); +} + +bool CommunicationEdge::operator==(CommunicationEdge const &other) const { + return this->tie() == other.tie(); +} + +bool CommunicationEdge::operator!=(CommunicationEdge const &other) const { + return this->tie() != other.tie(); +} + +bool CommunicationEdge::operator<(CommunicationEdge const &other) const { + return this->tie() < other.tie(); +} + +bool CommunicationEdge::operator>(CommunicationEdge const &other) const { + return this->tie() > other.tie(); +} + +bool CommunicationEdge::operator<=(CommunicationEdge const &other) const { + return this->tie() <= other.tie(); +} + +bool CommunicationEdge::operator>=(CommunicationEdge const &other) const { + return this->tie() >= other.tie(); +} + +MachineSpaceCoordinate const &CommunicationEdge::get_src() const { + return this->src; +} + +MachineSpaceCoordinate const &CommunicationEdge::get_dst() const { + return this->dst; +} + +std::tuple< + MachineSpaceCoordinate const &, + MachineSpaceCoordinate const & +> CommunicationEdge::tie() const { + return std::tie(this->src, this->dst); +} + +std::string format_as(CommunicationEdge const &e) { + return fmt::format("", + e.get_src(), + e.get_dst()); +} + +std::ostream &operator<<(std::ostream &s, CommunicationEdge const &e) { + return (s << fmt::to_string(e)); +} + +} // namespace FlexFlow + +namespace std { + +size_t hash<::FlexFlow::CommunicationEdge>::operator()(::FlexFlow::CommunicationEdge const &e) const { + return get_std_hash(e.tie()); +} + +} diff --git a/lib/compiler/src/compiler/cost_estimator/network_cost_model.cc b/lib/compiler/src/compiler/cost_estimator/network_cost_model.cc new file mode 100644 index 0000000000..8002cfa526 --- /dev/null +++ b/lib/compiler/src/compiler/cost_estimator/network_cost_model.cc @@ -0,0 +1,16 @@ +#include "compiler/cost_estimator/network_cost_model.h" +#include "utils/exception.h" + +namespace FlexFlow { + +float estimate_communication_cost( + MachineSpecification const &machine_spec, + TensorSetMovement const &tensor_set_movement) { + NOT_IMPLEMENTED(); // TODO @lockshaw + // for (SingleTensorMovement const &single_tensor_movement : + // tensor_set_movement.single_tensor_movements) { + // for + // } +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/cost_estimator/op_cost_estimate_key.cc b/lib/compiler/src/compiler/cost_estimator/op_cost_estimate_key.cc index 92b07bbe23..edd9eba9b4 100644 --- a/lib/compiler/src/compiler/cost_estimator/op_cost_estimate_key.cc +++ b/lib/compiler/src/compiler/cost_estimator/op_cost_estimate_key.cc @@ -6,7 +6,6 @@ #include "pcg/machine_specification.dtg.h" #include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" -#include "pcg/operator_task_space.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" #include diff --git a/lib/compiler/src/compiler/cost_estimator/op_cost_metrics.cc b/lib/compiler/src/compiler/cost_estimator/op_cost_metrics.cc index 2bca184419..deb9a470d1 100644 --- a/lib/compiler/src/compiler/cost_estimator/op_cost_metrics.cc +++ b/lib/compiler/src/compiler/cost_estimator/op_cost_metrics.cc @@ -1,7 +1,19 @@ #include "compiler/cost_estimator/op_cost_metrics.h" 
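// --- reviewer note (annotation, not part of the patch) -----------------------
// The op_cost_metrics.cc hunk below adds a Pareto-dominance filter over cost
// metrics. As a minimal, self-contained sketch of the same idea: keep a point
// unless some other point is strictly better in every tracked dimension.
// SketchMetrics and sketch_is_pareto_optimal_in are illustrative stand-ins
// invented for this note, not the real OpCostMetrics / FlexFlow API.
#include <vector>

struct SketchMetrics {
  float forward_runtime;
  float backward_runtime;
  long memory_usage;
};

// Returns true if no candidate in `others` strictly dominates `m` in all
// dimensions (the same weak-dominance test the hunk expresses with all_of).
inline bool sketch_is_pareto_optimal_in(SketchMetrics const &m,
                                        std::vector<SketchMetrics> const &others) {
  for (SketchMetrics const &o : others) {
    bool o_strictly_better_everywhere = o.forward_runtime < m.forward_runtime &&
                                        o.backward_runtime < m.backward_runtime &&
                                        o.memory_usage < m.memory_usage;
    if (o_strictly_better_everywhere) {
      return false;
    }
  }
  return true;
}
// ------------------------------------------------------------------------------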
+#include "utils/containers/all_of.h" namespace FlexFlow { +bool is_pareto_optimal_in(OpCostMetrics const &m, + std::unordered_set const &others) { + return all_of(others, + [&](OpCostMetrics const &other) { + return + m.forward_runtime <= other.forward_runtime + || m.backward_runtime <= other.backward_runtime + || m.memory_usage <= other.memory_usage; + }); +} + OpCostMetrics make_op_cost_metrics_from_runtime_only( RuntimeOnlyOpCostMetrics const &runtime_only, num_bytes_t const &memory_usage) { diff --git a/lib/compiler/src/compiler/cost_estimator/tensor_set_movement.cc b/lib/compiler/src/compiler/cost_estimator/tensor_set_movement.cc index 8f2ab84b84..5f6b57cf69 100644 --- a/lib/compiler/src/compiler/cost_estimator/tensor_set_movement.cc +++ b/lib/compiler/src/compiler/cost_estimator/tensor_set_movement.cc @@ -1,16 +1,60 @@ #include "compiler/cost_estimator/tensor_set_movement.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h" +#include "utils/containers/unordered_multiset_of.h" +#include "utils/full_binary_tree/binary_tree_path.dtg.h" + namespace FlexFlow { -TensorSetMovement get_tensor_set_movement_from_pcg_edge( +TensorSetMovement + empty_tensor_set_movement() { + + return TensorSetMovement{{}}; +} + +TensorSetMovement + get_tensor_set_movement_from_pcg_edge( ParallelComputationGraphEdge const &edge, ParallelComputationGraph const &pcg, MachineView const &src_mv, MachineView const &dst_mv) { - ParallelTensorShape tensor_shape = - get_parallel_tensor_shape(pcg, parallel_tensor_guid_t{edge.raw_edge.src}); - return TensorSetMovement{ - {SingleTensorMovement{tensor_shape, {src_mv}, {dst_mv}}}}; + + parallel_layer_guid_t src = get_src_layer(edge); + parallel_layer_guid_t dst = get_dst_layer(edge); + + BinaryTreePath src_path = BinaryTreePath{{BinaryTreePathEntry::LEFT_CHILD}}; + BinaryTreePath dst_path = BinaryTreePath{{BinaryTreePathEntry::RIGHT_CHILD}}; + + AbstractedSingleTensorMovement abstracted_single_tensor_movement = + get_abstracted_single_tensor_movement_along_edge( + /*pcg=*/pcg, + /*edge=*/edge, + /*src_path=*/src_path, + /*dst_path=*/dst_path); + + AbstractedTensorSetMovement abstracted_tensor_set_movement + = abstracted_tensor_set_movement_from_single_tensor_movement(abstracted_single_tensor_movement); + + MachineSpaceStencil src_machine_stencil = MachineSpaceStencil{ + /*operator_task_space=*/get_operator_task_space(pcg, src), + /*machine_view=*/src_mv, + }; + + MachineSpaceStencil dst_machine_stencil = MachineSpaceStencil{ + /*operator_task_space=*/get_operator_task_space(pcg, dst), + /*machine_view=*/dst_mv, + }; + + return concretize_abstracted_tensor_set_movement( + abstracted_tensor_set_movement, + /*pre_machine_stencils=*/std::unordered_map{ + {src_path, src_machine_stencil}, + }, + /*post_machine_stencils=*/std::unordered_map{ + {dst_path, dst_machine_stencil}, + }); } } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/graph_optimize_result.cc b/lib/compiler/src/compiler/graph_optimize_result.cc deleted file mode 100644 index f48c119603..0000000000 --- a/lib/compiler/src/compiler/graph_optimize_result.cc +++ /dev/null @@ -1,15 +0,0 @@ -#include "compiler/graph_optimize_result.h" - -namespace FlexFlow { - -std::string 
format_as(GraphOptimizeResult const &r) { - return fmt::format("", - as_dot(r.pcg), - r.machine_mapping); -} - -std::ostream &operator<<(std::ostream &s, GraphOptimizeResult const &r) { - return (s << fmt::to_string(r)); -} - -} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/graph_optimize_state.cc b/lib/compiler/src/compiler/graph_optimize_state.cc index 1091b92866..3de9357dd6 100644 --- a/lib/compiler/src/compiler/graph_optimize_state.cc +++ b/lib/compiler/src/compiler/graph_optimize_state.cc @@ -1,5 +1,4 @@ #include "compiler/graph_optimize_state.h" -#include "compiler/graph_optimize_result.h" #include "pcg/parallel_computation_graph/parallel_tensor_guid_t.h" namespace FlexFlow { @@ -12,20 +11,20 @@ bool GraphOptimizeState::operator==(GraphOptimizeState const &other) const { // Note(@wmdi): This is a hack to implement a partially correct homomorphism // check. Switch to the homomorphism check used in substitutions right after // https://github.com/flexflow/FlexFlow/pull/1471 is merged. - auto layers1 = topological_ordering(graph_optimize_result.pcg); - auto layers2 = topological_ordering(other.graph_optimize_result.pcg); + auto layers1 = topological_ordering(graph_optimize_result.mapped_pcg.pcg); + auto layers2 = topological_ordering(other.graph_optimize_result.mapped_pcg.pcg); if (layers1.size() != layers2.size()) { return false; } std::unordered_map mapping; for (size_t i = 0; i < layers1.size(); ++i) { - if (get_parallel_layer_attrs(graph_optimize_result.pcg, layers1[i]) != - get_parallel_layer_attrs(other.graph_optimize_result.pcg, layers2[i])) { + if (get_parallel_layer_attrs(graph_optimize_result.mapped_pcg.pcg, layers1[i]) != + get_parallel_layer_attrs(other.graph_optimize_result.mapped_pcg.pcg, layers2[i])) { return false; } - auto inputs1 = get_incoming_tensors(graph_optimize_result.pcg, layers1[i]); + auto inputs1 = get_incoming_tensors(graph_optimize_result.mapped_pcg.pcg, layers1[i]); auto inputs2 = - get_incoming_tensors(other.graph_optimize_result.pcg, layers2[i]); + get_incoming_tensors(other.graph_optimize_result.mapped_pcg.pcg, layers2[i]); if (inputs1.size() != inputs2.size()) { return false; } @@ -34,9 +33,9 @@ bool GraphOptimizeState::operator==(GraphOptimizeState const &other) const { return false; } } - auto outputs1 = get_layer_outputs(graph_optimize_result.pcg, layers1[i]); + auto outputs1 = get_layer_outputs(graph_optimize_result.mapped_pcg.pcg, layers1[i]); auto outputs2 = - get_layer_outputs(other.graph_optimize_result.pcg, layers2[i]); + get_layer_outputs(other.graph_optimize_result.mapped_pcg.pcg, layers2[i]); if (outputs1.size() != outputs2.size()) { return false; } @@ -74,12 +73,12 @@ size_t hash<::FlexFlow::GraphOptimizeState>::operator()( // TODO(@wmdi): Eventually it might be good to use a proper graph hash like // https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash.html#networkx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash size_t seed = 0; - auto layers = topological_ordering(state.graph_optimize_result.pcg); + auto layers = topological_ordering(state.graph_optimize_result.mapped_pcg.pcg); ::FlexFlow::hash_combine(seed, layers.size()); for (auto layer : layers) { ::FlexFlow::hash_combine( - seed, get_parallel_layer_attrs(state.graph_optimize_result.pcg, layer)); - auto inputs = get_incoming_tensors(state.graph_optimize_result.pcg, layer); + seed, get_parallel_layer_attrs(state.graph_optimize_result.mapped_pcg.pcg, layer)); + auto inputs = 
get_incoming_tensors(state.graph_optimize_result.mapped_pcg.pcg, layer); ::FlexFlow::hash_combine(seed, inputs.size()); for (auto input : inputs) { for (size_t i = 0; i < layers.size(); ++i) { diff --git a/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.cc b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.cc new file mode 100644 index 0000000000..690c8e8623 --- /dev/null +++ b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.cc @@ -0,0 +1,26 @@ +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.dtg.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_op_cost_estimate_key.dtg.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_runtime_only_op_cost_estimate_key.dtg.h" +#include "op-attrs/computation_graph_op_attrs.h" +#include "op-attrs/get_operator_task_space.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "pcg/machine_compute_specification.dtg.h" +#include "pcg/machine_view.h" +#include "utils/containers/transform.h" + +namespace FlexFlow { + +MachineSpaceCoordinate + concretize_abstracted_device( + AbstractedDevice const &abstracted_device, + std::unordered_map const &stencils) { + + return machine_space_stencil_compute_machine_coord( + stencils.at(abstracted_device.operator_tree_path), + abstracted_device.task_space_coordinate); +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.cc b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.cc new file mode 100644 index 0000000000..8d6b1076e0 --- /dev/null +++ b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.cc @@ -0,0 +1,27 @@ +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_device.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.h" +#include "pcg/machine_compute_specification.dtg.h" + +namespace FlexFlow { + +std::optional + concretize_abstracted_single_tensor_communication_edge( + AbstractedSingleTensorCommunicationEdge const &edge, + MachineSpaceStencil const &src_machine_stencil, + std::unordered_map const &dst_machine_stencils) { + + MachineSpaceCoordinate src = machine_space_stencil_compute_machine_coord(src_machine_stencil, edge.src_coord); + MachineSpaceCoordinate dst = concretize_abstracted_device(edge.dst, dst_machine_stencils); + + if (src == dst) { + return std::nullopt; + } else { + return CommunicationEdge{ + /*src=*/src, + /*dst=*/dst, + }; + } +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.cc b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.cc new file mode 100644 index 0000000000..22334bf61b --- /dev/null +++ 
b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.cc @@ -0,0 +1,93 @@ +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.h" +#include "utils/containers/filtermap_keys.h" +#include "utils/containers/map_from_pairs.h" +#include "utils/containers/map_keys_with_value_merging.h" +#include "utils/containers/merge_maps_with.h" +#include "utils/containers/require_same.h" +#include "utils/containers/values.h" +#include "utils/containers/transform.h" +#include "utils/containers/require_all_same1.h" + +namespace FlexFlow { + +std::unordered_set + abstracted_single_tensor_movement_get_dst_layers(AbstractedSingleTensorMovement const &m) { + return transform(keys(m.edge_to_size), + [](AbstractedSingleTensorCommunicationEdge const &e) -> BinaryTreePath { + return e.dst.operator_tree_path; + }); +} + +AbstractedSingleTensorMovement + merge_abstracted_single_tensor_movements( + std::unordered_multiset const &movements) { + + std::unordered_multiset src_paths = + transform(movements, + [](AbstractedSingleTensorMovement const &m) { + return m.src_op_tree_path; + }); + + BinaryTreePath src_op_tree_path = require_all_same1(src_paths); + + return AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/require_all_same1(src_paths), + /*edge_to_size=*/merge_maps_with( + transform(vector_of(movements), + [](AbstractedSingleTensorMovement const &m) { + return m.edge_to_size; + }), + [](num_bytes_t l, num_bytes_t r) { + return l + r; + }), + }; +} + +AbstractedSingleTensorMovement + abstracted_single_tensor_movement_from_communications( + BinaryTreePath const &src_op_tree_path, + std::unordered_set const &communications) { + + return AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/src_op_tree_path, + /*edge_to_size=*/ + map_from_pairs( + transform(communications, + [](AbstractedSingleTensorCommunication const &c) { + return std::pair{c.edge, c.size}; + })), + }; +} + +TensorSetMovement concretize_abstracted_single_tensor_movement( + AbstractedSingleTensorMovement const &abstracted, + std::unordered_map const &pre_machine_stencils, + std::unordered_map const &post_machine_stencils) { + + MachineSpaceStencil pre_machine_stencil = + pre_machine_stencils.at(abstracted.src_op_tree_path); + + std::unordered_map, num_bytes_t> + communication_edges = + map_keys_with_value_merging(abstracted.edge_to_size, + /*key_func=*/[&](AbstractedSingleTensorCommunicationEdge const &k) { + return concretize_abstracted_single_tensor_communication_edge( + /*edge=*/k, + /*src_machine_stencils=*/pre_machine_stencil, + /*dst_machine_stencils=*/post_machine_stencils); + }, + /*merge_values=*/[](num_bytes_t lhs, num_bytes_t rhs) { + return require_same(lhs, rhs); + }); + + return TensorSetMovement{ + /*edge_to_size=*/ + filtermap_keys(communication_edges, + [](std::optional const &e) { + return e; + }), + }; +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.cc b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.cc index 6f3deca138..40536b3001 100644 --- a/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.cc +++ b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.cc 
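// --- reviewer note (annotation, not part of the patch) -----------------------
// The abstracted_tensor_set_movement.cc hunk below rebuilds concretization
// around per-edge byte counts: each single-tensor movement becomes an
// edge -> size map, and movements are folded together by summing sizes on
// shared edges. A minimal stand-in for that merge step, using plain std types
// (SketchEdge / SketchMovement are assumptions of this note, not the real
// CommunicationEdge / TensorSetMovement types):
#include <cstdint>
#include <string>
#include <unordered_map>

using SketchEdge = std::string;
using SketchMovement = std::unordered_map<SketchEdge, std::uint64_t>;

// Merge two movements; byte counts on a shared edge are added together.
inline SketchMovement sketch_merge_movements(SketchMovement lhs,
                                             SketchMovement const &rhs) {
  for (auto const &[edge, bytes] : rhs) {
    lhs[edge] += bytes; // operator[] value-initializes new edges to 0 first
  }
  return lhs;
}
// Folding a list of movements with this merge, starting from an empty map,
// mirrors the foldl over empty_tensor_set_movement() used in the hunk.
// ------------------------------------------------------------------------------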
@@ -1,8 +1,15 @@ #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h" +#include "compiler/cost_estimator/tensor_set_movement.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h" #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h" #include "utils/containers/flatmap.h" +#include "utils/containers/merge_maps_with.h" #include "utils/containers/transform.h" #include "utils/containers/unordered_set_of.h" +#include "utils/hash/unordered_map.h" +#include "utils/containers/map_keys_with_value_merging.h" +#include "utils/containers/binary_merge_maps_with.h" namespace FlexFlow { @@ -10,53 +17,58 @@ AbstractedTensorSetMovement empty_abstracted_tensor_set_movement() { return AbstractedTensorSetMovement{{}}; } +AbstractedTensorSetMovement + abstracted_tensor_set_movement_from_single_tensor_movement( + AbstractedSingleTensorMovement const &m) { + return AbstractedTensorSetMovement{ + /*single_tensor_movements=*/{m}, + }; +} + std::unordered_set get_src_layers(AbstractedTensorSetMovement const &m) { - return flatmap(unordered_set_of(m.single_tensor_movements), - [](AbstractedSingleTensorMovement const &s) { - return s.src_machine_views; - }); + return transform(m.single_tensor_movements, + [](AbstractedSingleTensorMovement const &e) -> BinaryTreePath { + return e.src_op_tree_path; + }); } std::unordered_set get_dst_layers(AbstractedTensorSetMovement const &m) { - return flatmap(unordered_set_of(m.single_tensor_movements), - [](AbstractedSingleTensorMovement const &s) { - return s.dst_machine_views; + return flatmap(m.single_tensor_movements, + [](AbstractedSingleTensorMovement const &m) -> std::unordered_set { + return abstracted_single_tensor_movement_get_dst_layers(m); }); } TensorSetMovement concretize_abstracted_tensor_set_movement( AbstractedTensorSetMovement const &abstracted, - ParallelLayerGuidObliviousMachineMapping const &pre_mapping, - ParallelLayerGuidObliviousMachineMapping const &post_mapping) { - ParallelLayerGuidObliviousMachineMapping mapping = - binary_combine_mappings(/*lhs=*/pre_mapping, - /*rhs=*/post_mapping); - - auto concretize_tensor_movement = - [&](AbstractedSingleTensorMovement const &a) { - return SingleTensorMovement{ - /*parallel_tensor_shape=*/a.parallel_tensor_shape, - /*src_machine_views=*/ - transform( - a.src_machine_views, - [&](BinaryTreePath const &path) { - return get_machine_view_for_path(pre_mapping, path).value(); - }), - /*dst_machine_views=*/ - transform( - a.dst_machine_views, - [&](BinaryTreePath const &path) { - return get_machine_view_for_path(post_mapping, path).value(); - }), - }; - }; - - return TensorSetMovement{ - /*single_tensor_movements=*/transform(abstracted.single_tensor_movements, - concretize_tensor_movement), + std::unordered_map const &pre_machine_stencils, + std::unordered_map const &post_machine_stencils) { + + std::vector single_tensor_movements = + transform(vector_of(abstracted.single_tensor_movements), + [&](AbstractedSingleTensorMovement const &m) { + return concretize_abstracted_single_tensor_movement( + m, + /*pre_machine_stencils=*/pre_machine_stencils, + /*post_machine_stencils=*/post_machine_stencils); + }); + + auto merge_tensor_set_movements = + [](TensorSetMovement const &lhs, + TensorSetMovement const &rhs) -> TensorSetMovement { + return TensorSetMovement{ + binary_merge_maps_with( + 
lhs.edge_to_size, + rhs.edge_to_size, + [](num_bytes_t l, num_bytes_t r) { + return l + r; + }), + }; }; + + return foldl(single_tensor_movements, empty_tensor_set_movement(), merge_tensor_set_movements); } } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc index 0e0f60c891..8dc658c678 100644 --- a/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc +++ b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc @@ -1,62 +1,112 @@ #include "compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_communication.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h" #include "compiler/machine_mapping/transitive_reduced_pcg.h" #include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h" +#include "op-attrs/operator_task_space_to_operator_task_space_mapping.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/tensor_shape.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h" +#include "utils/containers/binary_cartesian_product.h" #include "utils/containers/generate_map.h" #include "utils/containers/get_only.h" +#include "utils/containers/group_by.h" +#include "utils/containers/map_from_pairs.h" +#include "utils/containers/merge_maps_with.h" #include "utils/containers/values.h" +#include "utils/containers/vector_of.h" +#include "utils/containers/flatmap.h" +#include "utils/containers/unordered_multiset_of.h" +#include "utils/bidict/algorithms/unordered_set_of.h" +#include "utils/containers/transform.h" namespace FlexFlow { +AbstractedSingleTensorMovement + get_abstracted_single_tensor_movement_along_edge( + ParallelComputationGraph const &pcg, + ParallelComputationGraphEdge const &edge, + BinaryTreePath const &src_path, + BinaryTreePath const &dst_path) { + + parallel_layer_guid_t pcg_src = get_src_layer(edge); + parallel_layer_guid_t pcg_dst = get_dst_layer(edge); + + parallel_tensor_guid_t parallel_tensor = get_parallel_tensor(edge); + TensorShape tensor_piece = get_piece_shape(get_parallel_tensor_shape(pcg, parallel_tensor)); + + OperatorTaskSpaceToOperatorTaskSpaceMapping + mapping = pcg_get_mapping_along_edge(pcg, edge); + + bidict coord_mapping = op_to_op_get_coord_mapping(mapping); + + std::unordered_map + single_comms = map_from_pairs( + transform(unordered_set_of(coord_mapping), + [&](std::pair const &src_dst) + -> std::pair { + auto [src_task_coord, dst_task_coord] = src_dst; + + return std::pair{ + AbstractedSingleTensorCommunicationEdge{ + /*src_coord=*/src_task_coord, + /*dst=*/AbstractedDevice{dst_path, dst_task_coord}, + }, + get_size_in_bytes(tensor_piece), + }; + })); + + return AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/src_path, + /*edge_to_size=*/single_comms, + }; +} + AbstractedTensorSetMovement 
get_abstracted_tensor_set_movement_across_split( TransitiveReducedPCG const &tr_pcg, PCGBinarySeriesSplit const &split) { std::unordered_set edges_across_split = pcg_get_transitive_reduced_edges_across_split(tr_pcg, split); - auto get_movement_for_tensor = - [&](parallel_tensor_guid_t const &t) -> AbstractedSingleTensorMovement { - std::unordered_set tensor_edges = - filter(edges_across_split, [&](ParallelComputationGraphEdge const &e) { - return get_parallel_tensor(e) == t; - }); - - std::unordered_set src_layers = - transform(tensor_edges, [&](ParallelComputationGraphEdge const &e) { - return get_src_layer(e); - }); - - std::unordered_set dst_layers = - transform(tensor_edges, [&](ParallelComputationGraphEdge const &e) { - return get_dst_layer(e); - }); - - return AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/get_parallel_tensor_shape(tr_pcg.full_pcg, t), - /*src_machine_views=*/ - transform(src_layers, - [&](parallel_layer_guid_t const &l) { - return get_only( - find_paths_to_leaf(split.get_left_child(), l)); - }), - /*dst_machine_views=*/ - transform(dst_layers, - [&](parallel_layer_guid_t const &l) { - return get_only( - find_paths_to_leaf(split.get_right_child(), l)); - }), - }; + OneToMany< + parallel_tensor_guid_t, + ParallelComputationGraphEdge + > edges_by_tensor = group_by(edges_across_split, + [](ParallelComputationGraphEdge const &e) { + return get_parallel_tensor(e); + }); + + auto get_src_layer_path = [&](parallel_layer_guid_t layer) -> BinaryTreePath { + return get_only(find_paths_to_leaf(split.get_left_child(), layer)); + }; + + auto get_dst_layer_path = [&](parallel_layer_guid_t layer) -> BinaryTreePath { + return get_only(find_paths_to_leaf(split.get_right_child(), layer)); }; - std::unordered_map - single_tensor_movements = generate_map( - pcg_get_transitive_reduced_tensors_across_split(tr_pcg, split), - get_movement_for_tensor); + auto to_abstracted_single_tensor_movement = [&](ParallelComputationGraphEdge const &pcg_edge) + -> AbstractedSingleTensorMovement { + + parallel_layer_guid_t pcg_src = get_src_layer(pcg_edge); + parallel_layer_guid_t pcg_dst = get_dst_layer(pcg_edge); + + return get_abstracted_single_tensor_movement_along_edge( + /*pcg=*/tr_pcg.full_pcg, + /*edge=*/pcg_edge, + /*src_path=*/get_src_layer_path(pcg_src), + /*dst_path=*/get_dst_layer_path(pcg_dst)); + }; return AbstractedTensorSetMovement{ - values(single_tensor_movements), + transform( + edges_by_tensor.right_groups(), + [&](std::unordered_set const &edges) { + return merge_abstracted_single_tensor_movements( + transform(unordered_multiset_of(edges), to_abstracted_single_tensor_movement)); + }), }; } diff --git a/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.cc b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.cc new file mode 100644 index 0000000000..0194fde815 --- /dev/null +++ b/lib/compiler/src/compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.cc @@ -0,0 +1,18 @@ +#include "compiler/machine_mapping/abstracted_tensor_set_movement/machine_space_stencil.h" +#include "pcg/machine_view.h" + +namespace FlexFlow { + +MachineSpaceCoordinate + machine_space_stencil_compute_machine_coord( + MachineSpaceStencil const &machine_space_stencil, + TaskSpaceCoordinate const &task_space_coordinate) { + + return get_machine_space_coordinate( + /*operator_task_space=*/machine_space_stencil.operator_task_space, + /*machine_view=*/machine_space_stencil.machine_view, + 
/*task_space_coordinate=*/task_space_coordinate); +} + + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/get_machine_resource_splits.cc b/lib/compiler/src/compiler/machine_mapping/get_machine_resource_splits.cc deleted file mode 100644 index e921a0c465..0000000000 --- a/lib/compiler/src/compiler/machine_mapping/get_machine_resource_splits.cc +++ /dev/null @@ -1,34 +0,0 @@ -#include "compiler/machine_mapping/get_machine_resource_splits.h" -#include "utils/hash/pair.h" - -namespace FlexFlow { - -std::unordered_set> - get_machine_resource_splits(MachineSpecification const &resource) { - std::unordered_set> - result; - - for (int i = 1; i < resource.num_nodes; i *= 2) { - MachineSpecification sub_resource1 = resource; - MachineSpecification sub_resource2 = resource; - sub_resource1.num_nodes = positive_int{i}; - sub_resource2.num_nodes = - positive_int{resource.num_nodes.int_from_positive_int() - i}; - result.insert(std::make_pair(sub_resource1, sub_resource2)); - result.insert(std::make_pair(sub_resource2, sub_resource1)); - } - - for (int i = 1; i < resource.num_gpus_per_node; i *= 2) { - MachineSpecification sub_resource1 = resource; - MachineSpecification sub_resource2 = resource; - sub_resource1.num_gpus_per_node = positive_int{i}; - sub_resource2.num_gpus_per_node = - positive_int{resource.num_gpus_per_node.int_from_positive_int() - i}; - result.insert(std::make_pair(sub_resource1, sub_resource2)); - result.insert(std::make_pair(sub_resource2, sub_resource1)); - } - - return result; -} - -} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/get_optimal_machine_mapping.cc b/lib/compiler/src/compiler/machine_mapping/get_optimal_machine_mapping.cc index 8ca033d0d6..54f86eeffb 100644 --- a/lib/compiler/src/compiler/machine_mapping/get_optimal_machine_mapping.cc +++ b/lib/compiler/src/compiler/machine_mapping/get_optimal_machine_mapping.cc @@ -1,18 +1,21 @@ #include "compiler/machine_mapping/get_optimal_machine_mapping.h" #include "compiler/cost_estimator/op_cost_metrics.dtg.h" #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h" -#include "compiler/machine_mapping/get_machine_resource_splits.h" #include "compiler/machine_mapping/machine_mapping_cache.h" #include "compiler/machine_mapping/machine_mapping_constraints.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_op_cost_estimate_key.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_runtime_only_op_cost_estimate_key.h" #include "compiler/machine_mapping/machine_mapping_result.h" +#include "compiler/machine_mapping/machine_resource_split.dtg.h" +#include "compiler/machine_mapping/machine_resource_split.h" +#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h" #include "compiler/machine_mapping/transitive_reduced_pcg.h" #include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.dtg.h" #include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h" +#include "op-attrs/computation_graph_op_attrs.h" +#include "op-attrs/parallel_tensor_shape.h" #include "pcg/machine_specification.dtg.h" -#include "pcg/machine_specification.h" #include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" @@ -23,6 +26,7 @@ #include "utils/containers/unordered_set_of.h" #include "utils/exception.h" 
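// --- reviewer note (annotation, not part of the patch) -----------------------
// get_machine_resource_splits.cc (deleted above) returned concrete pairs of
// MachineSpecifications; this PR instead represents a split as an offset along
// one machine dimension and applies it to a compute-resource slice. A
// simplified model of applying such a split; SketchSlice / SketchSplit and the
// int fields are assumptions of this note, not the real dtg-generated types:
#include <cassert>
#include <utility>

struct SketchSlice {
  int num_nodes;
  int num_gpus_per_node;
};

enum class SketchDim { INTER_NODE, INTRA_NODE };

struct SketchSplit {
  int offset; // size of the left-hand part along `dimension`
  SketchDim dimension;
};

// Split a slice into two disjoint sub-slices along the chosen dimension.
inline std::pair<SketchSlice, SketchSlice>
    sketch_apply_split(SketchSplit const &split, SketchSlice const &r) {
  if (split.dimension == SketchDim::INTER_NODE) {
    assert(split.offset > 0 && split.offset < r.num_nodes);
    return {SketchSlice{split.offset, r.num_gpus_per_node},
            SketchSlice{r.num_nodes - split.offset, r.num_gpus_per_node}};
  } else {
    assert(split.offset > 0 && split.offset < r.num_gpus_per_node);
    return {SketchSlice{r.num_nodes, split.offset},
            SketchSlice{r.num_nodes, r.num_gpus_per_node - split.offset}};
  }
}
// The right-hand sub-slice starts where the left one ends, which is why the
// parallel branch also offsets the right child's machine views by the split.
// ------------------------------------------------------------------------------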
#include "utils/overload.h" +#include "op-attrs/get_operator_task_space.h" namespace FlexFlow { @@ -30,7 +34,7 @@ MachineMappingResult get_optimal_machine_mapping(MachineMappingCache &result_cache, MachineMappingContext const &context, MachineMappingProblemTree const &problem_tree, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints) { MachineMappingState state = MachineMappingState{ @@ -75,21 +79,21 @@ MachineMappingResult get_optimal_machine_mapping(MachineMappingCache &result_cache, MachineMappingContext const &context, MMProblemTreeSeriesSplit const &series_split, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints, std::optional const ¶llel_split_transformation) { auto get_boundary_machine_view_assignments = - [&](std::unordered_set const &boundary_layers) + [&](MachineMappingProblemTree const &root, + std::unordered_set const &boundary_layers) -> std::unordered_set { std::unordered_map> allowed = generate_map( boundary_layers, [&](BinaryTreePath const &l) -> std::unordered_set { UnmappedRuntimeOnlyOpCostEstimateKey leaf = - mm_problem_tree_get_subtree_at_path( - MachineMappingProblemTree{series_split}, l) + mm_problem_tree_get_subtree_at_path(root, l) .value() .get(); return context.allowed_machine_views(leaf, resources); @@ -139,7 +143,9 @@ MachineMappingResult for (ParallelLayerGuidObliviousMachineMapping const &assigned_pre_machine_views : - get_boundary_machine_view_assignments(get_src_layers(tensor_movement))) { + get_boundary_machine_view_assignments( + series_split.get_left_child(), + get_src_layers(tensor_movement))) { MachineMappingResult pre_result = eval_pre_boundary_mapping(assigned_pre_machine_views); @@ -147,6 +153,7 @@ MachineMappingResult for (ParallelLayerGuidObliviousMachineMapping const &assigned_post_machine_views : get_boundary_machine_view_assignments( + series_split.get_right_child(), get_dst_layers(tensor_movement))) { MachineMappingResult post_result = @@ -155,8 +162,15 @@ MachineMappingResult TensorSetMovement comm_across_split = concretize_abstracted_tensor_set_movement( tensor_movement, - /*pre_mapping=*/assigned_pre_machine_views, - /*post_mapping=*/assigned_post_machine_views); + /*pre_machine_stencils=*/ + get_machine_stencils_for_partially_mapped_mm_problem_tree( + series_split.get_left_child(), + assigned_pre_machine_views), + /*post_machine_stencils=*/ + get_machine_stencils_for_partially_mapped_mm_problem_tree( + series_split.get_right_child(), + assigned_post_machine_views)); + milliseconds_t cost_across_split = context.cost_estimator.estimate_cost(comm_across_split); @@ -175,7 +189,7 @@ MachineMappingResult get_optimal_machine_mapping( MachineMappingCache &result_cache, MachineMappingContext const &context, MMProblemTreeParallelSplit const ¶llel_split, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints) { MachineMappingProblemTree lhs = parallel_split.get_left_child(); @@ -202,18 +216,19 @@ MachineMappingResult get_optimal_machine_mapping( restrict_to_right_child(constraints); auto evaluate_resource_split = - [&](std::pair const - &resource_split) { + [&](MachineResourceSplit const &resource_split) { + auto [lhs_resources, rhs_resources] = apply_resource_split(resource_split, resources); + MachineMappingResult left_result = get_optimal_machine_mapping( - result_cache, context, lhs, resource_split.first, 
left_constraints); + result_cache, context, lhs, lhs_resources, left_constraints); MachineMappingResult right_result = get_optimal_machine_mapping(result_cache, context, rhs, - resource_split.second, + rhs_resources, right_constraints); - return parallel_combine(left_result, right_result); + return parallel_combine(resource_split, left_result, right_result); }; std::unordered_set parallel_results = transform( @@ -227,7 +242,7 @@ MachineMappingResult get_optimal_machine_mapping( MachineMappingCache &result_cache, MachineMappingContext const &context, UnmappedRuntimeOnlyOpCostEstimateKey const &leaf, - MachineSpecification const &resource, + MachineComputeResourceSlice const &resource, MachineMappingConstraints const &constraints) { std::unordered_set candidates = [&] { diff --git a/lib/compiler/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc b/lib/compiler/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc index 6cc3f4329c..04ac394433 100644 --- a/lib/compiler/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc +++ b/lib/compiler/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc @@ -1,7 +1,9 @@ #include "compiler/machine_mapping/get_tensor_set_movement_across_split.h" #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h" #include "compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.h" +#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h" #include "compiler/machine_mapping/transitive_reduced_pcg.h" +#include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h" @@ -9,6 +11,7 @@ #include "utils/containers/keys.h" #include "utils/containers/sum.h" #include "utils/containers/values.h" +#include "utils/containers/map_values.h" namespace FlexFlow { @@ -17,10 +20,29 @@ TensorSetMovement get_tensor_set_movement_across_split( PCGBinarySeriesSplit const &split, ParallelLayerGuidObliviousMachineMapping const &pre_mapping, ParallelLayerGuidObliviousMachineMapping const &post_mapping) { + AbstractedTensorSetMovement abstracted = get_abstracted_tensor_set_movement_across_split(tr_pcg, split); + + auto get_task_spaces = [&](PCGBinarySPDecomposition const &t) + -> std::unordered_map { + return map_values(pcg_sp_tree_get_path_to_leaf_map(t), + [&](parallel_layer_guid_t parallel_layer_guid) { + return get_operator_task_space(tr_pcg.full_pcg, parallel_layer_guid); + }); + + }; + + std::unordered_map + pre_stencils = get_machine_stencils_for_decomposition(tr_pcg.full_pcg, split.get_left_child(), pre_mapping); + + std::unordered_map + post_stencils = get_machine_stencils_for_decomposition(tr_pcg.full_pcg, split.get_right_child(), post_mapping); + return concretize_abstracted_tensor_set_movement( - abstracted, pre_mapping, post_mapping); + abstracted, + /*pre_machine_stencils=*/pre_stencils, + /*post_machine_stencils=*/post_stencils); } } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/machine_compute_resource_slice.cc b/lib/compiler/src/compiler/machine_mapping/machine_compute_resource_slice.cc new file mode 100644 index 0000000000..b3538b8d43 --- /dev/null +++ b/lib/compiler/src/compiler/machine_mapping/machine_compute_resource_slice.cc @@ -0,0 +1,14 
@@ +#include "compiler/machine_mapping/machine_compute_resource_slice.h" + +namespace FlexFlow { + +MachineComputeResourceSlice + compute_slice_from_specification(MachineComputeSpecification const &spec) { + + return MachineComputeResourceSlice{ + /*num_nodes=*/spec.num_nodes, + /*num_gpus_per_node=*/spec.num_gpus_per_node, + }; +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc b/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc index 82c8274808..18d5fb484f 100644 --- a/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc +++ b/lib/compiler/src/compiler/machine_mapping/machine_mapping.cc @@ -1,14 +1,15 @@ #include "compiler/machine_mapping/machine_mapping.h" #include "utils/containers/are_disjoint.h" #include "utils/containers/keys.h" -#include "utils/containers/merge_maps.h" +#include "utils/containers/binary_merge_disjoint_maps.h" namespace FlexFlow { MachineMapping combine_disjoint_mappings(MachineMapping const &m1, MachineMapping const &m2) { return MachineMapping{ - merge_disjoint_maps(m1.machine_views, m2.machine_views)}; + binary_merge_disjoint_maps(m1.machine_views, m2.machine_views), + }; } bool nodes_are_disjoint(MachineMapping const &m1, MachineMapping const &m2) { diff --git a/lib/compiler/src/compiler/machine_mapping/machine_mapping_cache.cc b/lib/compiler/src/compiler/machine_mapping/machine_mapping_cache.cc index fbfccf737f..a430eed7a5 100644 --- a/lib/compiler/src/compiler/machine_mapping/machine_mapping_cache.cc +++ b/lib/compiler/src/compiler/machine_mapping/machine_mapping_cache.cc @@ -1,6 +1,7 @@ #include "compiler/machine_mapping/machine_mapping_cache.h" #include "utils/containers/contains_key.h" #include "utils/containers/try_at.h" +#include namespace FlexFlow { @@ -17,12 +18,9 @@ std::optional void machine_mapping_cache_save(MachineMappingCache &cache, MachineMappingState const &k, MachineMappingResult const &v) { - if (contains_key(cache.raw_map, k)) { - throw mk_runtime_error( - fmt::format("machine_mapping_cache_save expected key to not already " - "exist, but received existing key {}", - k)); - } + ASSERT(!contains_key(cache.raw_map, k), + "machine_mapping_cache_save expected key to not already exist", + k); cache.raw_map.emplace(k, v); } diff --git a/lib/compiler/src/compiler/machine_mapping/machine_mapping_constraints.cc b/lib/compiler/src/compiler/machine_mapping/machine_mapping_constraints.cc index 2cee866a01..dd078f9fdc 100644 --- a/lib/compiler/src/compiler/machine_mapping/machine_mapping_constraints.cc +++ b/lib/compiler/src/compiler/machine_mapping/machine_mapping_constraints.cc @@ -80,16 +80,15 @@ MachineMappingConstraints with_additional_constraints( if (!current_machine_view.has_value()) { result.machine_views.at(layer) = machine_view; } else { - if (current_machine_view.value() != machine_view) { - throw mk_runtime_error( + ASSERT(current_machine_view.value() == machine_view, fmt::format("with_additional_layer_machine_views received machine " "view assignment for layer {} " "to machine view {}, but that layer is already " "assigned to machine view {}.", layer, machine_view, - current_machine_view.value())); - } + current_machine_view.value()) + ); } } @@ -98,13 +97,11 @@ MachineMappingConstraints with_additional_constraints( std::optional require_only_root(MachineMappingConstraints const &constraints) { - if (keys(constraints.machine_views) != - std::unordered_set{binary_tree_root_path()}) { - throw mk_runtime_error( - fmt::format("require_only_root expected constraints to have 
only a " + ASSERT(keys(constraints.machine_views) == + std::unordered_set{binary_tree_root_path()}, + fmt::format("require_only_root expected constraints to have only a " "single key (the root path), but received {}", constraints)); - } return constraints.machine_views.at(binary_tree_root_path()); } diff --git a/lib/compiler/src/compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.cc b/lib/compiler/src/compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.cc index 09323b1800..bcbb655a62 100644 --- a/lib/compiler/src/compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.cc +++ b/lib/compiler/src/compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.cc @@ -1,6 +1,7 @@ #include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h" #include "utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_all_leaf_paths.h" #include "utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_leaves.h" +#include "utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_path_to_leaf_map.h" #include "utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_subtree_at_path.h" namespace FlexFlow { @@ -89,4 +90,9 @@ std::optional tree, generic_binary_sp_impl_for_mm_problem_tree(), path); } +std::unordered_map + mm_problem_tree_get_path_to_leaf_map(MachineMappingProblemTree const &tree) { + return get_path_to_leaf_map(tree, generic_binary_sp_impl_for_mm_problem_tree()); +} + } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/machine_mapping_result.cc b/lib/compiler/src/compiler/machine_mapping/machine_mapping_result.cc index a370a6803d..54f0a35528 100644 --- a/lib/compiler/src/compiler/machine_mapping/machine_mapping_result.cc +++ b/lib/compiler/src/compiler/machine_mapping/machine_mapping_result.cc @@ -1,8 +1,8 @@ #include "compiler/machine_mapping/machine_mapping_result.h" #include "compiler/machine_mapping/machine_mapping.h" +#include "compiler/machine_mapping/machine_resource_split.h" #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h" #include "utils/containers/map_keys.h" -#include "utils/containers/merge_maps.h" #include "utils/full_binary_tree/binary_tree_path.h" namespace FlexFlow { @@ -72,7 +72,8 @@ MachineMappingResult } MachineMappingResult - parallel_combine(MachineMappingResult const &maybe_lhs_result, + parallel_combine(MachineResourceSplit const &split, + MachineMappingResult const &maybe_lhs_result, MachineMappingResult const &maybe_rhs_result) { FeasibleMachineMappingResult lhs_result = ({ if (is_infeasible(maybe_lhs_result)) { @@ -92,8 +93,9 @@ MachineMappingResult FeasibleMachineMappingResult{ /*runtime=*/std::max(lhs_result.runtime, rhs_result.runtime), /*machine_mapping=*/ - binary_combine_mappings(/*lhs=*/lhs_result.machine_mapping, - /*rhs=*/rhs_result.machine_mapping), + binary_combine_mappings( + /*lhs=*/lhs_result.machine_mapping, + /*rhs=*/offset_layer_oblivious_mapping_by(rhs_result.machine_mapping, split)), }, }; } diff --git a/lib/compiler/src/compiler/machine_mapping/machine_resource_split.cc b/lib/compiler/src/compiler/machine_mapping/machine_resource_split.cc new file mode 100644 index 0000000000..37dddfc99d --- /dev/null +++ b/lib/compiler/src/compiler/machine_mapping/machine_resource_split.cc @@ -0,0 +1,122 
@@ +#include "compiler/machine_mapping/machine_resource_split.h" +#include "utils/containers/map_values.h" +#include + +namespace FlexFlow { + +std::pair + apply_resource_split(MachineResourceSplit const &split, + MachineComputeResourceSlice const &resources) { + if (split.dimension == MachineSpecificationDimension::INTER_NODE) { + ASSERT(split.offset < resources.num_nodes); + + return { + MachineComputeResourceSlice{ + /*num_nodes=*/split.offset, + /*num_gpus_per_node=*/resources.num_gpus_per_node, + }, + MachineComputeResourceSlice{ + /*num_nodes=*/positive_int{ + resources.num_nodes.int_from_positive_int() - split.offset.int_from_positive_int() + }, + /*num_gpus_per_node=*/resources.num_gpus_per_node, + }, + }; + } else { + ASSERT(split.dimension == MachineSpecificationDimension::INTRA_NODE); + + ASSERT(split.offset < resources.num_gpus_per_node); + + return { + MachineComputeResourceSlice{ + /*num_nodes=*/resources.num_nodes, + /*num_gpus_per_node=*/split.offset, + }, + MachineComputeResourceSlice{ + /*num_nodes=*/resources.num_nodes, + /*num_gpus_per_node=*/positive_int{ + resources.num_gpus_per_node.int_from_positive_int() - split.offset.int_from_positive_int(), + }, + }, + }; + } +} + + +std::unordered_set + get_machine_resource_splits(MachineComputeResourceSlice const &resources) { + + std::unordered_set result; + + for (positive_int i = 1_p; i < resources.num_nodes; i *= 2_p) { + result.insert(MachineResourceSplit{ + /*offset=*/i, + /*dimension=*/MachineSpecificationDimension::INTER_NODE, + }); + result.insert(MachineResourceSplit{ + /*offset=*/positive_int{ + resources.num_nodes.int_from_positive_int() - i.int_from_positive_int(), + }, + /*dimension=*/MachineSpecificationDimension::INTER_NODE, + }); + } + + for (positive_int i = 1_p; i < resources.num_gpus_per_node; i *= 2_p) { + result.insert(MachineResourceSplit{ + /*offset=*/i, + /*dimension=*/MachineSpecificationDimension::INTRA_NODE, + }); + result.insert(MachineResourceSplit{ + /*offset=*/positive_int{ + resources.num_gpus_per_node.int_from_positive_int() - i.int_from_positive_int(), + }, + /*dimension=*/MachineSpecificationDimension::INTRA_NODE, + }); + } + + return result; +} + +MachineSpaceCoordinate + offset_machine_space_coordinate_by(MachineSpaceCoordinate const &coord, + MachineResourceSplit const &split) { + if (split.dimension == MachineSpecificationDimension::INTER_NODE) { + return MachineSpaceCoordinate{ + /*node_idx=*/(coord.node_idx + split.offset).nonnegative_int_from_positive_int(), + /*device_idx=*/coord.device_idx, + /*device_type=*/coord.device_type, + }; + } else { + ASSERT(split.dimension == MachineSpecificationDimension::INTRA_NODE); + + return MachineSpaceCoordinate{ + /*node_idx=*/coord.node_idx, + /*device_idx=*/(coord.device_idx + split.offset).nonnegative_int_from_positive_int(), + /*device_type=*/coord.device_type, + }; + } +} + +MachineView + offset_machine_view_by(MachineView const &machine_view, + MachineResourceSplit const &split) { + return MachineView{ + /*start=*/offset_machine_space_coordinate_by(machine_view.start, split), + /*dimensions=*/machine_view.dimensions, + }; +} + +ParallelLayerGuidObliviousMachineMapping + offset_layer_oblivious_mapping_by(ParallelLayerGuidObliviousMachineMapping const &mapping, + MachineResourceSplit const &split) { + + return ParallelLayerGuidObliviousMachineMapping{ + map_values(mapping.raw_mapping, + [&](MachineView const &mv) { + return offset_machine_view_by(mv, split); + }), + }; +} + + +} // namespace FlexFlow diff --git 
a/lib/compiler/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc b/lib/compiler/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc index 74e8db6304..f225d4b8c8 100644 --- a/lib/compiler/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc +++ b/lib/compiler/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc @@ -1,16 +1,17 @@ #include "compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.h" #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h" -#include "compiler/machine_mapping/get_machine_resource_splits.h" #include "compiler/machine_mapping/machine_mapping_constraints.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_op_cost_estimate_key.h" +#include "compiler/machine_mapping/machine_resource_split.dtg.h" +#include "compiler/machine_mapping/machine_resource_split.h" #include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.h" #include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.h" +#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h" #include "compiler/machine_mapping/transitive_reduced_pcg.h" #include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.dtg.h" #include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h" #include "pcg/machine_specification.dtg.h" -#include "pcg/machine_specification.h" #include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" @@ -28,7 +29,7 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( MachineMappingWithMemoryCache &result_cache, MachineMappingWithMemoryContext const &context, MachineMappingProblemTree const &problem_tree, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints) { MachineMappingState state = MachineMappingState{ @@ -73,25 +74,27 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( MachineMappingWithMemoryCache &result_cache, MachineMappingWithMemoryContext const &context, MMProblemTreeSeriesSplit const &series_split, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints, std::optional const ¶llel_split_transformation) { auto get_boundary_machine_view_assignments = - [&](std::unordered_set const &boundary_layers) + [&](MachineMappingProblemTree const &root, + std::unordered_set const &boundary_layers) -> std::unordered_set { + std::unordered_map> allowed = generate_map( boundary_layers, [&](BinaryTreePath const &l) -> std::unordered_set { UnmappedRuntimeOnlyOpCostEstimateKey leaf = - mm_problem_tree_get_subtree_at_path( - MachineMappingProblemTree{series_split}, l) + mm_problem_tree_get_subtree_at_path(root, l) .value() .get(); return context.allowed_machine_views(leaf, resources); }); + return transform( get_all_assignments(allowed), [](std::unordered_map const &m) { @@ -140,7 +143,9 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( for (ParallelLayerGuidObliviousMachineMapping const &assigned_pre_machine_views : - 
get_boundary_machine_view_assignments(get_src_layers(tensor_movement))) { + get_boundary_machine_view_assignments( + series_split.get_left_child(), + get_src_layers(tensor_movement))) { MachineMappingWithMemoryResult pre_result = eval_pre_boundary_mapping(assigned_pre_machine_views); @@ -148,6 +153,7 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( for (ParallelLayerGuidObliviousMachineMapping const &assigned_post_machine_views : get_boundary_machine_view_assignments( + series_split.get_right_child(), get_dst_layers(tensor_movement))) { MachineMappingWithMemoryResult post_result = @@ -156,8 +162,15 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( TensorSetMovement comm_across_split = concretize_abstracted_tensor_set_movement( tensor_movement, - /*pre_mapping=*/assigned_pre_machine_views, - /*post_mapping=*/assigned_post_machine_views); + /*pre_machine_stencils=*/ + get_machine_stencils_for_partially_mapped_mm_problem_tree( + series_split.get_left_child(), + assigned_pre_machine_views), + /*post_machine_stencils=*/ + get_machine_stencils_for_partially_mapped_mm_problem_tree( + series_split.get_right_child(), + assigned_post_machine_views)); + milliseconds_t cost_across_split = context.cost_estimator.estimate_cost(comm_across_split); @@ -176,7 +189,7 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( MachineMappingWithMemoryCache &result_cache, MachineMappingWithMemoryContext const &context, MMProblemTreeParallelSplit const ¶llel_split, - MachineSpecification const &resources, + MachineComputeResourceSlice const &resources, MachineMappingConstraints const &constraints) { MachineMappingProblemTree lhs = parallel_split.get_left_child(); @@ -204,22 +217,23 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( restrict_to_right_child(constraints); auto evaluate_resource_split = - [&](std::pair const - &resource_split) { + [&](MachineResourceSplit const &resource_split) { + auto [lhs_resources, rhs_resources] = apply_resource_split(resource_split, resources); + MachineMappingWithMemoryResult left_result = get_optimal_machine_mapping_with_memory(result_cache, context, lhs, - resource_split.first, + lhs_resources, left_constraints); MachineMappingWithMemoryResult right_result = get_optimal_machine_mapping_with_memory(result_cache, context, rhs, - resource_split.second, + rhs_resources, right_constraints); - return parallel_combine(left_result, right_result); + return parallel_combine(resource_split, left_result, right_result); }; std::unordered_set parallel_results = @@ -234,7 +248,7 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory( MachineMappingWithMemoryCache &result_cache, MachineMappingWithMemoryContext const &context, UnmappedRuntimeOnlyOpCostEstimateKey const &leaf, - MachineSpecification const &resource, + MachineComputeResourceSlice const &resource, MachineMappingConstraints const &constraints) { std::unordered_set candidates = [&] { diff --git a/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.cc b/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.cc index 617ba682be..c17e306aa8 100644 --- a/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.cc +++ b/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.cc @@ -1,6 +1,7 @@ #include 
"compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.h" #include "utils/containers/contains_key.h" #include "utils/containers/try_at.h" +#include namespace FlexFlow { @@ -19,12 +20,10 @@ void machine_mapping_with_memory_cache_save( MachineMappingWithMemoryCache &cache, MachineMappingState const &k, MachineMappingWithMemoryResult const &v) { - if (contains_key(cache.raw_map, k)) { - throw mk_runtime_error(fmt::format( - "machine_mapping_with_memory_cache_save expected key to not already " - "exist, but received existing key {}", - k)); - } + ASSERT(!contains_key(cache.raw_map, k), + "machine_mapping_with_memory_cache_save expected key to not already " + "exist", + k); cache.raw_map.emplace(k, v); } diff --git a/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc b/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc index cff7984897..85adedfcf3 100644 --- a/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc +++ b/lib/compiler/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc @@ -1,10 +1,53 @@ #include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.h" +#include "compiler/machine_mapping/machine_resource_split.h" #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h" #include "utils/containers/set_union.h" #include "utils/full_binary_tree/binary_tree_path.h" +#include "utils/containers/all_of.h" +#include "utils/containers/transform.h" +#include "compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.h" +#include "utils/hash/tuple.h" +#include "utils/hash/unordered_set.h" namespace FlexFlow { +MachineMappingWithMemoryResult::MachineMappingWithMemoryResult( + std::unordered_set const &pareto_frontier) + : m_pareto_frontier(pareto_frontier) +{ + ASSERT( + all_of(pareto_frontier, + [&](ParetoOptimalMachineMapping const &m) { + return is_pareto_optimal_in(m, pareto_frontier); + })); +} + +bool MachineMappingWithMemoryResult::operator==(MachineMappingWithMemoryResult const &other) const { + return this->tie() == other.tie(); +} + +bool MachineMappingWithMemoryResult::operator!=(MachineMappingWithMemoryResult const &other) const { + return this->tie() != other.tie(); +} + +std::unordered_set const &MachineMappingWithMemoryResult::get_pareto_frontier() const { + return this->m_pareto_frontier; +} + +std::string format_as(MachineMappingWithMemoryResult const &r) { + return fmt::format("", r.get_pareto_frontier()); +} + +std::ostream &operator<<(std::ostream &s, MachineMappingWithMemoryResult const &r) { + return (s << fmt::to_string(r)); +} + +std::tuple< + std::unordered_set const & +> MachineMappingWithMemoryResult::tie() const { + return std::tie(this->m_pareto_frontier); +} + MachineMappingWithMemoryResult empty_machine_mapping_with_memory_result() { return MachineMappingWithMemoryResult{ {}, @@ -23,29 +66,6 @@ MachineMappingWithMemoryResult get_mapping_with_minimal_runtime( return result; } -MachineMappingWithMemoryResult remove_non_pareto_optimal_machine_mapping_result( - MachineMappingWithMemoryResult const &result) { - std::unordered_set non_pareto_optimal_mappings; - for (MachineMappingForSingleLayer const &mapping : result.machine_mappings) { - bool is_pareto_optimal = true; - for (MachineMappingForSingleLayer const &other_mapping : - result.machine_mappings) { - if (mapping.cost.forward_runtime >= 
other_mapping.cost.forward_runtime && - mapping.cost.backward_runtime >= - other_mapping.cost.backward_runtime && - mapping.cost.memory_usage >= other_mapping.cost.memory_usage && - mapping != other_mapping) { - is_pareto_optimal = false; - break; - } - } - if (is_pareto_optimal) { - non_pareto_optimal_mappings.insert(mapping); - } - } - return MachineMappingWithMemoryResult{std::move(non_pareto_optimal_mappings)}; -} - MachineMappingWithMemoryResult series_combine(milliseconds_t comm_cost, MachineMappingWithMemoryResult const &pre_result, @@ -53,8 +73,8 @@ MachineMappingWithMemoryResult std::optional const ¶llel_split_transformation) { auto combine_machine_mapping = - [&](MachineMappingForSingleLayer const &pre_mm, - MachineMappingForSingleLayer const &post_mm) { + [&](ParetoOptimalMachineMapping const &pre_mm, + ParetoOptimalMachineMapping const &post_mm) { OpCostMetrics cost = OpCostMetrics{ /*forward_runtime=*/pre_mm.cost.forward_runtime + comm_cost + post_mm.cost.forward_runtime, @@ -76,28 +96,34 @@ MachineMappingWithMemoryResult } }(); - return MachineMappingForSingleLayer{cost, mapping}; + return ParetoOptimalMachineMapping{cost, mapping}; }; - MachineMappingWithMemoryResult result = - empty_machine_mapping_with_memory_result(); - for (MachineMappingForSingleLayer const &pre_mm : - pre_result.machine_mappings) { - for (MachineMappingForSingleLayer const &post_mm : - post_result.machine_mappings) { - result.machine_mappings.insert(combine_machine_mapping(pre_mm, post_mm)); + std::unordered_set result; + + for (ParetoOptimalMachineMapping const &pre_mm : + pre_result.get_pareto_frontier()) { + for (ParetoOptimalMachineMapping const &post_mm : + post_result.get_pareto_frontier()) { + result.insert(combine_machine_mapping(pre_mm, post_mm)); } } - return remove_non_pareto_optimal_machine_mapping_result(result); + return MachineMappingWithMemoryResult{ + /*pareto_frontier=*/filter(result, + [&](ParetoOptimalMachineMapping const &m) { + return is_pareto_optimal_in(m, result); + }), + }; } MachineMappingWithMemoryResult - parallel_combine(MachineMappingWithMemoryResult const &lhs_result, + parallel_combine(MachineResourceSplit const &split, + MachineMappingWithMemoryResult const &lhs_result, MachineMappingWithMemoryResult const &rhs_result) { auto combine_machine_mapping = - [&](MachineMappingForSingleLayer const &lhs_mm, - MachineMappingForSingleLayer const &rhs_mm) { + [&](ParetoOptimalMachineMapping const &lhs_mm, + ParetoOptimalMachineMapping const &rhs_mm) { OpCostMetrics cost = OpCostMetrics{ /*forward_runtime=*/ std::max(lhs_mm.cost.forward_runtime, rhs_mm.cost.forward_runtime), @@ -109,39 +135,51 @@ MachineMappingWithMemoryResult }; ParallelLayerGuidObliviousMachineMapping mapping = - binary_combine_mappings(lhs_mm.machine_mapping, - rhs_mm.machine_mapping); + binary_combine_mappings( + lhs_mm.machine_mapping, + offset_layer_oblivious_mapping_by(rhs_mm.machine_mapping, split)); - return MachineMappingForSingleLayer{cost, mapping}; + return ParetoOptimalMachineMapping{cost, mapping}; }; - MachineMappingWithMemoryResult result = - empty_machine_mapping_with_memory_result(); - for (MachineMappingForSingleLayer const &lhs_mm : - lhs_result.machine_mappings) { - for (MachineMappingForSingleLayer const &rhs_mm : - rhs_result.machine_mappings) { - result.machine_mappings.insert(combine_machine_mapping(lhs_mm, rhs_mm)); + std::unordered_set result; + for (ParetoOptimalMachineMapping const &lhs_mm : + lhs_result.get_pareto_frontier()) { + + for (ParetoOptimalMachineMapping const &rhs_mm : + 
rhs_result.get_pareto_frontier()) { + + result.insert(combine_machine_mapping(lhs_mm, rhs_mm)); } } - return remove_non_pareto_optimal_machine_mapping_result(result); + return MachineMappingWithMemoryResult{ + /*pareto_frontier=*/filter(result, + [&](ParetoOptimalMachineMapping const &m) { + return is_pareto_optimal_in(m, result); + }), + }; } MachineMappingWithMemoryResult minimize_runtime(MachineMappingWithMemoryResult const &m1, MachineMappingWithMemoryResult const &m2) { - MachineMappingWithMemoryResult result = MachineMappingWithMemoryResult{ - set_union(m1.machine_mappings, m2.machine_mappings), - }; - return remove_non_pareto_optimal_machine_mapping_result(result); + std::unordered_set result = + set_union(m1.get_pareto_frontier(), m2.get_pareto_frontier()); + + return MachineMappingWithMemoryResult{ + /*pareto_frontier=*/filter(result, + [&](ParetoOptimalMachineMapping const &m) { + return is_pareto_optimal_in(m, result); + }), + }; } MachineMappingWithMemoryResult make_singleton_machine_mapping_with_memory_result( OpCostMetrics cost, MachineView const &machine_view) { return MachineMappingWithMemoryResult{{ - MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping{ cost, ParallelLayerGuidObliviousMachineMapping{{ {binary_tree_root_path(), machine_view}, @@ -151,3 +189,11 @@ MachineMappingWithMemoryResult } } // namespace FlexFlow + +namespace std { + +size_t hash<::FlexFlow::MachineMappingWithMemoryResult>::operator()(::FlexFlow::MachineMappingWithMemoryResult const &r) const { + return get_std_hash(r.tie()); +} + +} diff --git a/lib/compiler/src/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.cc b/lib/compiler/src/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.cc new file mode 100644 index 0000000000..d35ec7fb24 --- /dev/null +++ b/lib/compiler/src/compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.cc @@ -0,0 +1,16 @@ +#include "compiler/machine_mapping/memory_optimization/pareto_optimal_machine_mapping.h" +#include "utils/containers/transform.h" +#include "compiler/cost_estimator/op_cost_metrics.h" + +namespace FlexFlow { + +bool is_pareto_optimal_in(ParetoOptimalMachineMapping const &m, + std::unordered_set const &others) { + return is_pareto_optimal_in(m.cost, + transform(others, + [](ParetoOptimalMachineMapping const &m) { + return m.cost; + })); +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.cc b/lib/compiler/src/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.cc index ed60004bf4..1774dcc50f 100644 --- a/lib/compiler/src/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.cc +++ b/lib/compiler/src/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.cc @@ -1,8 +1,15 @@ #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h" +#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h" +#include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h" +#include "op-attrs/computation_graph_op_attrs.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "utils/containers/map_keys.h" -#include "utils/containers/merge_maps.h" +#include "utils/containers/binary_merge_disjoint_maps.h" +#include "utils/containers/require_same.h" #include "utils/containers/try_at.h" #include "utils/full_binary_tree/binary_tree_path.h" 
+#include "op-attrs/get_operator_task_space.h" namespace FlexFlow { @@ -10,15 +17,98 @@ ParallelLayerGuidObliviousMachineMapping binary_combine_mappings( ParallelLayerGuidObliviousMachineMapping const &lhs, ParallelLayerGuidObliviousMachineMapping const &rhs) { return ParallelLayerGuidObliviousMachineMapping{ - merge_disjoint_maps(map_keys(lhs.raw_mapping, nest_inside_left_child), - map_keys(rhs.raw_mapping, nest_inside_right_child)), + binary_merge_disjoint_maps(map_keys(lhs.raw_mapping, nest_inside_left_child), + map_keys(rhs.raw_mapping, nest_inside_right_child)), }; } +ParallelLayerGuidObliviousMachineMapping + restrict_to_left_child(ParallelLayerGuidObliviousMachineMapping const &) { + NOT_IMPLEMENTED(); +} + +ParallelLayerGuidObliviousMachineMapping + restrict_to_right_child(ParallelLayerGuidObliviousMachineMapping const &) { + NOT_IMPLEMENTED(); +} + std::optional get_machine_view_for_path( ParallelLayerGuidObliviousMachineMapping const &mapping, BinaryTreePath const &path) { return try_at(mapping.raw_mapping, path); } +std::unordered_map + get_machine_stencils_for_decomposition(ParallelComputationGraph const &pcg, + PCGBinarySPDecomposition const &decomposition, + ParallelLayerGuidObliviousMachineMapping const &mapping) { + std::unordered_set leaf_paths = require_same( + pcg_sp_tree_get_all_leaf_paths(decomposition), + keys(mapping.raw_mapping)); + + std::unordered_map + path_to_op_task_space_map + = map_values(pcg_sp_tree_get_path_to_leaf_map(decomposition), + [&](parallel_layer_guid_t l) -> OperatorTaskSpace { + return get_operator_task_space(pcg, l); + }); + + return generate_map(leaf_paths, + [&](BinaryTreePath const &p) -> MachineSpaceStencil { + return MachineSpaceStencil{ + /*operator_task_space=*/path_to_op_task_space_map.at(p), + /*machine_view=*/mapping.raw_mapping.at(p), + }; + }); +} + +std::unordered_map> + get_machine_stencils_for_mm_problem_tree(MachineMappingProblemTree const &tree, + ParallelLayerGuidObliviousMachineMapping const &mapping) { + + std::unordered_map + tree_leaf_map = mm_problem_tree_get_path_to_leaf_map(tree); + + std::unordered_set mapping_paths = keys(mapping.raw_mapping); + std::unordered_set tree_paths = keys(tree_leaf_map); + + ASSERT(is_subseteq_of(mapping_paths, tree_paths)); + + return generate_map(tree_paths, + [&](BinaryTreePath const &p) -> std::optional { + if (!contains_key(mapping.raw_mapping, p)) { + return std::nullopt; + } + + UnmappedRuntimeOnlyOpCostEstimateKey leaf = tree_leaf_map.at(p); + + ComputationGraphOpAttrs leaf_op_attrs = + compgraph_op_attrs_from_pcg_op_attrs(leaf.op_attrs).value(); + + std::vector leaf_input_degrees = + transform(leaf.input_shapes, + [](ParallelTensorShape const &s) { + return get_parallel_degrees(s); + }); + + return MachineSpaceStencil{ + /*operator_task_space=*/get_operator_task_space(leaf_op_attrs, leaf_input_degrees), + /*machine_view=*/mapping.raw_mapping.at(p), + }; + }); +} + +std::unordered_map + get_machine_stencils_for_partially_mapped_mm_problem_tree( + MachineMappingProblemTree const &tree, + ParallelLayerGuidObliviousMachineMapping const &mappings) { + + return filtermap_values(get_machine_stencils_for_mm_problem_tree(tree, mappings), + [](std::optional const &s) { + return s; + }); +} + + + } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_mapping/unstructured_device_mapping.cc b/lib/compiler/src/compiler/machine_mapping/unstructured_device_mapping.cc index 63e359d9ac..e09e81a4cf 100644 --- a/lib/compiler/src/compiler/machine_mapping/unstructured_device_mapping.cc +++ 
b/lib/compiler/src/compiler/machine_mapping/unstructured_device_mapping.cc @@ -1,10 +1,8 @@ - #include "compiler/machine_mapping/unstructured_device_mapping.h" #include "compiler/machine_mapping/unstructured_device_mapping.dtg.h" -#include "pcg/machine_specification.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/operator_task_space.h" #include "pcg/machine_view.h" -#include "pcg/operator_task_space.dtg.h" -#include "pcg/operator_task_space.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "utils/containers/keys.h" #include "utils/containers/map_values.h" @@ -13,7 +11,7 @@ namespace FlexFlow { UnstructuredDeviceMapping get_unstructured_device_mapping(MachineMapping const &machine_mapping, - MachineSpecification const &machine_spec, + MachineComputeSpecification const &machine_spec, ParallelComputationGraph const &pcg) { std::unordered_map> device_mapping; diff --git a/lib/compiler/src/compiler/mapped_operator_task_group.cc b/lib/compiler/src/compiler/mapped_operator_task_group.cc new file mode 100644 index 0000000000..2122d5691f --- /dev/null +++ b/lib/compiler/src/compiler/mapped_operator_task_group.cc @@ -0,0 +1,120 @@ +#include "compiler/mapped_operator_task_group.h" +#include "compiler/operator_atomic_task_shard_binding.h" +#include "compiler/task_signature_tensor_key.h" +#include "op-attrs/get_operator_task_space.h" +#include "op-attrs/operator_task_space.h" +#include "op-attrs/parallel_tensor_space_coordinate.h" +#include "pcg/machine_view.h" +#include "utils/bidict/generate_bidict.h" +#include "utils/containers/require_all_same.h" +#include "compiler/task_signature_tensor_key.dtg.h" +#include "utils/containers/transform.h" +#include "utils/containers/vector_of.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/containers/are_all_distinct.h" +#include "utils/hash/tuple.h" + +namespace FlexFlow { + +MappedOperatorTaskGroup::MappedOperatorTaskGroup( + bidict const &shard_bindings) + : shard_bindings(shard_bindings) +{ + auto check_arity = [&](TensorRole tensor_role) -> nonnegative_int { + std::unordered_set arities = + transform(shard_bindings.right_values(), + [&](OperatorAtomicTaskShardBinding const &s) -> nonnegative_int { + return num_elements(ptensor_space_coords_for_role(s, tensor_role)); + }); + + return require_all_same(arities).value_or(0_n); + }; + + nonnegative_int num_inputs = check_arity(TensorRole::INPUT); + nonnegative_int num_weights = check_arity(TensorRole::WEIGHT); + nonnegative_int num_outputs = check_arity(TensorRole::OUTPUT); + + std::unordered_set all_keys = + all_keys_for_signature_arities( + /*num_inputs=*/num_inputs, + /*num_weights=*/num_weights, + /*num_outputs=*/num_outputs); + + for (TaskSignatureTensorKey const &key : all_keys) { + std::vector signatures_for_key = vector_of(shard_bindings.right_values()); + + std::vector coords_for_key = + transform(signatures_for_key, + [&](OperatorAtomicTaskShardBinding const &signature) { + return ptensor_space_coord_for_key(signature, key); + }); + + ASSERT(are_all_distinct(coords_for_key)); + + std::vector coord_dims_for_key = + transform(coords_for_key, + [](ParallelTensorSpaceCoordinate const &c) { + return ptensor_coord_num_dims(c); + }); + + require_all_same(coord_dims_for_key); + } +} + +bool MappedOperatorTaskGroup::operator==(MappedOperatorTaskGroup const &other) const { + return this->tie() == other.tie(); +} + +bool MappedOperatorTaskGroup::operator!=(MappedOperatorTaskGroup const &other) const { + return this->tie() != other.tie(); +} 
+ +std::tuple< + bidict const & +> MappedOperatorTaskGroup::tie() const { + + return std::tie(this->shard_bindings); +} + +bidict const &MappedOperatorTaskGroup::get_shard_bindings() const { + return this->shard_bindings; +} + +std::string format_as(::FlexFlow::MappedOperatorTaskGroup const &m) { + return fmt::format("", m.get_shard_bindings()); +} + +std::ostream &operator<<(std::ostream &s, ::FlexFlow::MappedOperatorTaskGroup const &x) { + return (s << fmt::to_string(x)); +} + +MappedOperatorTaskGroup + mapped_operator_task_group_from_machine_view( + ComputationGraphOpAttrs const &op_attrs, + std::vector const &inputs_dim_degrees, + MachineView const &machine_view) { + + OperatorTaskSpace op_task_space = get_operator_task_space(op_attrs, inputs_dim_degrees); + + return MappedOperatorTaskGroup{ + generate_bidict(get_machine_space_coordinates(op_task_space, machine_view), + [&](MachineSpaceCoordinate const &machine_space_coord) { + return operator_atomic_task_shard_binding_from_machine_view( + op_attrs, + inputs_dim_degrees, + machine_view, + machine_space_coord); + }), + }; +} + + +} // namespace FlexFlow + +namespace std { + +size_t hash<::FlexFlow::MappedOperatorTaskGroup>::operator()(::FlexFlow::MappedOperatorTaskGroup const &x) const { + return ::FlexFlow::get_std_hash(x.tie()); +} + +} // namespace std diff --git a/lib/compiler/src/compiler/mapped_parallel_computation_graph.cc b/lib/compiler/src/compiler/mapped_parallel_computation_graph.cc new file mode 100644 index 0000000000..17f227c847 --- /dev/null +++ b/lib/compiler/src/compiler/mapped_parallel_computation_graph.cc @@ -0,0 +1,52 @@ +#include "compiler/mapped_parallel_computation_graph.h" +#include "op-attrs/computation_graph_op_attrs.h" + +namespace FlexFlow { + +MappedParallelComputationGraph + mapped_pcg_from_pcg_and_mapping( + ParallelComputationGraph const &pcg, + MachineMapping const &mapping) { + + return MappedParallelComputationGraph{ + /*pcg=*/pcg, + /*mapped_tasks=*/ + generate_map( + get_parallel_layers(pcg), + [&](parallel_layer_guid_t l) -> MappedOperatorTaskGroup { + ComputationGraphOpAttrs op_attrs = + compgraph_op_attrs_from_pcg_op_attrs(pcg_get_op_attrs(pcg, l)).value(); + + std::vector inputs_dim_degrees = + get_incoming_input_degrees(pcg, l); + + MachineView machine_view = mapping.machine_views.at(l); + + return mapped_operator_task_group_from_machine_view( + op_attrs, + inputs_dim_degrees, + machine_view); + }), + }; +} + + +bidict + get_tensor_shard_to_device_coord_mapping(ComputationGraphOpAttrs const &, + MachineView const &) { + NOT_IMPLEMENTED(); +} + + + +std::string format_as(MappedParallelComputationGraph const &mapped_pcg) { + return fmt::format("", + as_dot(mapped_pcg.pcg), + mapped_pcg.mapped_tasks); +} + +std::ostream &operator<<(std::ostream &s, MappedParallelComputationGraph const &mapped_pcg) { + return (s << fmt::to_string(mapped_pcg)); +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/operator_atomic_task_shard_binding.cc b/lib/compiler/src/compiler/operator_atomic_task_shard_binding.cc new file mode 100644 index 0000000000..d2455c7f4c --- /dev/null +++ b/lib/compiler/src/compiler/operator_atomic_task_shard_binding.cc @@ -0,0 +1,66 @@ +#include "compiler/operator_atomic_task_shard_binding.h" +#include "op-attrs/get_operator_space_to_parallel_tensor_space_mappings.h" +#include "op-attrs/get_operator_task_space.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "pcg/machine_view.h" +#include "utils/containers/at_idx.h" +#include + +namespace 
FlexFlow { + +OperatorAtomicTaskShardBinding + operator_atomic_task_shard_binding_from_machine_view(ComputationGraphOpAttrs const &op_attrs, + std::vector const &inputs_dim_degrees, + MachineView const &machine_view, + MachineSpaceCoordinate const &machine_space_coord) { + OperatorTaskSpace op_task_space = get_operator_task_space(op_attrs, inputs_dim_degrees); + + TaskSpaceCoordinate task_space_coord = mv_task_space_coord_for_machine_space_coord( + machine_view, + op_task_space, + machine_space_coord); + + auto get_ptensor_coords = [&](TensorRole const &tensor_role) { + std::vector + mappings = get_operator_to_ptensor_mappings_for_role(op_attrs, inputs_dim_degrees, tensor_role); + + std::vector + ptensor_coords = transform(mappings, + [&](OperatorSpaceToParallelTensorSpaceMapping const &mapping) { + return ptensor_coord_for_task_space_coord(mapping, task_space_coord); + }); + + return ptensor_coords; + }; + + return OperatorAtomicTaskShardBinding{ + /*inputs=*/get_ptensor_coords(TensorRole::INPUT), + /*weights=*/get_ptensor_coords(TensorRole::WEIGHT), + /*outputs=*/get_ptensor_coords(TensorRole::OUTPUT), + }; +} + +std::vector + ptensor_space_coords_for_role(OperatorAtomicTaskShardBinding const &op_task_signature, + TensorRole tensor_role) { + switch (tensor_role) { + case TensorRole::INPUT: + return op_task_signature.inputs; + case TensorRole::WEIGHT: + return op_task_signature.weights; + case TensorRole::OUTPUT: + return op_task_signature.outputs; + default: + PANIC("Unhandled TensorRole", tensor_role); + }; +} + +ParallelTensorSpaceCoordinate + ptensor_space_coord_for_key(OperatorAtomicTaskShardBinding const &op_task_signature, + TaskSignatureTensorKey const &tensor_key) { + return at_idx( + ptensor_space_coords_for_role(op_task_signature, tensor_key.tensor_role), + tensor_key.idx); +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/compiler/series_parallel/pcg/pcg_binary_sp_decomposition.cc b/lib/compiler/src/compiler/series_parallel/pcg/pcg_binary_sp_decomposition.cc index 5eb993c6ef..f6a1ee98ec 100644 --- a/lib/compiler/src/compiler/series_parallel/pcg/pcg_binary_sp_decomposition.cc +++ b/lib/compiler/src/compiler/series_parallel/pcg/pcg_binary_sp_decomposition.cc @@ -1,5 +1,6 @@ #include "compiler/series_parallel/pcg/pcg_binary_sp_decomposition.h" #include "compiler/series_parallel/pcg/pcg_binary_series_split.h" +#include "utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_path_to_leaf_map.h" #include "utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/find_paths_to_leaf.h" #include "utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_leaves.h" #include "utils/overload.h" @@ -106,10 +107,22 @@ SPDecompositionTreeNodeType }); } + +std::unordered_set + pcg_sp_tree_get_all_leaf_paths(PCGBinarySPDecomposition const &tree) { + return keys(pcg_sp_tree_get_path_to_leaf_map(tree)); +} + std::unordered_set find_paths_to_leaf(PCGBinarySPDecomposition const &tree, parallel_layer_guid_t const &leaf) { return find_paths_to_leaf(tree, generic_impl_for_pcg_sp_tree(), leaf); } +std::unordered_map + pcg_sp_tree_get_path_to_leaf_map(PCGBinarySPDecomposition const &tree) { + return get_path_to_leaf_map(tree, generic_impl_for_pcg_sp_tree()); +} + + } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/task_graph_simulator/pcg_task_graph.cc b/lib/compiler/src/compiler/task_graph_simulator/pcg_task_graph.cc index c072b0e61e..eaa245c7cc 100644 --- 
a/lib/compiler/src/compiler/task_graph_simulator/pcg_task_graph.cc +++ b/lib/compiler/src/compiler/task_graph_simulator/pcg_task_graph.cc @@ -2,11 +2,11 @@ #include "compiler/cost_estimator/runtime_only_op_cost_estimate_key.h" #include "compiler/cost_estimator/tensor_set_movement.h" #include "compiler/machine_mapping/machine_mapping.dtg.h" +#include "op-attrs/operator_task_space.h" #include "pcg/device_id_t.dtg.h" #include "pcg/machine_specification.dtg.h" #include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" -#include "pcg/operator_task_space.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h" @@ -20,7 +20,7 @@ namespace FlexFlow { PCGTaskGraph get_pcg_task_graph(ParallelComputationGraph const &pcg, MachineMapping const &machine_mapping, - MachineSpecification const &machine_spec) { + MachineComputeSpecification const &machine_spec) { DiGraph digraph = DiGraph::create(); bidict node_to_task; bidict node_to_layer; diff --git a/lib/compiler/src/compiler/task_graph_simulator/task_simulator.cc b/lib/compiler/src/compiler/task_graph_simulator/task_simulator.cc index a1aa53885b..4270edb007 100644 --- a/lib/compiler/src/compiler/task_graph_simulator/task_simulator.cc +++ b/lib/compiler/src/compiler/task_graph_simulator/task_simulator.cc @@ -26,7 +26,7 @@ milliseconds_t task_simulator_estimate_forward_pass_time( MachineSpecification const &machine_spec) { PCGTaskGraph task_graph = - get_pcg_task_graph(pcg, machine_mapping, machine_spec); + get_pcg_task_graph(pcg, machine_mapping, machine_spec.compute_specification); auto cost_function = [&](Node const &node) -> float { PCGTask task = task_graph.node_to_task.at_l(node); @@ -49,7 +49,7 @@ milliseconds_t task_simulator_estimate_forward_pass_time( PCGTask current_task = task_graph.node_to_task.at_l(task); UnstructuredDeviceMapping device_map = - get_unstructured_device_mapping(machine_mapping, machine_spec, pcg); + get_unstructured_device_mapping(machine_mapping, machine_spec.compute_specification, pcg); if (current_task.is_tensor_movement()) { return true; diff --git a/lib/compiler/src/compiler/task_signature_tensor_key.cc b/lib/compiler/src/compiler/task_signature_tensor_key.cc new file mode 100644 index 0000000000..b7d1377497 --- /dev/null +++ b/lib/compiler/src/compiler/task_signature_tensor_key.cc @@ -0,0 +1,34 @@ +#include "compiler/task_signature_tensor_key.h" +#include "utils/containers/unordered_set_of.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/containers/transform.h" +#include "utils/containers/set_union.h" + +namespace FlexFlow { + +std::unordered_set + all_keys_for_signature_arities( + nonnegative_int num_inputs, + nonnegative_int num_weights, + nonnegative_int num_outputs) { + + auto mk_key_set = [](nonnegative_int num, TensorRole role) { + return transform(unordered_set_of(nonnegative_range(num)), + [&](nonnegative_int idx) { + return TaskSignatureTensorKey{ + /*tensor_role=*/role, + /*idx=*/idx, + }; + }); + }; + + + return set_union(std::vector{ + mk_key_set(num_inputs, TensorRole::INPUT), + mk_key_set(num_weights, TensorRole::WEIGHT), + mk_key_set(num_outputs, TensorRole::OUTPUT), + }); +} + + +} // namespace FlexFlow diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index 15f7d60060..76ffed4709 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ 
b/lib/compiler/test/src/allowed_machine_views.cc @@ -1,11 +1,11 @@ #include "compiler/allowed_machine_views.h" -#include "doctest/doctest.h" #include "utils/containers/extend.h" #include "utils/containers/range.h" #include "utils/containers/transform.h" #include "utils/containers/unordered_set_of.h" #include "utils/containers/zip.h" #include "utils/fmt/unordered_set.h" +#include using namespace FlexFlow; @@ -14,15 +14,13 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_allowed_machine_views") { SUBCASE("1 degree of parallelism") { - MachineSpecification ms = MachineSpecification{ + MachineComputeSpecification ms = MachineComputeSpecification{ /*num_nodes=*/1_p, /*num_cpus_per_node=*/5_p, /*num_gpus_per_node=*/5_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0, }; - OperatorTaskSpace task = OperatorTaskSpace{{3_p}}; + OperatorTaskSpace task = OperatorTaskSpace{MinimalOrthotope{{3_ge2}}}; std::unordered_set correct = { MachineView{ @@ -60,14 +58,12 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2 degrees of parallelism") { - MachineSpecification ms = MachineSpecification{ + MachineComputeSpecification ms = MachineComputeSpecification{ /*num_nodes=*/3_p, /*num_cpus_per_node=*/3_p, /*num_gpus_per_node=*/3_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0, }; - OperatorTaskSpace task = OperatorTaskSpace{{2_p, 3_p}}; + OperatorTaskSpace task = OperatorTaskSpace{MinimalOrthotope{{2_ge2, 3_ge2}}}; auto make_2d_view = [&](nonnegative_int start_node_idx, nonnegative_int start_device_idx, diff --git a/lib/compiler/test/src/graph_optimize_state.cc b/lib/compiler/test/src/compiler/graph_optimize_state.cc similarity index 79% rename from lib/compiler/test/src/graph_optimize_state.cc rename to lib/compiler/test/src/compiler/graph_optimize_state.cc index e7060ef421..a8797f1c43 100644 --- a/lib/compiler/test/src/graph_optimize_state.cc +++ b/lib/compiler/test/src/compiler/graph_optimize_state.cc @@ -1,6 +1,9 @@ #include "compiler/graph_optimize_state.h" -#include "doctest/doctest.h" +#include "compiler/machine_mapping/machine_mapping.dtg.h" +#include "compiler/mapped_parallel_computation_graph.h" +#include "pcg/machine_view.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h" +#include using namespace FlexFlow; @@ -19,8 +22,9 @@ TEST_SUITE(FF_TEST_SUITE) { // `machine_mapping` is determined by the PCG and the device mapping // algorithm, and `runtime` is determined by the PCG and the device mapping, // so their values here do not matter. 
- std::unordered_map empty_machine_views; - MachineMapping empty_machine_mapping(empty_machine_views); + MachineMapping empty_machine_mapping = MachineMapping{ + std::unordered_map{}, + }; InitializerAttrs zero_init = InitializerAttrs{ZeroInitializerAttrs{}}; @@ -58,12 +62,16 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelComputationGraph pcg2 = create_pcg(); GraphOptimizeState state1 = GraphOptimizeState{ - GraphOptimizeResult{pcg1, empty_machine_mapping}, + GraphOptimizeResult{ + mapped_pcg_from_pcg_and_mapping(pcg1, empty_machine_mapping), + }, 0, }; GraphOptimizeState state2 = GraphOptimizeState{ - GraphOptimizeResult{pcg2, empty_machine_mapping}, + GraphOptimizeResult{ + mapped_pcg_from_pcg_and_mapping(pcg2, empty_machine_mapping), + }, 0, }; @@ -88,12 +96,16 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelComputationGraph pcg_ = builder_.pcg; GraphOptimizeState state1 = GraphOptimizeState{ - GraphOptimizeResult{pcg1, empty_machine_mapping}, + GraphOptimizeResult{ + mapped_pcg_from_pcg_and_mapping(pcg1, empty_machine_mapping), + }, 0, }; GraphOptimizeState state_ = GraphOptimizeState{ - GraphOptimizeResult{pcg_, empty_machine_mapping}, + GraphOptimizeResult{ + mapped_pcg_from_pcg_and_mapping(pcg_, empty_machine_mapping), + }, 0, }; diff --git a/lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc b/lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc index 0416a73660..5b1bfe6f55 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.cc @@ -1,4 +1,6 @@ #include "compiler/machine_mapping/abstracted_tensor_set_movement/get_abstracted_tensor_set_movement_across_split.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication.dtg.h" +#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h" #include "compiler/machine_mapping/transitive_reduced_pcg.h" #include "op-attrs/parallel_tensor_shape.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" @@ -9,6 +11,95 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_abstracted_single_tensor_movement_along_edge") { + ParallelComputationGraph pcg = empty_parallel_computation_graph(); + + TensorShape input_shape = TensorShape{ + TensorDims{ + FFOrdered{ + 10_p, + 12_p, + }, + }, + DataType::FLOAT, + }; + + ParallelTensorShape par_input_shape = lift_to_parallel(input_shape); + + ParallelLayerAttrs partition_attrs = ParallelLayerAttrs{ + /*op_attrs=*/PCGOperatorAttrs{ + RepartitionAttrs{ + /*repartition_dim=*/ff_dim_t{0_n}, + /*repartition_degree=*/2_p, + }, + }, + /*name=*/std::nullopt, + }; + + ParallelLayerAttrs relu_attrs = ParallelLayerAttrs{ + /*op_attrs=*/PCGOperatorAttrs{ + ElementUnaryAttrs{ + /*op_type=*/OperatorType::RELU, + /*scalar=*/std::nullopt, + }, + }, + /*name=*/std::nullopt, + }; + + ParallelLayerAddedResult input = pcg_add_input_layer(pcg, input_shape); + parallel_tensor_guid_t t_input = get_only(input.outputs); + ParallelLayerAddedResult partition_input = + add_parallel_layer(pcg, partition_attrs, {t_input}, {}); + parallel_tensor_guid_t t_partition_input = get_only(partition_input.outputs); + + ParallelLayerAddedResult layer_1 = + 
add_parallel_layer(pcg, relu_attrs, {t_partition_input}, {}); + parallel_tensor_guid_t t_layer_1 = get_only(layer_1.outputs); + ParallelLayerAddedResult layer_2 = + add_parallel_layer(pcg, relu_attrs, {t_layer_1}, {}); + + ParallelComputationGraphEdge edge + = get_only( + get_pcg_edges_from_layer_to_layer( + /*pcg=*/pcg, + /*src=*/layer_1.parallel_layer, + /*dst=*/layer_2.parallel_layer)); + + BinaryTreePath src_path = BinaryTreePath{{}}; + BinaryTreePath dst_path = BinaryTreePath{{}}; + + AbstractedSingleTensorMovement result + = get_abstracted_single_tensor_movement_along_edge(pcg, edge, src_path, dst_path); + + num_bytes_t shard_size = get_piece_size_in_bytes(get_parallel_tensor_shape(pcg, t_layer_1)); + + auto mk_single_tensor_communication = [&](nonnegative_int src_coord, + nonnegative_int dst_coord) + -> AbstractedSingleTensorCommunication + { + return AbstractedSingleTensorCommunication{ + /*edge=*/AbstractedSingleTensorCommunicationEdge{ + /*src_coord=*/TaskSpaceCoordinate{OrthotopeCoord{{src_coord}}}, + /*dst=*/AbstractedDevice{ + /*operator_tree_path=*/dst_path, + /*task_space_coordinate=*/TaskSpaceCoordinate{OrthotopeCoord{{dst_coord}}}, + }, + }, + /*size=*/shard_size, + }; + }; + + AbstractedSingleTensorMovement correct = + abstracted_single_tensor_movement_from_communications( + /*src_op_tree_path=*/src_path, + /*communications=*/{ + mk_single_tensor_communication(0_n, 0_n), + mk_single_tensor_communication(1_n, 1_n), + }); + + CHECK(result == correct); + } + TEST_CASE("get_abstracted_tensor_set_movement_across_split") { auto make_series_split = [](PCGBinarySPDecomposition const &lhs, PCGBinarySPDecomposition const &rhs) { @@ -70,6 +161,21 @@ TEST_SUITE(FF_TEST_SUITE) { /*name=*/std::nullopt, }; + auto mk_task_space_coord = [&](nonnegative_int coord) { + return TaskSpaceCoordinate{ + OrthotopeCoord{{ + coord, + }}, + }; + }; + + auto mk_abstracted_device = [&](BinaryTreePath const &path, nonnegative_int coord) { + return AbstractedDevice{ + /*operator_tree_path=*/path, + /*task_space_coordinate=*/mk_task_space_coord(coord), + }; + }; + SUBCASE("no edges across split") { ParallelLayerAddedResult input1 = pcg_add_input_layer(pcg, input_shape); parallel_tensor_guid_t t_input1 = get_only(input1.outputs); @@ -104,7 +210,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t t_input = get_only(input.outputs); ParallelLayerAddedResult partition_input = add_parallel_layer(pcg, partition_attrs, {t_input}, {}); - parallel_tensor_guid_t t_partition_input = get_only(input.outputs); + parallel_tensor_guid_t t_partition_input = get_only(partition_input.outputs); ParallelLayerAddedResult layer_1 = add_parallel_layer(pcg, relu_attrs, {t_partition_input}, {}); @@ -124,22 +230,31 @@ TEST_SUITE(FF_TEST_SUITE) { get_abstracted_tensor_set_movement_across_split( pcg_get_transitive_reduction(pcg), split); + BinaryTreePath src_path = BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}; + + BinaryTreePath dst_path = BinaryTreePath{{}}; + + auto mk_abstracted_edge = [&](nonnegative_int src_coord, nonnegative_int dst_coord) { + return AbstractedSingleTensorCommunicationEdge{ + /*src=*/mk_task_space_coord(src_coord), + /*dst=*/mk_abstracted_device(dst_path, dst_coord), + }; + }; + + num_bytes_t shard_size = get_size_in_bytes(get_reduced_shape(get_parallel_tensor_shape(pcg, t_layer_1))); + AbstractedTensorSetMovement correct = AbstractedTensorSetMovement{ - /*single_tensor_movements=*/{ - AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/par_input_shape, - /*src_machine_views=*/ - { - 
BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - }, - /*dst_machine_views=*/ - { - BinaryTreePath{{}}, - }, - }, + /*single_tensor_movements=*/{ + AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/src_path, + /*edge_to_size=*/{ + {mk_abstracted_edge(0_n, 0_n), shard_size}, + {mk_abstracted_edge(1_n, 1_n), shard_size}, + }, }, + }, }; CHECK(result == correct); @@ -150,7 +265,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t t_input = get_only(input.outputs); ParallelLayerAddedResult partition_input = add_parallel_layer(pcg, partition_attrs, {t_input}, {}); - parallel_tensor_guid_t t_partition_input = get_only(input.outputs); + parallel_tensor_guid_t t_partition_input = get_only(partition_input.outputs); ParallelLayerAddedResult layer_1 = add_parallel_layer(pcg, relu_attrs, {t_partition_input}, {}); @@ -176,23 +291,32 @@ TEST_SUITE(FF_TEST_SUITE) { get_abstracted_tensor_set_movement_across_split( pcg_get_transitive_reduction(pcg), split); + BinaryTreePath src_path = BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + BinaryTreePathEntry::RIGHT_CHILD, + }}; + + BinaryTreePath dst_path = BinaryTreePath{{}}; + + auto mk_abstracted_edge = [&](nonnegative_int src_coord, nonnegative_int dst_coord) { + return AbstractedSingleTensorCommunicationEdge{ + /*src=*/mk_task_space_coord(src_coord), + /*dst=*/mk_abstracted_device(dst_path, dst_coord), + }; + }; + + num_bytes_t shard_size = get_size_in_bytes(get_reduced_shape(get_parallel_tensor_shape(pcg, t_layer_2))); + AbstractedTensorSetMovement correct = AbstractedTensorSetMovement{ - /*single_tensor_movements=*/{ - AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/par_input_shape, - /*src_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - BinaryTreePathEntry::RIGHT_CHILD, - }}, - }, - /*dst_machine_views=*/ - { - BinaryTreePath{{}}, - }, - }, + /*single_tensor_movements=*/{ + AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/src_path, + /*edge_to_size=*/{ + {mk_abstracted_edge(0_n, 0_n), shard_size}, + {mk_abstracted_edge(1_n, 1_n), shard_size}, + }, }, + }, }; CHECK(result == correct); @@ -203,7 +327,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t t_input = get_only(input.outputs); ParallelLayerAddedResult partition_input = add_parallel_layer(pcg, partition_attrs, {t_input}, {}); - parallel_tensor_guid_t t_partition_input = get_only(input.outputs); + parallel_tensor_guid_t t_partition_input = get_only(partition_input.outputs); ParallelLayerAddedResult layer_1 = add_parallel_layer(pcg, relu_attrs, {t_partition_input}, {}); @@ -228,27 +352,39 @@ TEST_SUITE(FF_TEST_SUITE) { get_abstracted_tensor_set_movement_across_split( pcg_get_transitive_reduction(pcg), split); + BinaryTreePath src_path = BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}; + + BinaryTreePath dst1_path = BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}; + + BinaryTreePath dst2_path = BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}; + + auto mk_abstracted_edge = [&](nonnegative_int src_coord, BinaryTreePath dst_path, nonnegative_int dst_coord) { + return AbstractedSingleTensorCommunicationEdge{ + /*src=*/mk_task_space_coord(src_coord), + /*dst=*/mk_abstracted_device(dst_path, dst_coord), + }; + }; + + num_bytes_t shard_size = get_size_in_bytes(get_reduced_shape(get_parallel_tensor_shape(pcg, t_layer_1))); + AbstractedTensorSetMovement correct = AbstractedTensorSetMovement{ - /*single_tensor_movements=*/{ - AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/par_input_shape, - 
/*src_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - }, - /*dst_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::LEFT_CHILD, - }}, - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - }, - }, + /*single_tensor_movements=*/{ + AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/src_path, + /*edge_to_size=*/{ + {mk_abstracted_edge(0_n, dst1_path, 0_n), shard_size}, + {mk_abstracted_edge(1_n, dst1_path, 1_n), shard_size}, + {mk_abstracted_edge(0_n, dst2_path, 0_n), shard_size}, + {mk_abstracted_edge(1_n, dst2_path, 1_n), shard_size}, + }, }, + }, }; CHECK(result == correct); @@ -259,21 +395,23 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_tensor_guid_t t_input = get_only(input.outputs); ParallelLayerAddedResult partition_input = add_parallel_layer(pcg, partition_attrs, {t_input}, {}); - parallel_tensor_guid_t t_partition_input = get_only(input.outputs); + parallel_tensor_guid_t t_partition_input = get_only(partition_input.outputs); ParallelLayerAddedResult layer_1 = add_parallel_layer(pcg, relu_attrs, {t_partition_input}, {}); + parallel_tensor_guid_t t_layer_1 = get_only(layer_1.outputs); ParallelLayerAddedResult layer_2 = add_parallel_layer(pcg, relu_attrs, {t_partition_input}, {}); + parallel_tensor_guid_t t_layer_2 = get_only(layer_2.outputs); ParallelLayerAddedResult layer_3 = - add_parallel_layer(pcg, relu_attrs, {get_only(layer_1.outputs)}, {}); + add_parallel_layer(pcg, relu_attrs, {t_layer_1}, {}); ParallelLayerAddedResult layer_4 = add_parallel_layer( pcg, ew_add_attrs, - {get_only(layer_1.outputs), get_only(layer_2.outputs)}, + {t_layer_1, t_layer_2}, {}); PCGBinarySeriesSplit split = PCGBinarySeriesSplit{ @@ -289,44 +427,53 @@ TEST_SUITE(FF_TEST_SUITE) { get_abstracted_tensor_set_movement_across_split( pcg_get_transitive_reduction(pcg), split); + BinaryTreePath src1_path = BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + BinaryTreePathEntry::LEFT_CHILD, + }}; + + BinaryTreePath src2_path = BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + BinaryTreePathEntry::RIGHT_CHILD, + }}; + + BinaryTreePath dst1_path = BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}; + + BinaryTreePath dst2_path = BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}; + + auto mk_abstracted_edge = [&](nonnegative_int src_coord, BinaryTreePath dst_path, nonnegative_int dst_coord) { + return AbstractedSingleTensorCommunicationEdge{ + /*src=*/mk_task_space_coord(src_coord), + /*dst=*/mk_abstracted_device(dst_path, dst_coord), + }; + }; + + num_bytes_t t1_shard_size = get_size_in_bytes(get_reduced_shape(get_parallel_tensor_shape(pcg, t_layer_1))); + num_bytes_t t2_shard_size = get_size_in_bytes(get_reduced_shape(get_parallel_tensor_shape(pcg, t_layer_2))); + AbstractedTensorSetMovement correct = AbstractedTensorSetMovement{ - /*single_tensor_movements=*/{ - AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/par_input_shape, - /*src_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - BinaryTreePathEntry::LEFT_CHILD, - }}, - }, - /*dst_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::LEFT_CHILD, - }}, - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - }, - }, - AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/par_input_shape, - /*src_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - BinaryTreePathEntry::RIGHT_CHILD, - }}, - }, - /*dst_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - }, - }, + 
/*single_tensor_movements=*/{ + AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/src1_path, + /*edge_to_size=*/{ + {mk_abstracted_edge(0_n, dst1_path, 0_n), t1_shard_size}, + {mk_abstracted_edge(1_n, dst1_path, 1_n), t1_shard_size}, + {mk_abstracted_edge(0_n, dst2_path, 0_n), t1_shard_size}, + {mk_abstracted_edge(1_n, dst2_path, 1_n), t1_shard_size}, + }, }, + AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/src2_path, + /*edge_to_size=*/{ + {mk_abstracted_edge(0_n, dst2_path, 0_n), t2_shard_size}, + {mk_abstracted_edge(1_n, dst2_path, 1_n), t2_shard_size}, + }, + }, + }, }; CHECK(result == correct); diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_machine_resource_splits.cc b/lib/compiler/test/src/compiler/machine_mapping/get_machine_resource_splits.cc deleted file mode 100644 index 5ae89a8123..0000000000 --- a/lib/compiler/test/src/compiler/machine_mapping/get_machine_resource_splits.cc +++ /dev/null @@ -1,240 +0,0 @@ -#include "compiler/machine_mapping/get_machine_resource_splits.h" -#include "test/utils/doctest/fmt/pair.h" -#include "test/utils/doctest/fmt/unordered_set.h" -#include "utils/hash/pair.h" -#include - -using namespace ::FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_machine_resource_splits") { - auto make_machine_spec = [](positive_int num_nodes, - positive_int num_gpus_per_node) { - return MachineSpecification{ - /*num_nodes=*/num_nodes, - /*num_cpus_per_node=*/1_p, - /*num_gpus_per_node=*/num_gpus_per_node, - /*inter_node_bandwidth=*/1.0, - /*intra_node_bandwidth=*/1.0, - }; - }; - - SUBCASE("returns no splits if no splits are possible") { - MachineSpecification input = make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/1_p); - - std::unordered_set> - result = get_machine_resource_splits(input); - std::unordered_set> - correct = {}; - - CHECK(result == correct); - } - - SUBCASE( - "returns splits in gpu and node dimensions, but not at the same time") { - MachineSpecification input = make_machine_spec(/*num_nodes=*/2_p, - /*num_gpus_per_node=*/2_p); - - std::unordered_set> - result = get_machine_resource_splits(input); - - std::unordered_set> - correct = { - { - make_machine_spec(/*num_nodes=*/2_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/2_p, - /*num_gpus_per_node=*/1_p), - }, - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/2_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/2_p), - }, - - }; - - CHECK(result == correct); - } - - SUBCASE("returns splits in node dimension in powers of two") { - SUBCASE("num_nodes is a power of 2") { - MachineSpecification input = - make_machine_spec(/*num_nodes=*/8_p, - /*num_gpus_per_node=*/1_p); - - std::unordered_set< - std::pair> - result = get_machine_resource_splits(input); - - std::unordered_set< - std::pair> - correct = { - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/7_p, - /*num_gpus_per_node=*/1_p), - }, - { - make_machine_spec(/*num_nodes=*/2_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/6_p, - /*num_gpus_per_node=*/1_p), - }, - { - make_machine_spec(/*num_nodes=*/4_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/4_p, - /*num_gpus_per_node=*/1_p), - }, - { - make_machine_spec(/*num_nodes=*/6_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/2_p, - /*num_gpus_per_node=*/1_p), - }, - { - make_machine_spec(/*num_nodes=*/7_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/1_p, - 
/*num_gpus_per_node=*/1_p), - }, - }; - - CHECK(result == correct); - } - - SUBCASE("num_nodes is not a power of 2") { - MachineSpecification input = - make_machine_spec(/*num_nodes=*/6_p, - /*num_gpus_per_node=*/1_p); - - std::unordered_set< - std::pair> - result = get_machine_resource_splits(input); - - std::unordered_set< - std::pair> - correct = { - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/5_p, - /*num_gpus_per_node=*/1_p), - }, - { - make_machine_spec(/*num_nodes=*/2_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/4_p, - /*num_gpus_per_node=*/1_p), - }, - { - make_machine_spec(/*num_nodes=*/4_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/2_p, - /*num_gpus_per_node=*/1_p), - }, - { - make_machine_spec(/*num_nodes=*/5_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/1_p), - }, - }; - - CHECK(result == correct); - } - } - - SUBCASE("returns splits in gpu dimension in powers of two") { - SUBCASE("num_gpus_per_node is a power of 2") { - MachineSpecification input = - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/8_p); - - std::unordered_set< - std::pair> - result = get_machine_resource_splits(input); - - std::unordered_set< - std::pair> - correct = { - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/7_p), - }, - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/2_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/6_p), - }, - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/4_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/4_p), - }, - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/6_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/2_p), - }, - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/7_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/1_p), - }, - }; - - CHECK(result == correct); - } - - SUBCASE("num_gpus_per_node is not a power of 2") { - MachineSpecification input = - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/6_p); - - std::unordered_set< - std::pair> - result = get_machine_resource_splits(input); - - std::unordered_set< - std::pair> - correct = { - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/1_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/5_p), - }, - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/2_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/4_p), - }, - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/4_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/2_p), - }, - { - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/5_p), - make_machine_spec(/*num_nodes=*/1_p, - /*num_gpus_per_node=*/1_p), - }, - }; - } - } - } -} diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc index 2cbc87cffe..ea04c96964 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc @@ -2,6 +2,7 @@ #include "compiler/cost_estimator/runtime_only_op_cost_estimate_key.dtg.h" #include "compiler/cost_estimator/runtime_only_op_cost_metrics.dtg.h" #include 
"compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h" +#include "compiler/machine_mapping/machine_compute_resource_slice.h" #include "compiler/machine_mapping/machine_mapping_cache.h" #include "compiler/machine_mapping/machine_mapping_constraints.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h" @@ -9,6 +10,7 @@ #include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_runtime_only_op_cost_estimate_key.h" #include "internal/runtime_only_cost_estimator_for_test.h" #include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/task_space_coordinate.h" #include "pcg/machine_view.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h" #include "utils/containers/get_only.h" @@ -47,7 +49,7 @@ TEST_SUITE(FF_TEST_SUITE) { }; }; - MachineView mv1 = MachineView{ + MachineView mv_stride_1 = MachineView{ /*start=*/MachineSpaceCoordinate{ /*node_idx=*/0_n, /*device_idx=*/0_n, @@ -57,12 +59,12 @@ TEST_SUITE(FF_TEST_SUITE) { { MachineViewDimension{ stride_t{1_p}, - MachineSpecificationDimension::INTRA_NODE, + MachineSpecificationDimension::INTER_NODE, }, }, }; - MachineView mv2 = MachineView{ + MachineView mv_stride_2 = MachineView{ /*start=*/MachineSpaceCoordinate{ /*node_idx=*/0_n, /*device_idx=*/0_n, @@ -72,37 +74,31 @@ TEST_SUITE(FF_TEST_SUITE) { { MachineViewDimension{ stride_t{2_p}, - MachineSpecificationDimension::INTRA_NODE, + MachineSpecificationDimension::INTER_NODE, }, }, }; - MachineSpecification full_machine_spec = MachineSpecification{ - /*num_nodes=*/2_p, - /*num_cpus_per_node=*/1_p, + MachineComputeResourceSlice four_nodes_resources = MachineComputeResourceSlice{ + /*num_nodes=*/4_p, /*num_gpus_per_node=*/1_p, - /*inter_node_bandwidth=*/1, - /*intra_node_bandwidth=*/1, }; - MachineSpecification split_machine_spec = MachineSpecification{ + MachineComputeResourceSlice three_nodes_resources = MachineComputeResourceSlice{ + /*num_nodes=*/3_p, + /*num_gpus_per_node=*/1_p, + }; + + MachineComputeResourceSlice two_nodes_resources = MachineComputeResourceSlice{ + /*num_nodes=*/2_p, + /*num_gpus_per_node=*/1_p, + }; + + MachineComputeResourceSlice one_node_resources = MachineComputeResourceSlice{ /*num_nodes=*/1_p, - /*num_cpus_per_node=*/1_p, /*num_gpus_per_node=*/1_p, - /*inter_node_bandwidth=*/1, - /*intra_node_bandwidth=*/1, }; - auto allowed_machine_views1 = - [&](UnmappedRuntimeOnlyOpCostEstimateKey const &, - MachineSpecification const &resources) { - if (resources == full_machine_spec) { - return std::unordered_set{mv1, mv2}; - } else { - return std::unordered_set{mv2}; - } - }; - TensorShape tensor_shape = TensorShape{ TensorDims{ FFOrdered{ @@ -113,84 +109,73 @@ TEST_SUITE(FF_TEST_SUITE) { DataType::FLOAT, }; - UnmappedRuntimeOnlyOpCostEstimateKey k1 = - UnmappedRuntimeOnlyOpCostEstimateKey{ - /*op_attrs=*/PCGOperatorAttrs{InputAttrs{tensor_shape}}, - /*input_shapes=*/{}, - /*weight_shapes=*/{}, - /*output_shapes=*/{}, - }; - - UnmappedRuntimeOnlyOpCostEstimateKey k2 = - UnmappedRuntimeOnlyOpCostEstimateKey{ - /*op_attrs=*/PCGOperatorAttrs{ElementBinaryAttrs{ - /*type=*/OperatorType::EW_ADD, - /*compute_type=*/DataType::FLOAT, - /*should_broadcast_lhs=*/false, - /*should_broadcast_rhs=*/false, - }}, - /*input_shapes=*/{}, - /*weight_shapes=*/{}, - /*output_shapes=*/{}, - }; - - ParallelTensorShape par_tensor_shape = lift_to_parallel(tensor_shape); - - AbstractedTensorSetMovement movement1 = AbstractedTensorSetMovement{{ - AbstractedSingleTensorMovement{ - 
/*parallel_tensor_shape=*/par_tensor_shape, - /*src_machine_views=*/{}, - /*dst_machine_views=*/{}, - }, - }}; + BinaryTreePath src_path = binary_tree_root_path(); ParallelLayerGuidObliviousMachineMapping mm1 = ParallelLayerGuidObliviousMachineMapping{{ - {binary_tree_root_path(), mv1}, + {binary_tree_root_path(), mv_stride_1}, }}; ParallelLayerGuidObliviousMachineMapping mm2 = ParallelLayerGuidObliviousMachineMapping{{ - {binary_tree_root_path(), mv2}, + {binary_tree_root_path(), mv_stride_2}, }}; - auto map1 = std::unordered_map{{ - {map_unmapped_runtime_only_op_cost_estimate_key(k1, mv1), - RuntimeOnlyOpCostMetrics{/*forward_runtime=*/0.5_ms, - /*backward_runtime=*/0.5_ms}}, - {map_unmapped_runtime_only_op_cost_estimate_key(k2, mv1), - RuntimeOnlyOpCostMetrics{/*forward_runtime=*/1.0_ms, - /*backward_runtime=*/1.0_ms}}, - {map_unmapped_runtime_only_op_cost_estimate_key(k1, mv2), - RuntimeOnlyOpCostMetrics{/*forward_runtime=*/0.75_ms, - /*backward_runtime=*/0.75_ms}}, - {map_unmapped_runtime_only_op_cost_estimate_key(k2, mv2), - RuntimeOnlyOpCostMetrics{/*forward_runtime=*/1.25_ms, - /*backward_runtime=*/1.25_ms}}, - }}; - - RuntimeOnlyCostEstimator runtime_only_cost_estimator = - make_fake_runtime_only_cost_estimator( - map1, - std::unordered_map{{ - {TensorSetMovement{{}}, 0.0_ms}, - {concretize_abstracted_tensor_set_movement(movement1, mm1, mm1), - 0.1_ms}, - {concretize_abstracted_tensor_set_movement(movement1, mm2, mm2), - 0.2_ms}, - {concretize_abstracted_tensor_set_movement(movement1, mm1, mm2), - 0.3_ms}, - {concretize_abstracted_tensor_set_movement(movement1, mm2, mm1), - 0.4_ms}, - }}); - - MachineMappingContext context = MachineMappingContext{ - runtime_only_cost_estimator, - allowed_machine_views1, + OperatorTaskSpace task_space = OperatorTaskSpace{ + MinimalOrthotope{{ + 2_ge2, + }}, }; MachineMappingCache cache = empty_machine_mapping_cache(); + ParallelTensorShape par_tensor_shape = + lift_to_parallel_with_degrees( + tensor_shape, + ParallelTensorDimDegrees{ + /*sum_degree=*/SumDegree{1_p}, + /*discard_copy_degree=*/DiscardCopyDegree{1_p}, + /*shard_degrees=*/FFOrdered{ + 2_p, + 1_p, + }, + }); + + UnmappedRuntimeOnlyOpCostEstimateKey k1 = UnmappedRuntimeOnlyOpCostEstimateKey{ + /*op_attrs=*/PCGOperatorAttrs{ElementUnaryAttrs{ + /*type=*/OperatorType::GELU, + /*scalar=*/std::nullopt, + }}, + /*input_shapes=*/{par_tensor_shape}, + /*weight_shapes=*/{}, + /*output_shapes=*/{par_tensor_shape}, + }; + + UnmappedRuntimeOnlyOpCostEstimateKey k2 = UnmappedRuntimeOnlyOpCostEstimateKey{ + /*op_attrs=*/PCGOperatorAttrs{ElementUnaryAttrs{ + /*type=*/OperatorType::RELU, + /*scalar=*/std::nullopt, + }}, + /*input_shapes=*/{par_tensor_shape}, + /*weight_shapes=*/{}, + /*output_shapes=*/{par_tensor_shape}, + }; + + auto mk_cost_metrics = [&](float total_cost) { + return RuntimeOnlyOpCostMetrics{ + /*forward_runtime=*/milliseconds_t{total_cost / 2}, + /*backward_runtime=*/milliseconds_t{total_cost / 2}, + }; + }; + + auto mk_cost_entry = [&](UnmappedRuntimeOnlyOpCostEstimateKey const &key, + MachineView const &mv, + float total_cost) { + return std::pair{ + map_unmapped_runtime_only_op_cost_estimate_key(key, mv), + mk_cost_metrics(total_cost), + }; + }; + SUBCASE("single layer") { MachineMappingProblemTree problem_tree = make_leaf(k1); @@ -198,14 +183,40 @@ TEST_SUITE(FF_TEST_SUITE) { get_unconstrained_solution_for_layers( get_all_leaf_paths(problem_tree)); + auto allowed_machine_views = + [&](UnmappedRuntimeOnlyOpCostEstimateKey const &k, + MachineComputeResourceSlice const &resources) { 
+ ASSERT(k == k1); + ASSERT(resources == four_nodes_resources); + + return std::unordered_set{ + mv_stride_1, + mv_stride_2, + }; + }; + + RuntimeOnlyCostEstimator runtime_only_cost_estimator = + make_fake_runtime_only_cost_estimator( + { + mk_cost_entry(k1, mv_stride_1, 1), + mk_cost_entry(k1, mv_stride_2, 2), + }, + std::unordered_map{{}}); + + MachineMappingContext context = MachineMappingContext{ + /*cost_estimator=*/runtime_only_cost_estimator, + /*allowed_machine_views=*/allowed_machine_views, + }; + MachineMappingResult result = get_optimal_machine_mapping( - cache, context, problem_tree, full_machine_spec, constraints); + cache, context, problem_tree, four_nodes_resources, constraints); + MachineMappingResult correct = MachineMappingResult{ FeasibleMachineMappingResult{ - /*runtime=*/1.0_ms, + /*runtime=*/1_ms, /*machine_mapping=*/ ParallelLayerGuidObliviousMachineMapping{{ - {binary_tree_root_path(), mv1}, + {binary_tree_root_path(), mv_stride_1}, }}, }, }; @@ -214,37 +225,188 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("pair of layers in sequence") { + AbstractedTensorSetMovement k1_to_k2 = AbstractedTensorSetMovement{ + /*single_tensor_movements=*/{ + AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/binary_tree_root_path(), + /*edge_to_size=*/{ + { + AbstractedSingleTensorCommunicationEdge{ + /*src_coord=*/make_task_space_coordinate({0_n}), + /*dst=*/AbstractedDevice{ + /*operator_tree_path=*/binary_tree_root_path(), + /*task_space_coordinate=*/make_task_space_coordinate({0_n}), + }, + }, + get_size_in_bytes(tensor_shape), + }, + { + AbstractedSingleTensorCommunicationEdge{ + /*src_coord=*/make_task_space_coordinate({1_n}), + /*dst=*/AbstractedDevice{ + /*operator_tree_path=*/binary_tree_root_path(), + /*task_space_coordinate=*/make_task_space_coordinate({1_n}), + }, + }, + get_size_in_bytes(tensor_shape), + }, + }, + }, + }, + }; + MachineMappingProblemTree problem_tree = - make_series_split(movement1, make_leaf(k1), make_leaf(k2)); + make_series_split(k1_to_k2, make_leaf(k1), make_leaf(k2)); + + auto mk_tensor_set_movement = [&]( + MachineView const &src_mv, + MachineView const &dst_mv) { + + MachineSpaceStencil src_stencil = MachineSpaceStencil{ + /*operator_task_space=*/task_space, + /*machine_view=*/src_mv, + }; + + MachineSpaceStencil dst_stencil = MachineSpaceStencil{ + /*operator_task_space=*/task_space, + /*machine_view=*/dst_mv, + }; + + return concretize_abstracted_tensor_set_movement( + k1_to_k2, + /*pre_machine_stencils=*/{{binary_tree_root_path(), src_stencil}}, + /*post_machine_stencils=*/{{binary_tree_root_path(), dst_stencil}}); + }; + + auto allowed_machine_views = + [&](UnmappedRuntimeOnlyOpCostEstimateKey const &k, + MachineComputeResourceSlice const &resources) { + if (resources == four_nodes_resources) { + return std::unordered_set{mv_stride_1, mv_stride_2}; + } else if (resources == three_nodes_resources) { + return std::unordered_set{mv_stride_1, mv_stride_2}; + } else if (resources == two_nodes_resources) { + return std::unordered_set{mv_stride_1}; + } else { + return std::unordered_set{}; + } + }; MachineMappingConstraints constraints = get_unconstrained_solution_for_layers( get_all_leaf_paths(problem_tree)); - MachineMappingResult result = get_optimal_machine_mapping( - cache, context, problem_tree, full_machine_spec, constraints); - MachineMappingResult correct = MachineMappingResult{ - FeasibleMachineMappingResult{ - /*runtime=*/1.0_ms + 2.0_ms + 0.1_ms, - /*machine_mapping=*/ - ParallelLayerGuidObliviousMachineMapping{{ - { - BinaryTreePath{{ - 
BinaryTreePathEntry::LEFT_CHILD, - }}, - mv1, - }, - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - mv1, - }, - }}, - }, - }; + SUBCASE("solution requires taking comm cost into account") { + RuntimeOnlyCostEstimator runtime_only_cost_estimator = + make_fake_runtime_only_cost_estimator( + std::unordered_map{{ + mk_cost_entry(k1, mv_stride_1, 1), + mk_cost_entry(k1, mv_stride_2, 3), + mk_cost_entry(k2, mv_stride_1, 4), + mk_cost_entry(k2, mv_stride_2, 1), + }}, + std::unordered_map{{ + { + TensorSetMovement{{}}, + 0.0_ms, + }, + { + mk_tensor_set_movement(mv_stride_1, mv_stride_2), + 5_ms, + }, + { + mk_tensor_set_movement(mv_stride_2, mv_stride_1), + 5_ms, + }, + }}); + + MachineMappingContext context = MachineMappingContext{ + /*cost_estimator=*/runtime_only_cost_estimator, + /*allowed_machine_views=*/allowed_machine_views, + }; - CHECK(result == correct); + MachineMappingResult result = get_optimal_machine_mapping( + cache, context, problem_tree, four_nodes_resources, constraints); + + MachineMappingResult correct = MachineMappingResult{ + FeasibleMachineMappingResult{ + /*runtime=*/1.0_ms + 3.0_ms, + /*machine_mapping=*/ + ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}, + mv_stride_2, + }, + { + BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}, + mv_stride_2, + }, + }}, + }, + }; + + CHECK(result == correct); + } + + SUBCASE("solution places operators on different machine views") { + RuntimeOnlyCostEstimator runtime_only_cost_estimator = + make_fake_runtime_only_cost_estimator( + std::unordered_map{{ + mk_cost_entry(k1, mv_stride_1, 1), + mk_cost_entry(k1, mv_stride_2, 3), + mk_cost_entry(k2, mv_stride_1, 4), + mk_cost_entry(k2, mv_stride_2, 1), + }}, + std::unordered_map{{ + { + TensorSetMovement{{}}, + 0.0_ms, + }, + { + mk_tensor_set_movement(mv_stride_1, mv_stride_2), + 1_ms, + }, + { + mk_tensor_set_movement(mv_stride_2, mv_stride_1), + 1_ms, + }, + }}); + + MachineMappingContext context = MachineMappingContext{ + /*cost_estimator=*/runtime_only_cost_estimator, + /*allowed_machine_views=*/allowed_machine_views, + }; + + MachineMappingResult result = get_optimal_machine_mapping( + cache, context, problem_tree, four_nodes_resources, constraints); + + MachineMappingResult correct = MachineMappingResult{ + FeasibleMachineMappingResult{ + /*runtime=*/1.0_ms + 1.0_ms + 1.0_ms, + /*machine_mapping=*/ + ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}, + mv_stride_1, + }, + { + BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}, + mv_stride_2, + }, + }}, + }, + }; + + CHECK(result == correct); + } } SUBCASE("pair of layers in parallel") { @@ -255,30 +417,177 @@ TEST_SUITE(FF_TEST_SUITE) { get_unconstrained_solution_for_layers( get_all_leaf_paths(problem_tree)); - MachineMappingResult result = get_optimal_machine_mapping( - cache, context, problem_tree, full_machine_spec, constraints); - MachineMappingResult correct = MachineMappingResult{ - FeasibleMachineMappingResult{ - /*runtime=*/2.5_ms, - /*machine_mapping=*/ - ParallelLayerGuidObliviousMachineMapping{{ - { - BinaryTreePath{{ - BinaryTreePathEntry::LEFT_CHILD, - }}, - mv2, - }, - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - mv2, - }, - }}, - }, - }; + auto allowed_machine_views = + [&](UnmappedRuntimeOnlyOpCostEstimateKey const &k, + MachineComputeResourceSlice const &resources) { + if (resources == four_nodes_resources) { + return std::unordered_set{mv_stride_1, 
mv_stride_2}; + } else if (resources == three_nodes_resources) { + return std::unordered_set{mv_stride_1, mv_stride_2}; + } else if (resources == two_nodes_resources) { + return std::unordered_set{mv_stride_1}; + } else { + return std::unordered_set{}; + } + }; - CHECK(result == correct); + SUBCASE("cannot use overlapping machine views in parallel") { + RuntimeOnlyCostEstimator runtime_only_cost_estimator = + make_fake_runtime_only_cost_estimator( + std::unordered_map{{ + mk_cost_entry(k1, mv_stride_1, 1), + mk_cost_entry(k1, mv_stride_2, 3), + mk_cost_entry(k2, mv_stride_1, 4), + mk_cost_entry(k2, mv_stride_2, 1), + }}, + std::unordered_map{{ + { + TensorSetMovement{{}}, + 0.0_ms, + }, + }}); + + MachineMappingContext context = MachineMappingContext{ + /*cost_estimator=*/runtime_only_cost_estimator, + /*allowed_machine_views=*/allowed_machine_views, + }; + + MachineMappingResult result = get_optimal_machine_mapping( + cache, context, problem_tree, four_nodes_resources, constraints); + + MachineMappingResult correct = MachineMappingResult{ + FeasibleMachineMappingResult{ + /*runtime=*/2_ms, + /*machine_mapping=*/ + ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}, + mv_stride_1, + }, + { + BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}, + mv_stride_2, + }, + }}, + }, + }; + + CHECK(result == correct); + } + + SUBCASE("solution is running operators in parallel") { + RuntimeOnlyCostEstimator runtime_only_cost_estimator = + make_fake_runtime_only_cost_estimator( + std::unordered_map{{ + mk_cost_entry(k1, mv_stride_1, 1), + mk_cost_entry(k1, mv_stride_2, 3), + mk_cost_entry(k2, mv_stride_1, 3), + mk_cost_entry(k2, mv_stride_2, 4), + }}, + std::unordered_map{{ + { + TensorSetMovement{{}}, + 0.0_ms, + }, + }}); + + MachineMappingContext context = MachineMappingContext{ + /*cost_estimator=*/runtime_only_cost_estimator, + /*allowed_machine_views=*/allowed_machine_views, + }; + + MachineMappingResult result = get_optimal_machine_mapping( + cache, context, problem_tree, four_nodes_resources, constraints); + + MachineView translated_mv_stride_1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/2_n, + /*device_idx=*/0_n, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + /*stride=*/stride_t{1_p}, + /*projection=*/MachineSpecificationDimension::INTER_NODE, + }, + }, + }; + + MachineMappingResult correct = MachineMappingResult{ + FeasibleMachineMappingResult{ + /*runtime=*/3_ms, + /*machine_mapping=*/ + ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}, + mv_stride_1, + }, + { + BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}, + translated_mv_stride_1, + }, + }}, + }, + }; + + CHECK(result == correct); + } + + SUBCASE("solution is running operators in series") { + RuntimeOnlyCostEstimator runtime_only_cost_estimator = + make_fake_runtime_only_cost_estimator( + std::unordered_map{{ + mk_cost_entry(k1, mv_stride_1, 3), + mk_cost_entry(k1, mv_stride_2, 1), + mk_cost_entry(k2, mv_stride_1, 4), + mk_cost_entry(k2, mv_stride_2, 1), + }}, + std::unordered_map{{ + { + TensorSetMovement{{}}, + 0.0_ms, + }, + }}); + + MachineMappingContext context = MachineMappingContext{ + /*cost_estimator=*/runtime_only_cost_estimator, + /*allowed_machine_views=*/allowed_machine_views, + }; + + MachineMappingResult result = get_optimal_machine_mapping( + cache, context, problem_tree, four_nodes_resources, constraints); + + MachineMappingResult 
correct = MachineMappingResult{ + FeasibleMachineMappingResult{ + /*runtime=*/2_ms, + /*machine_mapping=*/ + ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}, + mv_stride_2, + }, + { + BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}, + mv_stride_2, + }, + }}, + }, + }; + + CHECK(result == correct); + } } } } diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc index 586a2b7764..4fe029805f 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc @@ -1,6 +1,7 @@ #include "compiler/machine_mapping/get_tensor_set_movement_across_split.h" #include "compiler/machine_mapping/transitive_reduced_pcg.h" #include "internal/cost_estimator_for_test.h" +#include "op-attrs/parallel_tensor_shape.h" #include "pcg/machine_view.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h" @@ -114,14 +115,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView pre_mv2 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0_n, + /*node_idx=*/1_n, /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{2_p}, + stride_t{1_p}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -129,14 +130,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView post_mv1 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0_n, + /*node_idx=*/2_n, /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{3_p}, + stride_t{1_p}, MachineSpecificationDimension::INTRA_NODE, }, }, @@ -144,34 +145,54 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView post_mv2 = MachineView{ /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0_n, + /*node_idx=*/3_n, /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, /*dimensions=*/ { MachineViewDimension{ - stride_t{4_p}, + stride_t{1_p}, MachineSpecificationDimension::INTRA_NODE, }, }, }; + auto mk_communication_edge = [](MachineView const &src_mv, + nonnegative_int src_task_idx, + MachineView const &dst_mv, + nonnegative_int dst_task_idx) { + + ASSERT(src_task_idx < 2); + ASSERT(dst_task_idx < 2); + + return CommunicationEdge{ + /*src=*/MachineSpaceCoordinate{ + /*node_idx=*/src_mv.start.node_idx, + /*device_idx=*/src_task_idx, + /*device_type=*/DeviceType::GPU, + }, + /*dst=*/MachineSpaceCoordinate{ + /*node_idx=*/dst_mv.start.node_idx, + /*device_idx=*/dst_task_idx, + /*device_type=*/DeviceType::GPU, + }, + }; + }; + + num_bytes_t piece_size = get_piece_size_in_bytes(partitioned_input_shape); + SUBCASE("single edge across split") { PCGBinarySeriesSplit split = PCGBinarySeriesSplit{ - make_pcg_series_split( - make_pcg_series_split( - make_pcg_leaf_node(input.parallel_layer), - make_pcg_leaf_node(partition_input.parallel_layer)), - make_pcg_leaf_node(relu_1.parallel_layer)), + make_pcg_leaf_node(relu_1.parallel_layer), make_pcg_leaf_node(relu_2.parallel_layer), }; auto pre_mapping = ParallelLayerGuidObliviousMachineMapping{{ - {BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - pre_mv1}, + { + BinaryTreePath{{}}, + pre_mv1, + }, }}; auto post_mapping = ParallelLayerGuidObliviousMachineMapping{{ @@ -183,31 +204,77 @@ TEST_SUITE(FF_TEST_SUITE) { TensorSetMovement result = 
get_tensor_set_movement_across_split( pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + TensorSetMovement correct = TensorSetMovement{ - /*single_tensor_movements=*/{ - SingleTensorMovement{ - /*parallel_tensor_shape=*/partitioned_input_shape, - /*src_machine_views=*/{pre_mv1}, - /*dst_machine_views=*/{post_mv1}, - }, + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size, + }, }, }; CHECK(result == correct); } - SUBCASE("does not include edges removed by transitive reduction") {} + SUBCASE("does not include edges removed by transitive reduction") { + ParallelLayerAddedResult ew_add = + add_parallel_layer(pcg, ew_add_attrs, {get_only(relu_1.outputs), get_only(relu_2.outputs)}, {}); + + PCGBinarySeriesSplit split = PCGBinarySeriesSplit{ + make_pcg_series_split( + make_pcg_leaf_node(relu_1.parallel_layer), + make_pcg_leaf_node(relu_2.parallel_layer)), + make_pcg_leaf_node(ew_add.parallel_layer), + }; + + auto pre_mapping = ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{BinaryTreePathEntry::LEFT_CHILD}}, + pre_mv2, + }, + { + BinaryTreePath{{BinaryTreePathEntry::RIGHT_CHILD}}, + pre_mv1, + }, + }}; + + auto post_mapping = ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{}}, + post_mv1, + }, + }}; + + TensorSetMovement result = get_tensor_set_movement_across_split( + pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + + TensorSetMovement correct = TensorSetMovement{ + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size, + }, + }, + }; + + CHECK(result == correct); + } SUBCASE("single tensor, multiple consumers across split") { ParallelLayerAddedResult relu_3 = add_parallel_layer(pcg, relu_attrs, {get_only(relu_1.outputs)}, {}); PCGBinarySeriesSplit split = PCGBinarySeriesSplit{ - make_pcg_series_split( - make_pcg_series_split( - make_pcg_leaf_node(input.parallel_layer), - make_pcg_leaf_node(partition_input.parallel_layer)), - make_pcg_leaf_node(relu_1.parallel_layer)), + make_pcg_leaf_node(relu_1.parallel_layer), make_pcg_parallel_split(make_pcg_leaf_node(relu_2.parallel_layer), make_pcg_leaf_node(relu_3.parallel_layer)), }; @@ -215,9 +282,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("consumers have same view") { auto pre_mapping = ParallelLayerGuidObliviousMachineMapping{{ { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, + BinaryTreePath{{}}, pre_mv1, }, }}; @@ -244,12 +309,15 @@ TEST_SUITE(FF_TEST_SUITE) { post_mapping); TensorSetMovement correct = TensorSetMovement{ - /*single_tensor_movements=*/{ - SingleTensorMovement{ - /*parallel_tensor_shape=*/partitioned_input_shape, - /*src_machine_views=*/{pre_mv1}, - /*dst_machine_views=*/{post_mv1}, - }, + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size, + }, }, }; @@ -259,9 +327,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("consumers have different views") { auto pre_mapping = ParallelLayerGuidObliviousMachineMapping{{ { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, + BinaryTreePath{{}}, pre_mv1, }, }}; @@ -287,13 +353,25 @@ TEST_SUITE(FF_TEST_SUITE) { pre_mapping, post_mapping); + TensorSetMovement correct = TensorSetMovement{ - /*single_tensor_movements=*/{ - SingleTensorMovement{ - 
/*parallel_tensor_shape=*/partitioned_input_shape, - /*src_machine_views=*/{pre_mv1}, - /*dst_machine_views=*/{post_mv1, post_mv2}, - }, + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 0_n, post_mv2, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv2, 1_n), + piece_size, + }, }, }; @@ -312,68 +390,209 @@ TEST_SUITE(FF_TEST_SUITE) { {}); PCGBinarySeriesSplit split = PCGBinarySeriesSplit{ - make_pcg_series_split( - make_pcg_series_split( - make_pcg_leaf_node(input.parallel_layer), - make_pcg_leaf_node(partition_input.parallel_layer)), - make_pcg_parallel_split( - make_pcg_leaf_node(relu_1.parallel_layer), - make_pcg_leaf_node(relu_3.parallel_layer))), - make_pcg_parallel_split(make_pcg_leaf_node(relu_2.parallel_layer), - make_pcg_leaf_node(relu_4.parallel_layer)), + make_pcg_parallel_split( + make_pcg_leaf_node(relu_1.parallel_layer), + make_pcg_leaf_node(relu_3.parallel_layer)), + make_pcg_parallel_split( + make_pcg_leaf_node(relu_2.parallel_layer), + make_pcg_leaf_node(relu_4.parallel_layer)), }; - auto pre_mapping = ParallelLayerGuidObliviousMachineMapping{{ + auto mk_pre_mapping = [](MachineView const &src1_mv, MachineView const &src2_mv) { + return ParallelLayerGuidObliviousMachineMapping{{ { BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, BinaryTreePathEntry::LEFT_CHILD, }}, - pre_mv1, + src1_mv, }, { BinaryTreePath{{ BinaryTreePathEntry::RIGHT_CHILD, - BinaryTreePathEntry::RIGHT_CHILD, }}, - pre_mv2, + src2_mv, }, - }}; + }}; + }; - auto post_mapping = ParallelLayerGuidObliviousMachineMapping{{ + auto mk_post_mapping = [](MachineView const &dst1_mv, MachineView const &dst2_mv) { + return ParallelLayerGuidObliviousMachineMapping{{ { BinaryTreePath{{ BinaryTreePathEntry::LEFT_CHILD, }}, - post_mv1, + dst1_mv, }, { BinaryTreePath{{ BinaryTreePathEntry::RIGHT_CHILD, }}, - post_mv2, + dst2_mv, }, - }}; + }}; + }; - TensorSetMovement result = get_tensor_set_movement_across_split( - pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + SUBCASE("producers have different views and consumers have different views") { + ParallelLayerGuidObliviousMachineMapping pre_mapping = mk_pre_mapping(pre_mv1, pre_mv2); + ParallelLayerGuidObliviousMachineMapping post_mapping = mk_post_mapping(post_mv1, post_mv2); - TensorSetMovement correct = TensorSetMovement{ - /*single_tensor_movements=*/{ - SingleTensorMovement{ - /*parallel_tensor_shape=*/partitioned_input_shape, - /*src_machine_views=*/{pre_mv1}, - /*dst_machine_views=*/{post_mv1, post_mv2}, + TensorSetMovement result = get_tensor_set_movement_across_split( + pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + + TensorSetMovement correct = TensorSetMovement{ + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size, }, - SingleTensorMovement{ - /*parallel_tensor_shape=*/partitioned_input_shape, - /*src_machine_views=*/{pre_mv2}, - /*dst_machine_views=*/{post_mv2}, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size, }, - }, - }; + { + mk_communication_edge(pre_mv1, 0_n, post_mv2, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv2, 1_n), + piece_size, + }, + { + mk_communication_edge(pre_mv2, 0_n, post_mv2, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv2, 1_n, post_mv2, 1_n), + piece_size, + }, + }, + }; - CHECK(result == correct); + 
CHECK(result == correct); + } + + SUBCASE("producers have different views and consumers have the same view") { + ParallelLayerGuidObliviousMachineMapping pre_mapping = mk_pre_mapping(pre_mv1, pre_mv2); + ParallelLayerGuidObliviousMachineMapping post_mapping = mk_post_mapping(post_mv1, post_mv1); + + TensorSetMovement result = get_tensor_set_movement_across_split( + pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + + TensorSetMovement correct = TensorSetMovement{ + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size, + }, + { + mk_communication_edge(pre_mv2, 0_n, post_mv1, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv2, 1_n, post_mv1, 1_n), + piece_size, + }, + }, + }; + + CHECK(result == correct); + } + + SUBCASE("producers have the same view and consumers have different views") { + ParallelLayerGuidObliviousMachineMapping pre_mapping = mk_pre_mapping(pre_mv1, pre_mv1); + ParallelLayerGuidObliviousMachineMapping post_mapping = mk_post_mapping(post_mv1, post_mv2); + + TensorSetMovement result = get_tensor_set_movement_across_split( + pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + + TensorSetMovement correct = TensorSetMovement{ + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 0_n, post_mv2, 0_n), + piece_size + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv2, 1_n), + piece_size + piece_size, + }, + }, + }; + + CHECK(result == correct); + } + + SUBCASE("producers have the same view and consumers have the same view") { + ParallelLayerGuidObliviousMachineMapping pre_mapping = mk_pre_mapping(pre_mv1, pre_mv1); + ParallelLayerGuidObliviousMachineMapping post_mapping = mk_post_mapping(post_mv1, post_mv1); + + TensorSetMovement result = get_tensor_set_movement_across_split( + pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + + TensorSetMovement correct = TensorSetMovement{ + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size + piece_size, + }, + }, + }; + + CHECK(result == correct); + } + + SUBCASE("all producers and consumers have the same view") { + ParallelLayerGuidObliviousMachineMapping pre_mapping = mk_pre_mapping(pre_mv1, pre_mv1); + ParallelLayerGuidObliviousMachineMapping post_mapping = mk_post_mapping(pre_mv1, pre_mv1); + + + TensorSetMovement result = get_tensor_set_movement_across_split( + pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + + TensorSetMovement correct = TensorSetMovement{ + /*edge_to_size=*/{{}}, + }; + + CHECK(result == correct); + } + + SUBCASE("producers and one consumer share the same view") { + ParallelLayerGuidObliviousMachineMapping pre_mapping = mk_pre_mapping(pre_mv1, pre_mv1); + ParallelLayerGuidObliviousMachineMapping post_mapping = mk_post_mapping(post_mv1, pre_mv1); + + TensorSetMovement result = get_tensor_set_movement_across_split( + pcg_get_transitive_reduction(pcg), split, pre_mapping, post_mapping); + + TensorSetMovement correct = TensorSetMovement{ + /*edge_to_size=*/{ + { + mk_communication_edge(pre_mv1, 0_n, post_mv1, 0_n), + piece_size, + }, + { + mk_communication_edge(pre_mv1, 1_n, post_mv1, 1_n), + piece_size, + }, + }, + }; 
+ + CHECK(result == correct); + } } } } diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc index 928d30ecaa..1f4f7256c5 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc @@ -1,6 +1,6 @@ #include "compiler/machine_mapping/machine_mapping.h" -#include "doctest/doctest.h" #include "pcg/machine_view.h" +#include using namespace FlexFlow; diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_problem_tree/get_machine_mapping_problem_tree.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_problem_tree/get_machine_mapping_problem_tree.cc index 2fcffac29a..6f6cde4eab 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_problem_tree/get_machine_mapping_problem_tree.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_problem_tree/get_machine_mapping_problem_tree.cc @@ -4,6 +4,7 @@ #include "op-attrs/parallel_tensor_shape.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "utils/containers/get_only.h" +#include "utils/full_binary_tree/binary_tree_path.h" #include using namespace ::FlexFlow; @@ -102,6 +103,8 @@ TEST_SUITE(FF_TEST_SUITE) { }; }; + TaskSpaceCoordinate empty_task_space_coord = TaskSpaceCoordinate{OrthotopeCoord{{}}}; + SUBCASE("single layer") { ParallelLayerAddedResult input_added = add_parallel_layer(pcg, @@ -162,18 +165,23 @@ TEST_SUITE(FF_TEST_SUITE) { get_machine_mapping_problem_tree(pcg, sp_decomposition); MachineMappingProblemTree correct = mm_problem_tree_make_series( - AbstractedTensorSetMovement{{ + AbstractedTensorSetMovement{ + /*single_tensor_movements=*/{ AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/par_input_shape, - /*src_machine_views=*/ - { - BinaryTreePath{{}}, - }, - /*dst_machine_views=*/ + /*src_op_tree_path=*/binary_tree_root_path(), + /*edge_to_size=*/{ { - BinaryTreePath{{}}, + AbstractedSingleTensorCommunicationEdge{ + /*src_coord=*/empty_task_space_coord, + /*dst=*/AbstractedDevice{ + /*operator_tree_path=*/binary_tree_root_path(), + /*task_space_coordinate=*/empty_task_space_coord, + }, + }, + get_piece_size_in_bytes(par_input_shape), }, }, + } }}, mm_problem_tree_make_leaf(input_key), mm_problem_tree_make_leaf(relu_key)); @@ -253,35 +261,40 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingProblemTree result = get_machine_mapping_problem_tree(pcg, sp_decomposition); + BinaryTreePath src1_path = BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}; + + BinaryTreePath src2_path = BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}; + + AbstractedSingleTensorCommunicationEdge edge = + AbstractedSingleTensorCommunicationEdge{ + /*src_coord=*/empty_task_space_coord, + /*dst=*/AbstractedDevice{ + /*operator_tree_path=*/binary_tree_root_path(), + /*task_space_coordinate=*/empty_task_space_coord, + }, + }; + MachineMappingProblemTree correct = mm_problem_tree_make_series( - AbstractedTensorSetMovement{{ + AbstractedTensorSetMovement{ + /*single_tensor_movements=*/{ AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/par_input_shape, - /*src_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::LEFT_CHILD, - }}, - }, - /*dst_machine_views=*/ - { - BinaryTreePath{{}}, - }, + /*src_op_tree_path=*/src1_path, + /*edge_to_size=*/{ + {edge, get_piece_size_in_bytes(par_input_shape)}, + }, }, AbstractedSingleTensorMovement{ - 
/*parallel_tensor_shape=*/par_input_shape, - /*src_machine_views=*/ - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - }, - /*dst_machine_views=*/ - { - BinaryTreePath{{}}, - }, + /*src_op_tree_path=*/src2_path, + /*edge_to_size=*/{ + {edge, get_piece_size_in_bytes(par_input_shape)}, + }, }, - }}, + }, + }, /*pre=*/ mm_problem_tree_make_parallel(mm_problem_tree_make_leaf(input1_key), mm_problem_tree_make_leaf(input2_key)), diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc index 26f61253c3..bbffa4700e 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc @@ -253,29 +253,49 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingResult infeasible = infeasible_machine_mapping_result(); + MachineResourceSplit split = MachineResourceSplit{ + /*offset=*/3_p, + /*dimension=*/MachineSpecificationDimension::INTER_NODE, + }; + SUBCASE("lhs is infeasible") { - MachineMappingResult result = parallel_combine(infeasible, rhs); + MachineMappingResult result = parallel_combine(split, infeasible, rhs); MachineMappingResult correct = infeasible; CHECK(result == correct); } SUBCASE("rhs is infeasible") { - MachineMappingResult result = parallel_combine(lhs, infeasible); + MachineMappingResult result = parallel_combine(split, lhs, infeasible); MachineMappingResult correct = infeasible; CHECK(result == correct); } SUBCASE("both are infeasible") { - MachineMappingResult result = parallel_combine(infeasible, infeasible); + MachineMappingResult result = parallel_combine(split, infeasible, infeasible); MachineMappingResult correct = infeasible; CHECK(result == correct); } SUBCASE("both are feasible") { - MachineMappingResult result = parallel_combine(lhs, rhs); + MachineView translated_machine_view_1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/3_n, + /*device_idx=*/0_n, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineMappingResult result = parallel_combine(split, lhs, rhs); MachineMappingResult correct = MachineMappingResult{ FeasibleMachineMappingResult{ /*runtime=*/4_ms, @@ -299,7 +319,7 @@ TEST_SUITE(FF_TEST_SUITE) { BinaryTreePath{{ BinaryTreePathEntry::RIGHT_CHILD, }}, - machine_view_1, + translated_machine_view_1, }, }}, }, diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_resource_split.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_resource_split.cc new file mode 100644 index 0000000000..794380e7b8 --- /dev/null +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_resource_split.cc @@ -0,0 +1,123 @@ +#include "compiler/machine_mapping/machine_resource_split.h" +#include "pcg/machine_compute_specification.dtg.h" +#include "test/utils/doctest/fmt/pair.h" +#include "test/utils/doctest/fmt/unordered_set.h" +#include "utils/hash/pair.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_machine_resource_splits") { + SUBCASE("returns no splits if no splits are possible") { + MachineComputeResourceSlice input = MachineComputeResourceSlice{ + /*num_nodes=*/1_p, + /*num_gpus_per_node=*/1_p, + }; + + std::unordered_set + result = get_machine_resource_splits(input); + std::unordered_set + correct = {}; + + CHECK(result == correct); + } + + SUBCASE( + "returns splits in gpu 
and node dimensions") {
+      MachineComputeResourceSlice input = MachineComputeResourceSlice{
+          /*num_nodes=*/2_p,
+          /*num_gpus_per_node=*/2_p,
+      };
+
+      std::unordered_set<MachineResourceSplit>
+          result = get_machine_resource_splits(input);
+
+      std::unordered_set<MachineResourceSplit>
+          correct = {
+              MachineResourceSplit{
+                  /*offset=*/1_p,
+                  /*dimension=*/MachineSpecificationDimension::INTRA_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/1_p,
+                  /*dimension=*/MachineSpecificationDimension::INTER_NODE,
+              }
+          };
+
+      CHECK(result == correct);
+    }
+
+    SUBCASE("returns splits in node dimension in powers of two") {
+      MachineComputeResourceSlice input = MachineComputeResourceSlice{
+          /*num_nodes=*/8_p,
+          /*num_gpus_per_node=*/1_p,
+      };
+
+      std::unordered_set<MachineResourceSplit>
+          result = get_machine_resource_splits(input);
+
+      std::unordered_set<MachineResourceSplit>
+          correct = {
+              MachineResourceSplit{
+                  /*offset=*/1_p,
+                  /*dimension=*/MachineSpecificationDimension::INTER_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/2_p,
+                  /*dimension=*/MachineSpecificationDimension::INTER_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/4_p,
+                  /*dimension=*/MachineSpecificationDimension::INTER_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/6_p,
+                  /*dimension=*/MachineSpecificationDimension::INTER_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/7_p,
+                  /*dimension=*/MachineSpecificationDimension::INTER_NODE,
+              },
+          };
+
+      CHECK(result == correct);
+    }
+
+    SUBCASE("returns splits in gpu dimension in powers of two") {
+      MachineComputeResourceSlice input = MachineComputeResourceSlice{
+          /*num_nodes=*/1_p,
+          /*num_gpus_per_node=*/8_p,
+      };
+
+      std::unordered_set<MachineResourceSplit>
+          result = get_machine_resource_splits(input);
+
+      std::unordered_set<MachineResourceSplit>
+          correct = {
+              MachineResourceSplit{
+                  /*offset=*/1_p,
+                  /*dimension=*/MachineSpecificationDimension::INTRA_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/2_p,
+                  /*dimension=*/MachineSpecificationDimension::INTRA_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/4_p,
+                  /*dimension=*/MachineSpecificationDimension::INTRA_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/6_p,
+                  /*dimension=*/MachineSpecificationDimension::INTRA_NODE,
+              },
+              MachineResourceSplit{
+                  /*offset=*/7_p,
+                  /*dimension=*/MachineSpecificationDimension::INTRA_NODE,
+              },
+          };
+
+      CHECK(result == correct);
+    }
+  }
+}
diff --git a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc
index 96b11e6d33..11b2a0211d 100644
--- a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc
+++ b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.cc
@@ -1,4 +1,5 @@
#include "compiler/machine_mapping/memory_optimization/get_optimal_machine_mapping_with_memory.h"
+#include "compiler/cost_estimator/tensor_set_movement.h"
#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h"
#include "compiler/machine_mapping/machine_mapping_constraints.h"
#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.h"
@@ -6,9 +7,11 @@
#include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.h"
#include "internal/cost_estimator_for_test.h"
#include "op-attrs/parallel_tensor_shape.h"
+#include "op-attrs/task_space_coordinate.h"
#include "pcg/machine_view.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h"
#include 
"utils/containers/get_only.h" +#include "utils/containers/map_from_pairs.h" #include "utils/full_binary_tree/binary_tree_path.h" #include "utils/nonnegative_int/nonnegative_int.h" #include @@ -51,13 +54,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, - /*dimensions=*/ - { - MachineViewDimension{ - stride_t{1_p}, - MachineSpecificationDimension::INTRA_NODE, - }, - }, + /*dimensions=*/{}, }; MachineView mv2 = MachineView{ @@ -66,41 +63,62 @@ TEST_SUITE(FF_TEST_SUITE) { /*device_idx=*/0_n, /*device_type=*/DeviceType::GPU, }, - /*dimensions=*/ - { - MachineViewDimension{ - stride_t{2_p}, - MachineSpecificationDimension::INTRA_NODE, - }, + /*dimensions=*/{ + MachineViewDimension{ + /*stride=*/stride_t{1_p}, + /*projection=*/MachineSpecificationDimension::INTER_NODE, + }, }, }; - MachineSpecification full_machine_spec = MachineSpecification{ - /*num_nodes=*/2_p, - /*num_cpus_per_node=*/1_p, + MachineView mv3 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0_n, + /*device_idx=*/0_n, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + /*stride=*/stride_t{2_p}, + /*projection=*/MachineSpecificationDimension::INTER_NODE, + }, + }, + }; + + MachineView mv4 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/1_n, + /*device_idx=*/0_n, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + /*stride=*/stride_t{1_p}, + /*projection=*/MachineSpecificationDimension::INTER_NODE, + }, + }, + }; + + MachineComputeResourceSlice four_nodes_resources = MachineComputeResourceSlice{ + /*num_nodes=*/4_p, /*num_gpus_per_node=*/1_p, - /*inter_node_bandwidth=*/1, - /*intra_node_bandwidth=*/1, }; - MachineSpecification split_machine_spec = MachineSpecification{ + MachineComputeResourceSlice three_nodes_resources = MachineComputeResourceSlice{ + /*num_nodes=*/3_p, + /*num_gpus_per_node=*/1_p, + }; + + MachineComputeResourceSlice two_nodes_resources = MachineComputeResourceSlice{ + /*num_nodes=*/2_p, + /*num_gpus_per_node=*/1_p, + }; + + MachineComputeResourceSlice one_node_resources = MachineComputeResourceSlice{ /*num_nodes=*/1_p, - /*num_cpus_per_node=*/1_p, /*num_gpus_per_node=*/1_p, - /*inter_node_bandwidth=*/1, - /*intra_node_bandwidth=*/1, }; - auto allowed_machine_views1 = - [&](UnmappedRuntimeOnlyOpCostEstimateKey const &, - MachineSpecification const &resources) { - if (resources == full_machine_spec) { - return std::unordered_set{mv1, mv2}; - } else { - return std::unordered_set{mv2}; - } - }; - TensorShape tensor_shape = TensorShape{ TensorDims{ FFOrdered{ @@ -111,7 +129,18 @@ TEST_SUITE(FF_TEST_SUITE) { DataType::FLOAT, }; - ParallelTensorShape par_tensor_shape = lift_to_parallel(tensor_shape); + ParallelTensorShape pre_partition_par_tensor_shape = lift_to_parallel(tensor_shape); + ParallelTensorShape post_partition_par_tensor_shape = + lift_to_parallel_with_degrees( + tensor_shape, + ParallelTensorDimDegrees{ + /*sum_degree=*/SumDegree{1_p}, + /*discard_copy_degree=*/DiscardCopyDegree{1_p}, + /*shard_degrees=*/FFOrdered{ + 2_p, + 1_p, + }, + }); OptimizerAttrs optimizer_attrs = OptimizerAttrs{ SGDOptimizerAttrs{ @@ -122,34 +151,47 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; + UnmappedOpCostEstimateKey k1 = UnmappedOpCostEstimateKey{ /*op_attrs=*/PCGOperatorAttrs{InputAttrs{tensor_shape}}, /*input_shapes=*/{}, /*weight_shapes=*/{}, - /*output_shapes=*/{}, + /*output_shapes=*/{pre_partition_par_tensor_shape}, /*optimizer_attrs=*/optimizer_attrs, }; UnmappedOpCostEstimateKey k2 = 
UnmappedOpCostEstimateKey{ - /*op_attrs=*/PCGOperatorAttrs{ElementBinaryAttrs{ - /*type=*/OperatorType::EW_ADD, - /*compute_type=*/DataType::FLOAT, - /*should_broadcast_lhs=*/false, - /*should_broadcast_rhs=*/false, + /*op_attrs=*/PCGOperatorAttrs{ElementUnaryAttrs{ + /*type=*/OperatorType::GELU, + /*scalar=*/std::nullopt, }}, - /*input_shapes=*/{}, + /*input_shapes=*/{post_partition_par_tensor_shape}, + /*weight_shapes=*/{}, + /*output_shapes=*/{post_partition_par_tensor_shape}, + /*optimizer_attrs=*/optimizer_attrs, + }; + + UnmappedOpCostEstimateKey k3 = UnmappedOpCostEstimateKey{ + /*op_attrs=*/PCGOperatorAttrs{ElementUnaryAttrs{ + /*type=*/OperatorType::RELU, + /*scalar=*/std::nullopt, + }}, + /*input_shapes=*/{post_partition_par_tensor_shape}, /*weight_shapes=*/{}, - /*output_shapes=*/{}, + /*output_shapes=*/{post_partition_par_tensor_shape}, /*optimizer_attrs=*/optimizer_attrs, }; - AbstractedTensorSetMovement movement1 = AbstractedTensorSetMovement{{ - AbstractedSingleTensorMovement{ - /*parallel_tensor_shape=*/par_tensor_shape, - /*src_machine_views=*/{}, - /*dst_machine_views=*/{}, - }, - }}; + + TaskSpaceCoordinate empty_task_space_coord = TaskSpaceCoordinate{OrthotopeCoord{{}}}; + + BinaryTreePath src_path = binary_tree_root_path(); + TaskSpaceCoordinate src_coord = empty_task_space_coord; + + AbstractedDevice dst_device = AbstractedDevice{ + /*operator_tree_path=*/binary_tree_root_path(), + /*task_space_coordinate=*/empty_task_space_coord, + }; ParallelLayerGuidObliviousMachineMapping mm1 = ParallelLayerGuidObliviousMachineMapping{{ @@ -160,169 +202,545 @@ TEST_SUITE(FF_TEST_SUITE) { {binary_tree_root_path(), mv2}, }}; - CostEstimator cost_estimator = make_fake_cost_estimator( - std::unordered_map{{ - {map_unmapped_op_cost_estimate_key(k1, mv1), - OpCostMetrics{/*forward_runtime=*/1_ms, - /*backward_runtime=*/1_ms, - /*memory_usage=*/2_bytes}}, - {map_unmapped_op_cost_estimate_key(k2, mv1), - OpCostMetrics{/*forward_runtime=*/2_ms, - /*backward_runtime=*/2_ms, - /*memory_usage=*/3_bytes}}, - {map_unmapped_op_cost_estimate_key(k1, mv2), - OpCostMetrics{/*forward_runtime=*/1.5_ms, - /*backward_runtime=*/1.5_ms, - /*memory_usage=*/1_bytes}}, - {map_unmapped_op_cost_estimate_key(k2, mv2), - OpCostMetrics{/*forward_runtime=*/2.5_ms, - /*backward_runtime=*/2.5_ms, - /*memory_usage=*/2_bytes}}, - }}, - std::unordered_map{{ - {TensorSetMovement{/*movements=*/{}}, /*cost=*/0.0_ms}, - {concretize_abstracted_tensor_set_movement(movement1, mm1, mm1), - /*cost=*/0.1_ms}, - {concretize_abstracted_tensor_set_movement(movement1, mm2, mm2), - /*cost=*/0.2_ms}, - {concretize_abstracted_tensor_set_movement(movement1, mm1, mm2), - /*cost=*/0.3_ms}, - {concretize_abstracted_tensor_set_movement(movement1, mm2, mm1), - /*cost=*/0.4_ms}, - }}); - - MachineMappingWithMemoryContext context = MachineMappingWithMemoryContext{ - cost_estimator, - optimizer_attrs, - allowed_machine_views1, + OperatorTaskSpace unparallel_task_space = OperatorTaskSpace{MinimalOrthotope{{}}}; + OperatorTaskSpace parallel_task_space = OperatorTaskSpace{ + MinimalOrthotope{{ + 2_ge2, + }}, }; - MachineMappingWithMemoryCache cache = - empty_machine_mapping_with_memory_cache(); + auto get_corresponding_task_space = [&](MachineView const &mv) { + if (mv == mv1) { + return unparallel_task_space; + } else { + ASSERT(mv == mv2 || mv == mv3); + + return parallel_task_space; + } + }; + + SUBCASE("single layer with single option") { + OpCostMetrics k1_on_mv1_cost = OpCostMetrics{ + /*forward_runtime=*/1_ms, + /*backward_runtime=*/1_ms, 
+ /*memory_usage=*/2_bytes, + }; + + CostEstimator cost_estimator = make_fake_cost_estimator( + std::unordered_map{{ + { + map_unmapped_op_cost_estimate_key(k1, mv1), + k1_on_mv1_cost, + }, + }}, + std::unordered_map{ + { + empty_tensor_set_movement(), + 0_ms, + }, + }); - SUBCASE("single layer") { MachineMappingProblemTree problem_tree = make_leaf(k1); MachineMappingConstraints constraints = get_unconstrained_solution_for_layers( get_all_leaf_paths(problem_tree)); + auto allowed_machine_views = + [&](UnmappedRuntimeOnlyOpCostEstimateKey const &k, + MachineComputeResourceSlice const &resources) { + + ASSERT(k == runtime_only_from_unmapped_op_cost_estimate_key(k1)); + ASSERT(resources == four_nodes_resources); + return std::unordered_set{mv1}; + }; + + MachineMappingWithMemoryContext context = MachineMappingWithMemoryContext{ + cost_estimator, + optimizer_attrs, + allowed_machine_views, + }; + + MachineMappingWithMemoryCache cache = + empty_machine_mapping_with_memory_cache(); + MachineMappingWithMemoryResult result = get_optimal_machine_mapping_with_memory( - cache, context, problem_tree, full_machine_spec, constraints); + cache, context, problem_tree, four_nodes_resources, constraints); + MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{{ - MachineMappingForSingleLayer{ - OpCostMetrics{/*forward_runtime=*/1_ms, - /*backward_runtime=*/1_ms, - /*memory_usage=*/2_bytes}, + ParetoOptimalMachineMapping{ + k1_on_mv1_cost, ParallelLayerGuidObliviousMachineMapping{{ {binary_tree_root_path(), mv1}, }}, }, - MachineMappingForSingleLayer{ - OpCostMetrics{/*forward_runtime=*/1.5_ms, - /*backward_runtime=*/1.5_ms, - /*memory_usage=*/1_bytes}, - ParallelLayerGuidObliviousMachineMapping{{ - {binary_tree_root_path(), mv2}, - }}, - }, }}; CHECK(result == correct); } - SUBCASE("pair of layers in sequence") { - MachineMappingProblemTree problem_tree = - make_series_split(movement1, make_leaf(k1), make_leaf(k2)); + SUBCASE("single layer with multiple options") { + + auto allowed_machine_views = + [&](UnmappedRuntimeOnlyOpCostEstimateKey const &k, + MachineComputeResourceSlice const &resources) { + + ASSERT(k == runtime_only_from_unmapped_op_cost_estimate_key(k3)); + ASSERT(resources == four_nodes_resources); + return std::unordered_set{mv2, mv3, mv4}; + }; + + OpCostMetrics k3_on_mv2_cost = OpCostMetrics{ + /*forward_runtime=*/2.5_ms, + /*backward_runtime=*/2.5_ms, + /*memory_usage=*/2_bytes, + }; + + OpCostMetrics k3_on_mv3_cost = OpCostMetrics{ + /*forward_runtime=*/2_ms, + /*backward_runtime=*/2_ms, + /*memory_usage=*/2_bytes, + }; + + OpCostMetrics k3_on_mv4_cost = OpCostMetrics{ + /*forward_runtime=*/3_ms, + /*backward_runtime=*/3_ms, + /*memory_usage=*/3_bytes, + }; + + CostEstimator cost_estimator = make_fake_cost_estimator( + std::unordered_map{{ + { + map_unmapped_op_cost_estimate_key(k3, mv2), + k3_on_mv2_cost, + }, + { + map_unmapped_op_cost_estimate_key(k3, mv3), + k3_on_mv3_cost, + }, + { + map_unmapped_op_cost_estimate_key(k3, mv4), + k3_on_mv4_cost, + }, + }}, + std::unordered_map{ + { + empty_tensor_set_movement(), + 0_ms, + }, + }); + + MachineMappingProblemTree problem_tree = make_leaf(k3); MachineMappingConstraints constraints = get_unconstrained_solution_for_layers( - get_all_leaf_paths(problem_tree)); + get_all_leaf_paths(problem_tree)); + + MachineMappingWithMemoryCache cache = + empty_machine_mapping_with_memory_cache(); + + MachineMappingWithMemoryContext context = MachineMappingWithMemoryContext{ + cost_estimator, + optimizer_attrs, + allowed_machine_views, + 
}; MachineMappingWithMemoryResult result = get_optimal_machine_mapping_with_memory( - cache, context, problem_tree, full_machine_spec, constraints); + cache, context, problem_tree, four_nodes_resources, constraints); + MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{{ - MachineMappingForSingleLayer{ - OpCostMetrics{ - /*forward_runtime=*/1.0_ms + 2.0_ms + 0.1_ms, - /*backward_runtime=*/1.0_ms + 2.0_ms + 0.1_ms, - /*memory_usage=*/2_bytes + 3_bytes, - }, - ParallelLayerGuidObliviousMachineMapping{{ - { - BinaryTreePath{{ - BinaryTreePathEntry::LEFT_CHILD, - }}, - mv1, - }, - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - mv1, - }, - }}, + ParetoOptimalMachineMapping{ + k3_on_mv2_cost, + ParallelLayerGuidObliviousMachineMapping{{ + {binary_tree_root_path(), mv2}, + }}, }, - MachineMappingForSingleLayer{ - OpCostMetrics{/*forward_runtime=*/1.5_ms + 2.5_ms + 0.1_ms, - /*backward_runtime=*/1.5_ms + 2.5_ms + 0.1_ms, - /*memory_usage=*/1_bytes + 2_bytes}, - ParallelLayerGuidObliviousMachineMapping{{ - { - BinaryTreePath{{ - BinaryTreePathEntry::LEFT_CHILD, - }}, - mv2, - }, - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - mv2, - }, - }}, + ParetoOptimalMachineMapping{ + k3_on_mv3_cost, + ParallelLayerGuidObliviousMachineMapping{{ + {binary_tree_root_path(), mv3}, + }}, }, }}; CHECK(result == correct); } + + SUBCASE("pair of layers in sequence") { + AbstractedTensorSetMovement k2_to_k3 = AbstractedTensorSetMovement{ + /*single_tensor_movements=*/{ + AbstractedSingleTensorMovement{ + /*src_op_tree_path=*/binary_tree_root_path(), + /*edge_to_size=*/{ + { + AbstractedSingleTensorCommunicationEdge{ + /*src_coord=*/make_task_space_coordinate({0_n}), + /*dst=*/AbstractedDevice{ + /*operator_tree_path=*/binary_tree_root_path(), + /*task_space_coordinate=*/make_task_space_coordinate({0_n}), + }, + }, + get_size_in_bytes(tensor_shape), + }, + { + AbstractedSingleTensorCommunicationEdge{ + /*src_coord=*/make_task_space_coordinate({1_n}), + /*dst=*/AbstractedDevice{ + /*operator_tree_path=*/binary_tree_root_path(), + /*task_space_coordinate=*/make_task_space_coordinate({1_n}), + }, + }, + get_size_in_bytes(tensor_shape), + }, + }, + }, + }, + }; + + auto mk_tensor_set_movement = [&]( + MachineView const &src_mv, + MachineView const &dst_mv) { + + MachineSpaceStencil src_stencil = MachineSpaceStencil{ + /*operator_task_space=*/get_corresponding_task_space(src_mv), + /*machine_view=*/src_mv, + }; + + MachineSpaceStencil dst_stencil = MachineSpaceStencil{ + /*operator_task_space=*/get_corresponding_task_space(dst_mv), + /*machine_view=*/dst_mv, + }; + + return concretize_abstracted_tensor_set_movement( + k2_to_k3, + /*pre_machine_stencils=*/{{binary_tree_root_path(), src_stencil}}, + /*post_machine_stencils=*/{{binary_tree_root_path(), dst_stencil}}); + }; + + auto mk_cost_estimator = [&]( + milliseconds_t k2_on_mv2_cost, + num_bytes_t k2_on_mv2_mem_usage, + milliseconds_t k2_on_mv3_cost, + num_bytes_t k2_on_mv3_mem_usage, + milliseconds_t k3_on_mv2_cost, + num_bytes_t k3_on_mv2_mem_usage, + milliseconds_t k3_on_mv3_cost, + num_bytes_t k3_on_mv3_mem_usage, + milliseconds_t mv2_to_mv2_cost, + milliseconds_t mv2_to_mv3_cost, + milliseconds_t mv3_to_mv2_cost, + milliseconds_t mv3_to_mv3_cost) { + + return make_fake_cost_estimator( + std::unordered_map{{ + { + map_unmapped_op_cost_estimate_key(k2, mv2), + OpCostMetrics{ + /*forward_runtime=*/k2_on_mv2_cost, + /*backward_runtime=*/k2_on_mv2_cost, + /*memory_usage=*/k2_on_mv2_mem_usage, + }, + }, + { + 
map_unmapped_op_cost_estimate_key(k2, mv3), + OpCostMetrics{ + /*forward_runtime=*/k2_on_mv3_cost, + /*backward_runtime=*/k2_on_mv3_cost, + /*memory_usage=*/k2_on_mv3_mem_usage, + }, + }, + { + map_unmapped_op_cost_estimate_key(k3, mv2), + OpCostMetrics{ + /*forward_runtime=*/k3_on_mv2_cost, + /*backward_runtime=*/k3_on_mv2_cost, + /*memory_usage=*/k3_on_mv2_mem_usage, + }, + }, + { + map_unmapped_op_cost_estimate_key(k3, mv3), + OpCostMetrics{ + /*forward_runtime=*/k3_on_mv3_cost, + /*backward_runtime=*/k3_on_mv3_cost, + /*memory_usage=*/k3_on_mv3_mem_usage, + }, + }, + }}, + std::unordered_map{{ + { + empty_tensor_set_movement(), + 0_ms, + }, + { + mk_tensor_set_movement(mv2, mv2), + mv2_to_mv2_cost, + }, + { + mk_tensor_set_movement(mv2, mv3), + mv2_to_mv3_cost, + }, + { + mk_tensor_set_movement(mv3, mv2), + mv3_to_mv2_cost, + }, + { + mk_tensor_set_movement(mv3, mv3), + mv3_to_mv3_cost, + }, + }}); + }; + + MachineMappingProblemTree problem_tree = + make_series_split(k2_to_k3, make_leaf(k2), make_leaf(k3)); + + MachineMappingConstraints constraints = + get_unconstrained_solution_for_layers( + get_all_leaf_paths(problem_tree)); + + MachineMappingWithMemoryCache cache = + empty_machine_mapping_with_memory_cache(); + + + SUBCASE("solution is mv2, mv3 due to runtime") { + CostEstimator cost_estimator = mk_cost_estimator( + /*k2_on_mv2_cost=*/2_ms, + /*k2_on_mv2_mem_usage=*/2_bytes, + /*k2_on_mv3_cost=*/2.4_ms, + /*k2_on_mv3_mem_usage=*/2_bytes, + /*k3_on_mv2_cost=*/3.6_ms, + /*k3_on_mv2_mem_usage=*/2_bytes, + /*k3_on_mv3_cost=*/3_ms, + /*k3_on_mv3_mem_usage=*/2_bytes, + /*mv2_to_mv2_cost=*/0.1_ms, + /*mv2_to_mv3_cost=*/0.3_ms, + /*mv3_to_mv2_cost=*/1.0_ms, + /*mv3_to_mv3_cost=*/0.1_ms); + + auto allowed_machine_views = + [&](UnmappedRuntimeOnlyOpCostEstimateKey const &k, + MachineComputeResourceSlice const &resources) { + if (k == runtime_only_from_unmapped_op_cost_estimate_key(k1)) { + return std::unordered_set{ + mv1, + }; + } else { + if (resources == four_nodes_resources) { + return std::unordered_set{mv2, mv3}; + } else if (resources == three_nodes_resources) { + return std::unordered_set{mv2, mv3}; + } else if (resources == two_nodes_resources) { + return std::unordered_set{mv2}; + } else { + return std::unordered_set{}; + } + }; + }; + + MachineMappingWithMemoryContext context = MachineMappingWithMemoryContext{ + cost_estimator, + optimizer_attrs, + allowed_machine_views, + }; + MachineMappingWithMemoryResult result = + get_optimal_machine_mapping_with_memory( + cache, context, problem_tree, four_nodes_resources, constraints); + + MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{{ + ParetoOptimalMachineMapping{ + OpCostMetrics{ + /*forward_runtime=*/2_ms + 0.3_ms + 3_ms, + /*backward_runtime=*/2_ms + 0.3_ms + 3_ms, + /*memory_usage=*/4_bytes, + }, + ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}, + mv2, + }, + { + BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}, + mv3, + }, + }}, + }, + }}; + + CHECK(result == correct); + } + } + SUBCASE("pair of layers in parallel") { + auto mk_cost_estimator = [&]( + milliseconds_t k2_on_mv2_cost, + num_bytes_t k2_on_mv2_mem_usage, + milliseconds_t k2_on_mv3_cost, + num_bytes_t k2_on_mv3_mem_usage, + milliseconds_t k3_on_mv2_cost, + num_bytes_t k3_on_mv2_mem_usage, + milliseconds_t k3_on_mv3_cost, + num_bytes_t k3_on_mv3_mem_usage) { + + return make_fake_cost_estimator( + std::unordered_map{{ + { + map_unmapped_op_cost_estimate_key(k2, mv2), + OpCostMetrics{ +
/*forward_runtime=*/k2_on_mv2_cost, + /*backward_runtime=*/k2_on_mv2_cost, + /*memory_usage=*/k2_on_mv2_mem_usage, + }, + }, + { + map_unmapped_op_cost_estimate_key(k2, mv3), + OpCostMetrics{ + /*forward_runtime=*/k2_on_mv3_cost, + /*backward_runtime=*/k2_on_mv3_cost, + /*memory_usage=*/k2_on_mv3_mem_usage, + }, + }, + { + map_unmapped_op_cost_estimate_key(k3, mv2), + OpCostMetrics{ + /*forward_runtime=*/k3_on_mv2_cost, + /*backward_runtime=*/k3_on_mv2_cost, + /*memory_usage=*/k3_on_mv2_mem_usage, + }, + }, + { + map_unmapped_op_cost_estimate_key(k3, mv3), + OpCostMetrics{ + /*forward_runtime=*/k3_on_mv3_cost, + /*backward_runtime=*/k3_on_mv3_cost, + /*memory_usage=*/k3_on_mv3_mem_usage, + }, + }, + }}, + std::unordered_map{ + { + empty_tensor_set_movement(), + 0_ms, + }, + }); + }; + + CostEstimator cost_estimator = mk_cost_estimator( + /*k2_on_mv2_cost=*/2_ms, + /*k2_on_mv2_mem_usage=*/3_bytes, + /*k2_on_mv3_cost=*/2.5_ms, + /*k2_on_mv3_mem_usage=*/2_bytes, + /*k3_on_mv2_cost=*/2.5_ms, + /*k3_on_mv2_mem_usage=*/2_bytes, + /*k3_on_mv3_cost=*/2_ms, + /*k3_on_mv3_mem_usage=*/1_bytes); + + auto allowed_machine_views = + [&](UnmappedRuntimeOnlyOpCostEstimateKey const &k, + MachineComputeResourceSlice const &resources) { + if (k == runtime_only_from_unmapped_op_cost_estimate_key(k1)) { + return std::unordered_set{ + mv1, + }; + } else { + if (resources == four_nodes_resources) { + return std::unordered_set{mv2, mv3}; + } else if (resources == three_nodes_resources) { + return std::unordered_set{mv2, mv3}; + } else if (resources == two_nodes_resources) { + return std::unordered_set{mv2}; + } else { + return std::unordered_set{}; + } + }; + }; + + MachineMappingWithMemoryContext context = MachineMappingWithMemoryContext{ + cost_estimator, + optimizer_attrs, + allowed_machine_views, + }; + MachineMappingProblemTree problem_tree = - make_parallel_split(make_leaf(k1), make_leaf(k2)); + make_parallel_split(make_leaf(k2), make_leaf(k3)); MachineMappingConstraints constraints = get_unconstrained_solution_for_layers( get_all_leaf_paths(problem_tree)); + MachineMappingWithMemoryCache cache = + empty_machine_mapping_with_memory_cache(); + MachineMappingWithMemoryResult result = get_optimal_machine_mapping_with_memory( - cache, context, problem_tree, full_machine_spec, constraints); - MachineMappingWithMemoryResult correct = - MachineMappingWithMemoryResult{{MachineMappingForSingleLayer{ - OpCostMetrics{/*forward_runtime=*/2.5_ms, - /*backward_runtime=*/2.5_ms, - /*memory_usage=*/2_bytes}, - ParallelLayerGuidObliviousMachineMapping{{ - { - BinaryTreePath{{ - BinaryTreePathEntry::LEFT_CHILD, - }}, - mv2, - }, - { - BinaryTreePath{{ - BinaryTreePathEntry::RIGHT_CHILD, - }}, - mv2, - }, - }}, + cache, context, problem_tree, four_nodes_resources, constraints); + + MachineView translated_mv2 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/2_n, + /*device_idx=*/0_n, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + /*stride=*/stride_t{1_p}, + /*projection=*/MachineSpecificationDimension::INTER_NODE, + }, + }, + }; - }}}; + MachineMappingWithMemoryResult correct = + MachineMappingWithMemoryResult{ + /*pareto_frontier=*/{ + ParetoOptimalMachineMapping{ + OpCostMetrics{ + /*forward_runtime=*/2.5_ms, + /*backward_runtime=*/2.5_ms, + /*memory_usage=*/3_bytes, + }, + ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}, + mv2, + }, + { + BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}, + translated_mv2, 
+ }, + }}, + }, + ParetoOptimalMachineMapping{ + OpCostMetrics{ + /*forward_runtime=*/4.5_ms, + /*backward_runtime=*/4.5_ms, + /*memory_usage=*/3_bytes, + }, + ParallelLayerGuidObliviousMachineMapping{{ + { + BinaryTreePath{{ + BinaryTreePathEntry::LEFT_CHILD, + }}, + mv3, + }, + { + BinaryTreePath{{ + BinaryTreePathEntry::RIGHT_CHILD, + }}, + mv3, + }, + }}, + }, + }, + }; - CHECK(result == correct); + ASSERT(result == correct); } } } diff --git a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc similarity index 74% rename from lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc rename to lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc index 2192b442cd..df9e308874 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_result_with_memory.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.cc @@ -2,155 +2,99 @@ #include "pcg/machine_view.h" #include "utils/nonnegative_int/nonnegative_int.h" #include +#include "test/utils/rapidcheck/some.h" +#include "test/utils/doctest/fmt/unordered_set.h" using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("remove_non_pareto_optimal_machine_mapping_result") { - MachineView machine_view_0 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0_n, - /*device_idx=*/0_n, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/ - { - MachineViewDimension{ - stride_t{1_p}, - MachineSpecificationDimension::INTRA_NODE, + TEST_CASE("MachineMappingWithMemoryResult") { + SUBCASE("initialization") { + SUBCASE("throws if initialized with non-pareto-optimal elements") { + CHECK_THROWS( + MachineMappingWithMemoryResult{{ + ParetoOptimalMachineMapping{ + /*cost=*/OpCostMetrics{ + /*forward_runtime=*/5_ms, + /*backward_runtime=*/5_ms, + /*memory_usage=*/6_bytes, + }, + /*machine_mapping=*/some(), }, - }, - }; - - MachineView machine_view_1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0_n, - /*device_idx=*/0_n, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/ - { - MachineViewDimension{ - stride_t{2_p}, - MachineSpecificationDimension::INTRA_NODE, + ParetoOptimalMachineMapping{ + /*cost=*/OpCostMetrics{ + /*forward_runtime=*/2_ms, + /*backward_runtime=*/4_ms, + /*memory_usage=*/5_bytes, + }, + /*machine_mapping=*/some(), }, - }, - }; + }}); + } - MachineView machine_view_2 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0_n, - /*device_idx=*/0_n, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/ - { - MachineViewDimension{ - stride_t{4_p}, - MachineSpecificationDimension::INTRA_NODE, + SUBCASE("allows elements with identical performance") { + ParetoOptimalMachineMapping mapping1 = + ParetoOptimalMachineMapping{ + /*cost=*/OpCostMetrics{ + /*forward_runtime=*/5_ms, + /*backward_runtime=*/5_ms, + /*memory_usage=*/6_bytes, }, - }, - }; - - OpCostMetrics cost1 = OpCostMetrics{ - /*forward_runtime=*/2_ms, - /*backward_runtime=*/2_ms, - /*memory_usage=*/2_bytes, - }; - - OpCostMetrics cost2 = OpCostMetrics{ - /*forward_runtime=*/4_ms, - /*backward_runtime=*/4_ms, - /*memory_usage=*/1_bytes, - }; - - OpCostMetrics cost3 = OpCostMetrics{ - /*forward_runtime=*/2_ms, - /*backward_runtime=*/2_ms, - /*memory_usage=*/3_bytes, - }; + 
/*machine_mapping=*/some(), + }; - MachineMappingForSingleLayer mm1 = MachineMappingForSingleLayer{ - cost1, - ParallelLayerGuidObliviousMachineMapping{ - { - { - BinaryTreePath{{}}, - machine_view_0, - }, + ParetoOptimalMachineMapping mapping2 = + ParetoOptimalMachineMapping{ + /*cost=*/OpCostMetrics{ + /*forward_runtime=*/5_ms, + /*backward_runtime=*/5_ms, + /*memory_usage=*/5_bytes, }, - }, - }; + /*machine_mapping=*/some(), + }; - MachineMappingForSingleLayer mm2 = MachineMappingForSingleLayer{ - cost2, - ParallelLayerGuidObliviousMachineMapping{ - { - { - BinaryTreePath{{}}, - machine_view_1, - }, + ParetoOptimalMachineMapping mapping3 = + ParetoOptimalMachineMapping{ + /*cost=*/OpCostMetrics{ + /*forward_runtime=*/5_ms, + /*backward_runtime=*/5_ms, + /*memory_usage=*/6_bytes, }, - }, - }; + /*machine_mapping=*/some(), + }; - MachineMappingForSingleLayer mm3 = MachineMappingForSingleLayer{ - cost3, - ParallelLayerGuidObliviousMachineMapping{ - { - { - BinaryTreePath{{}}, - machine_view_2, - }, - }, - }, - }; + - SUBCASE("empty") { - MachineMappingWithMemoryResult before_remove = - empty_machine_mapping_with_memory_result(); - MachineMappingWithMemoryResult result = - remove_non_pareto_optimal_machine_mapping_result(before_remove); - MachineMappingWithMemoryResult correct = - empty_machine_mapping_with_memory_result(); + MachineMappingWithMemoryResult mapping_result + = MachineMappingWithMemoryResult{{ + mapping1, + mapping2, + mapping3, + }}; - CHECK(result == correct); - } + std::unordered_set + result = mapping_result.get_pareto_frontier(); - SUBCASE("all solutions are pareto-optimal") { - MachineMappingWithMemoryResult before_remove = - MachineMappingWithMemoryResult{ - { - mm1, - mm2, - }, - }; - MachineMappingWithMemoryResult result = - remove_non_pareto_optimal_machine_mapping_result(before_remove); - MachineMappingWithMemoryResult correct = before_remove; + std::unordered_set correct = { + mapping1, + mapping2, + mapping3, + }; - CHECK(result == correct); - } + CHECK(result == correct); + } - SUBCASE("there exists a non-pareto-optimal solution") { - MachineMappingWithMemoryResult before_remove = - MachineMappingWithMemoryResult{ - { - mm1, - mm2, - mm3, - }, - }; - MachineMappingWithMemoryResult result = - remove_non_pareto_optimal_machine_mapping_result(before_remove); - MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{ - { - mm1, - mm2, - }, - }; + SUBCASE("allows empty set") { + MachineMappingWithMemoryResult mapping_result + = MachineMappingWithMemoryResult{{}}; - CHECK(result == correct); + std::unordered_set + result = mapping_result.get_pareto_frontier(); + + std::unordered_set correct = {}; + + CHECK(result == correct); + } } } @@ -193,7 +137,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*memory_usage=*/2_bytes, }; MachineMappingWithMemoryResult pre = MachineMappingWithMemoryResult{{ - MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping{ pre_cost, ParallelLayerGuidObliviousMachineMapping{ { @@ -221,7 +165,7 @@ TEST_SUITE(FF_TEST_SUITE) { }; MachineMappingWithMemoryResult post = MachineMappingWithMemoryResult{{ - MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping{ post_cost, ParallelLayerGuidObliviousMachineMapping{ { @@ -259,7 +203,7 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingWithMemoryResult no_parallel_split_transform = MachineMappingWithMemoryResult{ { - MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping{ /*cost=*/OpCostMetrics{ /*forward_runtime=*/pre_cost.forward_runtime + comm_cost + post_cost.forward_runtime, @@ -316,7 
+260,7 @@ TEST_SUITE(FF_TEST_SUITE) { comm_cost, pre, post, ParallelSplitTransformation::RthenL); MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{ { - MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping{ /*cost=*/OpCostMetrics{ /*forward_runtime=*/pre_cost.forward_runtime + comm_cost + post_cost.forward_runtime, @@ -396,7 +340,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*memory_usage=*/2_bytes, }; MachineMappingWithMemoryResult lhs = MachineMappingWithMemoryResult{{ - MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping{ lhs_cost, ParallelLayerGuidObliviousMachineMapping{ { @@ -423,7 +367,7 @@ TEST_SUITE(FF_TEST_SUITE) { /*memory_usage=*/1_bytes, }; MachineMappingWithMemoryResult rhs = MachineMappingWithMemoryResult{{ - MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping{ rhs_cost, ParallelLayerGuidObliviousMachineMapping{ { @@ -439,24 +383,45 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingWithMemoryResult empty = empty_machine_mapping_with_memory_result(); + MachineResourceSplit split = MachineResourceSplit{ + /*offset=*/3_p, + /*dimension=*/MachineSpecificationDimension::INTER_NODE, + }; + SUBCASE("lhs is empty") { - MachineMappingWithMemoryResult result = parallel_combine(empty, rhs); + MachineMappingWithMemoryResult result = parallel_combine(split, empty, rhs); MachineMappingWithMemoryResult correct = empty; CHECK(result == correct); } SUBCASE("rhs is empty") { - MachineMappingWithMemoryResult result = parallel_combine(lhs, empty); + MachineMappingWithMemoryResult result = parallel_combine(split, lhs, empty); MachineMappingWithMemoryResult correct = empty; CHECK(result == correct); } SUBCASE("both are nonempty") { - MachineMappingWithMemoryResult result = parallel_combine(lhs, rhs); + MachineMappingWithMemoryResult result = parallel_combine(split, lhs, rhs); + + MachineView translated_machine_view_1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/3_n, + /*device_idx=*/0_n, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{{ - MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping{ /*cost=*/OpCostMetrics{ /*forward_runtime=*/std::max(lhs_cost.forward_runtime, rhs_cost.forward_runtime), @@ -481,7 +446,7 @@ TEST_SUITE(FF_TEST_SUITE) { }, { BinaryTreePath{{BinaryTreePathEntry::RIGHT_CHILD}}, - machine_view_1, + translated_machine_view_1, }, }, }, @@ -492,7 +457,7 @@ TEST_SUITE(FF_TEST_SUITE) { } } - TEST_CASE("minimize_runtime(memory)") { + TEST_CASE("minimize_runtime(MachineMappingWithMemoryResult, MachineMappingWithMemoryResult)") { MachineView machine_view_0 = MachineView{ /*start=*/MachineSpaceCoordinate{ /*node_idx=*/0_n, @@ -549,12 +514,12 @@ TEST_SUITE(FF_TEST_SUITE) { /*memory_usage=*/1_bytes, }; OpCostMetrics cost3 = OpCostMetrics{ - /*forward_runtime=*/2_ms, - /*backward_runtime=*/2_ms, + /*forward_runtime=*/2.5_ms, + /*backward_runtime=*/2.5_ms, /*memory_usage=*/3_bytes, }; - MachineMappingForSingleLayer mm1 = MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping mm1 = ParetoOptimalMachineMapping{ cost1, ParallelLayerGuidObliviousMachineMapping{ { @@ -566,7 +531,7 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; - MachineMappingForSingleLayer mm2 = MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping mm2 = ParetoOptimalMachineMapping{ cost2, ParallelLayerGuidObliviousMachineMapping{ { @@ -578,7 +543,7 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; 
- MachineMappingForSingleLayer mm3 = MachineMappingForSingleLayer{ + ParetoOptimalMachineMapping mm3 = ParetoOptimalMachineMapping{ cost3, ParallelLayerGuidObliviousMachineMapping{ { @@ -590,21 +555,21 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; - MachineMappingWithMemoryResult result1 = MachineMappingWithMemoryResult{ + MachineMappingWithMemoryResult mapping_result1 = MachineMappingWithMemoryResult{ { mm1, mm2, }, }; - MachineMappingWithMemoryResult result2 = MachineMappingWithMemoryResult{ + MachineMappingWithMemoryResult mapping_result2 = MachineMappingWithMemoryResult{ { mm2, mm3, }, }; - MachineMappingWithMemoryResult result = minimize_runtime(result1, result2); + MachineMappingWithMemoryResult result = minimize_runtime(mapping_result1, mapping_result2); MachineMappingWithMemoryResult correct = MachineMappingWithMemoryResult{ { mm1, diff --git a/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc b/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc index 6571b78540..f4059772ee 100644 --- a/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc +++ b/lib/compiler/test/src/compiler/task_graph_simulator/task_simulator.cc @@ -12,7 +12,6 @@ #include "pcg/device_id.h" #include "pcg/device_type.dtg.h" #include "pcg/machine_space_coordinate.dtg.h" -#include "pcg/machine_specification.h" #include "pcg/machine_specification_dimension.dtg.h" #include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" @@ -38,11 +37,17 @@ namespace FlexFlow { TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("task_simulator_estimate_forward_pass_time") { MachineSpecification machine_spec = - MachineSpecification{/*num_nodes=*/3_p, - /*num_cpus_per_node=*/3_p, - /*num_gpus_per_node=*/3_p, - /*inter_node_bandwidth=*/1.0f, - /*intra_node_bandwidth=*/1.0f}; + MachineSpecification{ + MachineComputeSpecification{ + /*num_nodes=*/3_p, + /*num_cpus_per_node=*/3_p, + /*num_gpus_per_node=*/3_p, + }, + MachineInterconnectSpecification{ + /*inter_node_bandwidth=*/bytes_per_second_t{1.0f}, + /*intra_node_bandwidth=*/bytes_per_second_t{1.0f}, + }, + }; SUBCASE("linear graph") { ParallelComputationGraphBuilder b; @@ -61,16 +66,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_layer_guid_t layer0 = get_source_layer(tensor0); parallel_layer_guid_t layer1 = get_source_layer(tensor1); - std::vector dims = { - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - }; + std::vector dims = {}; ParallelComputationGraph pcg = b.pcg; MachineView mv1 = MachineView{MachineSpaceCoordinate{0_n, 0_n, DeviceType::GPU}, dims}; @@ -150,16 +146,7 @@ TEST_SUITE(FF_TEST_SUITE) { parallel_layer_guid_t layer3 = get_source_layer(tensor3); ParallelComputationGraph pcg = b.pcg; - std::vector dims = { - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - }; + std::vector dims = {}; SUBCASE("all different devices") { MachineView mv0 = MachineView{ diff --git a/lib/compiler/test/src/internal/cost_estimator_for_test.cc 
b/lib/compiler/test/src/internal/cost_estimator_for_test.cc index 60bf6ba7a4..3683796a5c 100644 --- a/lib/compiler/test/src/internal/cost_estimator_for_test.cc +++ b/lib/compiler/test/src/internal/cost_estimator_for_test.cc @@ -3,6 +3,7 @@ #include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_tensor_set_movement.h" #include "compiler/machine_mapping/machine_mapping_problem_tree/unmapped_op_cost_estimate_key.h" #include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/containers/contains_key.h" namespace FlexFlow { @@ -38,8 +39,14 @@ CostEstimator make_fake_cost_estimator( std::unordered_map const &comm_cost_map) { return make_fake_cost_estimator( - [op_cost_map](OpCostEstimateKey const &k) { return op_cost_map.at(k); }, + [op_cost_map](OpCostEstimateKey const &k) { + ASSERT(contains_key(op_cost_map, k), + k); + return op_cost_map.at(k); + }, [comm_cost_map](TensorSetMovement const &m) { + ASSERT(contains_key(comm_cost_map, m), + m); return comm_cost_map.at(m); }); } diff --git a/lib/compiler/test/src/internal/runtime_only_cost_estimator_for_test.cc b/lib/compiler/test/src/internal/runtime_only_cost_estimator_for_test.cc index c52344c6b3..59bf08a399 100644 --- a/lib/compiler/test/src/internal/runtime_only_cost_estimator_for_test.cc +++ b/lib/compiler/test/src/internal/runtime_only_cost_estimator_for_test.cc @@ -5,6 +5,7 @@ #include "compiler/cost_estimator/op_cost_metrics.h" #include "compiler/cost_estimator/runtime_only_cost_estimator_from_cost_estimator.h" #include "internal/cost_estimator_for_test.h" +#include "utils/containers/contains_key.h" namespace FlexFlow { @@ -31,9 +32,13 @@ RuntimeOnlyCostEstimator make_fake_runtime_only_cost_estimator( &comm_cost_map) { return make_fake_runtime_only_cost_estimator( [op_cost_map](RuntimeOnlyOpCostEstimateKey const &k) { + ASSERT(contains_key(op_cost_map, k), k); + return op_cost_map.at(k); }, [comm_cost_map](TensorSetMovement const &m) { + ASSERT(contains_key(comm_cost_map, m), m); + return comm_cost_map.at(m); }); } diff --git a/lib/compiler/test/src/unity_algorithm.cc b/lib/compiler/test/src/unity_algorithm.cc index 8ff0978ea5..9004fc2c66 100644 --- a/lib/compiler/test/src/unity_algorithm.cc +++ b/lib/compiler/test/src/unity_algorithm.cc @@ -1,5 +1,5 @@ #include "compiler/unity_algorithm.h" -#include "doctest/doctest.h" +#include <doctest/doctest.h> TEST_SUITE(FF_TEST_SUITE) { // Rapidcheck does not work for now diff --git a/lib/kernels/include/kernels/batch_matmul_kernels.h b/lib/kernels/include/kernels/batch_matmul_kernels.h index db377162b6..d54663f110 100644 --- a/lib/kernels/include/kernels/batch_matmul_kernels.h +++ b/lib/kernels/include/kernels/batch_matmul_kernels.h @@ -1,37 +1,31 @@ #ifndef _FLEXFLOW_OPS_KERNELS_BATCH_MATMUL_KERNELS_H #define _FLEXFLOW_OPS_KERNELS_BATCH_MATMUL_KERNELS_H +#include "kernels/accessor.h" #include "kernels/device_handle_t.dtg.h" #include "kernels/device_stream_t.dtg.h" #include "kernels/ff_handle.h" +#include "utils/nonnegative_int/nonnegative_int.h" namespace FlexFlow::Kernels::BatchMatmul { void forward_kernel(device_stream_t const &stream, device_handle_t const &handle, - float *output_ptr, - float const *a_input_ptr, - float const *b_input_ptr, - int m, - int n, - int k, - int batch, - int seq_length, - int a_seq_length_dim, - int b_seq_length_dim); + GenericTensorAccessorW const &output, + GenericTensorAccessorR const &input_a, + GenericTensorAccessorR const &input_b, + positive_int seq_length, + std::optional a_seq_length_dim, + std::optional b_seq_length_dim); void
backward_kernel(device_stream_t const &stream, device_handle_t const &handle, - float const *o_ptr, - float const *o_grad_ptr, - float const *a_ptr, - float *a_grad_ptr, - float const *b_ptr, - float *b_grad_ptr, - int m, - int n, - int k, - int batch); + GenericTensorAccessorR const &output, + GenericTensorAccessorR const &output_grad, + GenericTensorAccessorR const &input_a, + GenericTensorAccessorW const &input_a_grad, + GenericTensorAccessorR const &input_b, + GenericTensorAccessorW const &input_b_grad); } // namespace FlexFlow::Kernels::BatchMatmul diff --git a/lib/kernels/include/kernels/batch_matmul_kernels_cpu.h b/lib/kernels/include/kernels/batch_matmul_kernels_cpu.h index fdef3d7fa1..6d9c804be2 100644 --- a/lib/kernels/include/kernels/batch_matmul_kernels_cpu.h +++ b/lib/kernels/include/kernels/batch_matmul_kernels_cpu.h @@ -5,27 +5,19 @@ namespace FlexFlow::Kernels::BatchMatmul { -void cpu_forward_kernel(float *output_ptr, - float const *a_input_ptr, - float const *b_input_ptr, - int m, - int n, - int k, - int batch, - int seq_length, - int a_seq_length_dim, - int b_seq_length_dim); +void cpu_forward_kernel(GenericTensorAccessorW const &output, + GenericTensorAccessorR const &input_a, + GenericTensorAccessorR const &input_b, + positive_int seq_length, + std::optional a_seq_length_dim, + std::optional b_seq_length_dim); -void cpu_backward_kernel(float const *o_ptr, - float const *o_grad_ptr, - float const *a_ptr, - float *a_grad_ptr, - float const *b_ptr, - float *b_grad_ptr, - int m, - int n, - int k, - int batch); +void cpu_backward_kernel(GenericTensorAccessorR const &output, + GenericTensorAccessorR const &output_grad, + GenericTensorAccessorR const &input_a, + GenericTensorAccessorW const &input_a_grad, + GenericTensorAccessorR const &input_b, + GenericTensorAccessorW const &input_b_grad); } // namespace FlexFlow::Kernels::BatchMatmul diff --git a/lib/kernels/include/kernels/batch_matmul_kernels_gpu.h b/lib/kernels/include/kernels/batch_matmul_kernels_gpu.h index 4a35c000c3..1e13755b81 100644 --- a/lib/kernels/include/kernels/batch_matmul_kernels_gpu.h +++ b/lib/kernels/include/kernels/batch_matmul_kernels_gpu.h @@ -10,8 +10,8 @@ namespace FlexFlow::Kernels::BatchMatmul { void gpu_forward_kernel(ffStream_t stream, PerDeviceFFHandle const &handle, float *output_ptr, - float const *a_input_ptr, - float const *b_input_ptr, + float const *input_a_ptr, + float const *input_b_ptr, int m, int n, int k, @@ -22,12 +22,12 @@ void gpu_forward_kernel(ffStream_t stream, void gpu_backward_kernel(ffStream_t stream, PerDeviceFFHandle const &handle, - float const *o_ptr, - float const *o_grad_ptr, - float const *a_ptr, - float *a_grad_ptr, - float const *b_ptr, - float *b_grad_ptr, + float const *output_ptr, + float const *output_grad_ptr, + float const *input_a_ptr, + float *input_a_grad_ptr, + float const *input_b_ptr, + float *input_b_grad_ptr, int m, int n, int k, diff --git a/lib/kernels/include/kernels/create_accessor_with_contents.h b/lib/kernels/include/kernels/create_accessor_with_contents.h index 3574ad0c88..6d3aebd4f4 100644 --- a/lib/kernels/include/kernels/create_accessor_with_contents.h +++ b/lib/kernels/include/kernels/create_accessor_with_contents.h @@ -41,10 +41,10 @@ GenericTensorAccessorW create_2d_accessor_w_with_contents( std::vector> const &contents, Allocator &allocator) { positive_int nrows = positive_int{num_elements(contents)}; - positive_int ncols = throw_if_unexpected( + positive_int ncols = require_all_same1(transform(contents, [](std::vector const &row) { return 
positive_int{num_elements(row)}; - }))); + })); TensorShape shape = TensorShape{ TensorDims{FFOrdered{nrows, ncols}}, @@ -78,18 +78,18 @@ GenericTensorAccessorW create_3d_accessor_w_with_contents( Allocator &allocator) { positive_int dim0_size = positive_int{num_elements(contents)}; - positive_int dim1_size = throw_if_unexpected(require_all_same1( + positive_int dim1_size = require_all_same1( transform(contents, [](std::vector> const &m) { return positive_int{num_elements(m)}; - }))); + })); - positive_int dim2_size = throw_if_unexpected(require_all_same1( + positive_int dim2_size = require_all_same1( transform(contents, [](std::vector> const &m) { - return throw_if_unexpected( + return require_all_same1(transform(m, [](std::vector const &vec) { return positive_int{num_elements(vec)}; - }))); - }))); + })); + })); TensorShape shape = TensorShape{ TensorDims{FFOrdered{dim0_size, dim1_size, dim2_size}}, @@ -127,29 +127,29 @@ GenericTensorAccessorW create_4d_accessor_w_with_contents( Allocator &allocator) { positive_int dim0_size = positive_int{num_elements(contents)}; - positive_int dim1_size = throw_if_unexpected(require_all_same1(transform( + positive_int dim1_size = require_all_same1(transform( contents, [](std::vector>> const &t) { return positive_int{num_elements(t)}; - }))); + })); - positive_int dim2_size = throw_if_unexpected(require_all_same1(transform( + positive_int dim2_size = require_all_same1(transform( contents, [](std::vector>> const &m) { - return throw_if_unexpected(require_all_same1( + return require_all_same1( transform(m, [](std::vector> const &vec) { return positive_int{num_elements(vec)}; - }))); - }))); + })); + })); - positive_int dim3_size = throw_if_unexpected(require_all_same1(transform( + positive_int dim3_size = require_all_same1(transform( contents, [](std::vector>> const &t) { - return throw_if_unexpected(require_all_same1( + return require_all_same1( transform(t, [](std::vector> const &mat) { - return throw_if_unexpected(require_all_same1( + return require_all_same1( transform(mat, [](std::vector const &vec) { return positive_int{num_elements(vec)}; - }))); - }))); - }))); + })); + })); + })); TensorShape shape = TensorShape{ TensorDims{FFOrdered{dim0_size, dim1_size, dim2_size, dim3_size}}, diff --git a/lib/kernels/include/kernels/embedding_kernels.h b/lib/kernels/include/kernels/embedding_kernels.h index e9c158598a..9a23386efe 100644 --- a/lib/kernels/include/kernels/embedding_kernels.h +++ b/lib/kernels/include/kernels/embedding_kernels.h @@ -14,8 +14,8 @@ void forward_kernel(device_stream_t const &stream, DataType input_data_type, DataType output_data_type, std::optional aggr, - int in_dim, - int out_dim, + num_tensor_dims_t in_dim, + num_tensor_dims_t out_dim, int batch_size); void backward_kernel(device_stream_t const &stream, @@ -25,8 +25,8 @@ void backward_kernel(device_stream_t const &stream, DataType output_data_type, DataType input_data_type, std::optional aggr, - int in_dim, - int out_dim, + num_tensor_dims_t in_dim, + num_tensor_dims_t out_dim, int batch_size); } // namespace FlexFlow::Kernels::Embedding diff --git a/lib/kernels/include/kernels/embedding_kernels_cpu.h b/lib/kernels/include/kernels/embedding_kernels_cpu.h index 23e32589ae..c2430ba987 100644 --- a/lib/kernels/include/kernels/embedding_kernels_cpu.h +++ b/lib/kernels/include/kernels/embedding_kernels_cpu.h @@ -12,8 +12,8 @@ void cpu_forward_kernel(GenericTensorAccessorR const &input, DataType input_data_type, DataType output_data_type, std::optional aggr, - int in_dim, - int 
out_dim, + num_tensor_dims_t in_dim, + num_tensor_dims_t out_dim, int batch_size); void cpu_backward_kernel(GenericTensorAccessorR const &output, @@ -22,8 +22,8 @@ void cpu_backward_kernel(GenericTensorAccessorR const &output, DataType output_data_type, DataType input_data_type, std::optional aggr, - int in_dim, - int out_dim, + num_tensor_dims_t in_dim, + num_tensor_dims_t out_dim, int batch_size); } // namespace FlexFlow::Kernels::Embedding diff --git a/lib/kernels/include/kernels/legion_dim.h b/lib/kernels/include/kernels/legion_dim.h index 24eff46e22..8649d2cbeb 100644 --- a/lib/kernels/include/kernels/legion_dim.h +++ b/lib/kernels/include/kernels/legion_dim.h @@ -5,6 +5,7 @@ #include "kernels/legion_ordered/legion_ordered.h" #include "op-attrs/ff_dim_t.dtg.h" #include "op-attrs/ff_ordered/ff_ordered.h" +#include "op-attrs/num_tensor_dims_t.h" #include "op-attrs/tensor_dims.dtg.h" #include "utils/containers/set_of.h" #include "utils/containers/transform.h" @@ -19,9 +20,9 @@ positive_int &dim_at_idx(TensorDims &, legion_dim_t); legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value); -legion_dim_t legion_dim_from_ff_dim(ff_dim_t, nonnegative_int num_dimensions); +legion_dim_t legion_dim_from_ff_dim(ff_dim_t, num_tensor_dims_t num_dimensions); -ff_dim_t ff_dim_from_legion_dim(legion_dim_t, nonnegative_int num_dimensions); +ff_dim_t ff_dim_from_legion_dim(legion_dim_t, num_tensor_dims_t num_dimensions); template std::set key_range(LegionOrdered const &d) { diff --git a/lib/kernels/include/kernels/map_tensor_accessors.h b/lib/kernels/include/kernels/map_tensor_accessors.h index f7aa6a1001..7473ee26e1 100644 --- a/lib/kernels/include/kernels/map_tensor_accessors.h +++ b/lib/kernels/include/kernels/map_tensor_accessors.h @@ -99,11 +99,11 @@ struct CPUMapTensorAccessors2 { GenericTensorAccessorW &output, F &&f) { - TensorDims dims = throw_if_unexpected(require_all_same1(std::vector{ + TensorDims dims = require_all_same1(std::vector{ lhs.shape.dims, rhs.shape.dims, output.shape.dims, - })); + }); ASSERT(lhs.device_type == DeviceType::CPU); ASSERT(rhs.device_type == DeviceType::CPU); diff --git a/lib/kernels/include/kernels/perf_metrics.h b/lib/kernels/include/kernels/perf_metrics.h index c4a34e4f79..69f96491e0 100644 --- a/lib/kernels/include/kernels/perf_metrics.h +++ b/lib/kernels/include/kernels/perf_metrics.h @@ -1,37 +1,11 @@ #ifndef _FLEXFLOW_KERNELS_INCLUDE_KERNELS_PERF_METRICS_H #define _FLEXFLOW_KERNELS_INCLUDE_KERNELS_PERF_METRICS_H -#include "utils/fmt.h" -#include "utils/visitable.h" +#include "kernels/perf_metrics.dtg.h" +#include namespace FlexFlow { -struct PerfMetrics : public use_visitable_cmp { - PerfMetrics() = delete; - PerfMetrics(double start_time); - PerfMetrics(int train_all, - std::optional train_correct, - std::optional cce_loss, - std::optional sparse_cce_loss, - std::optional mse_loss, - std::optional rmse_loss, - std::optional mae_loss, - double start_time_micro, - double current_time_micro); - - int train_all = 0; // measure_accuracy_denominator - std::optional train_correct = 0; // measure_accuracy numerator - std::optional cce_loss = - std::nullopt; // measure_categorical_crossentropy - std::optional sparse_cce_loss = - 0.0f; // measure_sparse_categorical_crossentropy - std::optional mse_loss = 0.0f; // measure_mean_squared_error - std::optional rmse_loss = 0.0f; // measure_root_mean_squared_error - std::optional mae_loss = 0.0f; // measure_mean_absolute_error - double start_time; - double current_time; -}; - float get_throughput(PerfMetrics const 
&); float get_accuracy(PerfMetrics const &); @@ -40,16 +14,6 @@ PerfMetrics apply_scale(PerfMetrics const &, float scale); } // namespace FlexFlow -VISITABLE_STRUCT(::FlexFlow::PerfMetrics, - train_all, - train_correct, - cce_loss, - sparse_cce_loss, - mse_loss, - rmse_loss, - mae_loss, - start_time); - namespace fmt { template <> diff --git a/lib/kernels/include/kernels/perf_metrics.struct.toml b/lib/kernels/include/kernels/perf_metrics.struct.toml new file mode 100644 index 0000000000..d7f1b67a35 --- /dev/null +++ b/lib/kernels/include/kernels/perf_metrics.struct.toml @@ -0,0 +1,59 @@ +namespace = "FlexFlow" +name = "PerfMetrics" +features = [ + "eq", + "hash", + "json", +] + +includes = [ + "", +] + +src_includes = [ + "utils/fmt/optional.h", + "utils/json/optional.h", +] + +[[fields]] +name = "train_all" +type = "int" +docstring = "measure_accuracy denominator" + +[[fields]] +name = "train_correct" +type = "std::optional" +docstring = "measure_accuracy numerator" + +[[fields]] +name = "cce_loss" +type = "std::optional" +docstring = "measure_categorical_crossentropy" + +[[fields]] +name = "sparse_cce_loss" +type = "std::optional" +docstring = "measure_sparse_categorical_crossentropy" + +[[fields]] +name = "mse_loss" +type = "std::optional" +docstring = "measure_mean_squared_error" + +[[fields]] +name = "rmse_loss" +type = "std::optional" +docstring = "measure_root_mean_squared_error" + +[[fields]] +name = "mae_loss" +type = "std::optional" +docstring = "measure_mean_absolute_error" + +[[fields]] +name = "start_time" +type = "double" + +[[fields]] +name = "current_time" +type = "double" diff --git a/lib/kernels/include/kernels/profiling.h b/lib/kernels/include/kernels/profiling.h index c0a0e794e3..7b34405da7 100644 --- a/lib/kernels/include/kernels/profiling.h +++ b/lib/kernels/include/kernels/profiling.h @@ -5,12 +5,13 @@ #include "kernels/device_stream_t.h" #include "kernels/profiling_settings.dtg.h" #include "pcg/device_type.dtg.h" +#include "utils/units/milliseconds_t.h" #include namespace FlexFlow { template -std::optional profiling_wrapper(F const &f, +std::optional profiling_wrapper(F const &f, bool enable_profiling, DeviceType device_type, Ts &&...ts) { @@ -27,7 +28,7 @@ std::optional profiling_wrapper(F const &f, } template -std::optional profiling_wrapper(F const &f, +std::optional profiling_wrapper(F const &f, ProfilingSettings const &settings, DeviceType device_type, Ts &&...ts) { diff --git a/lib/kernels/include/kernels/reduce_tensor_accessor.h b/lib/kernels/include/kernels/reduce_tensor_accessor.h index a06afbf5f6..16e487a5e1 100644 --- a/lib/kernels/include/kernels/reduce_tensor_accessor.h +++ b/lib/kernels/include/kernels/reduce_tensor_accessor.h @@ -33,7 +33,7 @@ struct CPUReduceTensorAccessorInDims { return contains(dims_to_reduce, dim); }; - std::unordered_map> + OneToMany output_coord_from_input_coord = group_by( get_tensor_dims_coord_set(input.shape.dims), [&](TensorDimsCoord const &input_coord) { @@ -41,7 +41,7 @@ struct CPUReduceTensorAccessorInDims { }); for (auto const &[output_coord, input_coords] : - output_coord_from_input_coord) { + output_coord_from_input_coord.l_to_r()) { std::vector input_values = transform( sorted(input_coords), [&](TensorDimsCoord const &input_coord) -> T { return input.at
(input_coord); diff --git a/lib/kernels/src/cuda/ops/element_binary_kernels.cu b/lib/kernels/src/cuda/ops/element_binary_kernels.cu index 7e13486429..e4c698ad02 100644 --- a/lib/kernels/src/cuda/ops/element_binary_kernels.cu +++ b/lib/kernels/src/cuda/ops/element_binary_kernels.cu @@ -18,6 +18,7 @@ #include "kernels/ff_handle.h" #include "op-attrs/datatype.h" #include "op-attrs/operator_type.h" +#include "utils/exception.h" namespace FlexFlow { namespace Kernels { diff --git a/lib/kernels/src/cuda/ops/pool_2d_kernels.cu b/lib/kernels/src/cuda/ops/pool_2d_kernels.cu index ec185a360e..4e06f2da02 100644 --- a/lib/kernels/src/cuda/ops/pool_2d_kernels.cu +++ b/lib/kernels/src/cuda/ops/pool_2d_kernels.cu @@ -15,6 +15,7 @@ #include "internal/device.h" #include "kernels/pool_2d_kernels_gpu.h" +#include "utils/exception.h" namespace FlexFlow { diff --git a/lib/kernels/src/cuda/ops/softmax_kernels.cu b/lib/kernels/src/cuda/ops/softmax_kernels.cu index 85575d7bf6..1ecf42dfd8 100644 --- a/lib/kernels/src/cuda/ops/softmax_kernels.cu +++ b/lib/kernels/src/cuda/ops/softmax_kernels.cu @@ -15,6 +15,7 @@ #include "internal/device.h" #include "kernels/softmax_kernels_gpu.h" +#include "utils/exception.h" namespace FlexFlow { diff --git a/lib/kernels/src/cuda/ops/transpose_kernels.cu b/lib/kernels/src/cuda/ops/transpose_kernels.cu index 85a259769c..b00f26cc1a 100644 --- a/lib/kernels/src/cuda/ops/transpose_kernels.cu +++ b/lib/kernels/src/cuda/ops/transpose_kernels.cu @@ -48,47 +48,45 @@ __global__ void transpose_simple_kernel(std::size_t volume, } } -static LegionOrdered - legion_ordered_perm_from_ff_ordered(FFOrdered const &perm) { - nonnegative_int perm_size = num_elements(perm); - LegionOrdered legion_ordered_perm = - transform(legion_ordered_from_ff_ordered(perm), [&](ff_dim_t d) { - return legion_dim_from_ff_dim(d, perm_size); - }); - - return legion_ordered_perm; -} - -void gpu_forward_kernel(cudaStream_t stream, - TransposeAttrs const &m, - GenericTensorAccessorR const &input, - GenericTensorAccessorW const &output) { +static TransposeStrides make_strides(TransposeAttrs const &m, + TensorDims const &input_dims, + TensorDims const &output_dims) { + ASSERT(get_num_dims(input_dims) == m.permutation.num_tensor_dims()); TransposeStrides info; - info.num_dim = get_num_dims(input.shape.dims).unwrap_nonnegative(); - assert(info.num_dim == m.perm.size()); - - LegionOrdered legion_ordered_perm = - legion_ordered_perm_from_ff_ordered(m.perm); + num_tensor_dims_t num_dims = m.permutation.num_tensor_dims(); + info.num_dim = num_dims.int_from_num_tensor_dims(); for (int i = 0; i < info.num_dim; i++) { + legion_dim_t legion_dim = legion_dim_t{nonnegative_int{i}}; + ff_dim_t ff_dim = ff_dim_from_legion_dim(legion_dim, num_dims); + if (i == 0) { info.in_strides[i] = 1; info.out_strides[i] = 1; } else { int in_dim_size = - dim_at_idx(input.shape.dims, legion_dim_t{nonnegative_int{i}}) + dim_at_idx(input_dims, legion_dim) .int_from_positive_int(); int out_dim_size = - dim_at_idx(output.shape.dims, legion_dim_t{nonnegative_int{i}}) + dim_at_idx(output_dims, legion_dim) .int_from_positive_int(); info.in_strides[i] = info.in_strides[i - 1] * in_dim_size; info.out_strides[i] = info.out_strides[i - 1] * out_dim_size; } - info.perm[i] = legion_ordered_perm.at(legion_dim_t{nonnegative_int{i}}) - .value.unwrap_nonnegative(); + ff_dim_t ff_permuted_dim = m.permutation.at_l(ff_dim); + legion_dim_t legion_permuted_dim = legion_dim_from_ff_dim(ff_permuted_dim, num_dims); + info.perm[i] = 
legion_permuted_dim.value.unwrap_nonnegative(); } + + return info; +} + +void gpu_forward_kernel(cudaStream_t stream, + TransposeAttrs const &m, + GenericTensorAccessorR const &input, + GenericTensorAccessorW const &output) { transpose_simple_kernel<<< GET_BLOCKS(get_num_elements(output.shape.dims).int_from_positive_int()), CUDA_NUM_THREADS, @@ -96,7 +94,7 @@ void gpu_forward_kernel(cudaStream_t stream, stream>>>(get_num_elements(output.shape.dims).int_from_positive_int(), input.get_float_ptr(), output.get_float_ptr(), - info, + make_strides(m, input.shape.dims, output.shape.dims), /*beta=*/0.0f); } @@ -104,31 +102,8 @@ void gpu_backward_kernel(cudaStream_t stream, TransposeAttrs const &m, GenericTensorAccessorR const &out_grad, GenericTensorAccessorW const &in_grad) { + ASSERT(get_num_dims(in_grad.shape.dims) == m.permutation.num_tensor_dims()); - TransposeStrides info; - info.num_dim = get_num_dims(in_grad.shape.dims).unwrap_nonnegative(); - assert(info.num_dim == m.perm.size()); - - LegionOrdered legion_ordered_perm = - legion_ordered_perm_from_ff_ordered(m.perm); - - for (int i = 0; i < info.num_dim; i++) { - if (i == 0) { - info.in_strides[i] = 1; - info.out_strides[i] = 1; - } else { - int in_dim_size = - dim_at_idx(out_grad.shape.dims, legion_dim_t{nonnegative_int{i}}) - .int_from_positive_int(); - int out_dim_size = - dim_at_idx(in_grad.shape.dims, legion_dim_t{nonnegative_int{i}}) - .int_from_positive_int(); - info.in_strides[i] = info.in_strides[i - 1] * in_dim_size; - info.out_strides[i] = info.out_strides[i - 1] * out_dim_size; - } - info.perm[legion_ordered_perm.at(legion_dim_t{nonnegative_int{i}}) - .value.unwrap_nonnegative()] = i; - } transpose_simple_kernel<<< GET_BLOCKS(get_num_elements(in_grad.shape.dims).int_from_positive_int()), CUDA_NUM_THREADS, @@ -136,7 +111,7 @@ void gpu_backward_kernel(cudaStream_t stream, stream>>>(get_num_elements(in_grad.shape.dims).int_from_positive_int(), out_grad.get_float_ptr(), in_grad.get_float_ptr(), - info, + make_strides(m, out_grad.shape.dims, in_grad.shape.dims), /*beta=*/1.0f); } diff --git a/lib/kernels/src/kernels/accessor.cc b/lib/kernels/src/kernels/accessor.cc index 868940bf6c..31536caf9c 100644 --- a/lib/kernels/src/kernels/accessor.cc +++ b/lib/kernels/src/kernels/accessor.cc @@ -19,7 +19,7 @@ nonnegative_int calculate_accessor_offset(TensorDimsCoord const &coord, nonnegative_int offset = 0_n; positive_int multiplier = 1_p; - for (ff_dim_t dim : reversed(get_idxs(tensor_dims.ff_ordered))) { + for (ff_dim_t dim : reversed(vector_of(get_idxs(tensor_dims.ff_ordered)))) { ASSERT(coord.ff_ordered.at(dim) < dim_at_idx(tensor_dims, dim), "Out of bounds access", dim); diff --git a/lib/kernels/src/kernels/batch_matmul_kernels.cc b/lib/kernels/src/kernels/batch_matmul_kernels.cc index 652d4fb137..a6ac364900 100644 --- a/lib/kernels/src/kernels/batch_matmul_kernels.cc +++ b/lib/kernels/src/kernels/batch_matmul_kernels.cc @@ -1,46 +1,74 @@ #include "kernels/batch_matmul_kernels.h" #include "kernels/batch_matmul_kernels_cpu.h" #include "kernels/batch_matmul_kernels_gpu.h" +#include "utils/containers/require_same.h" namespace FlexFlow::Kernels::BatchMatmul { +static std::tuple + get_params(TensorDims const &input_a_dims, + TensorDims const &input_b_dims, + TensorDims const &output_dims) { + positive_int m = require_same(dim_at_idx(input_b_dims, relative_ff_dim_t{-1}), + dim_at_idx(output_dims, relative_ff_dim_t{-1})); + + positive_int n = require_same(dim_at_idx(input_a_dims, relative_ff_dim_t{-2}), + dim_at_idx(output_dims, 
relative_ff_dim_t{-2})); + + positive_int k = + require_same(dim_at_idx(input_a_dims, relative_ff_dim_t{-1}), + dim_at_idx(input_b_dims, relative_ff_dim_t{-2})); + + TensorDims leading_dims = require_same( + slice_tensor_dims( + input_a_dims, relative_ff_dim_t{0}, relative_ff_dim_t{-2}), + slice_tensor_dims( + input_b_dims, relative_ff_dim_t{0}, relative_ff_dim_t{-2})); + + positive_int batch = get_num_elements(leading_dims); + + return {m, n, k, batch}; +} + void forward_kernel(device_stream_t const &stream, device_handle_t const &handle, - float *output_ptr, - float const *a_input_ptr, - float const *b_input_ptr, - int m, - int n, - int k, - int batch, - int seq_length, - int a_seq_length_dim, - int b_seq_length_dim) { + GenericTensorAccessorW const &output, + GenericTensorAccessorR const &input_a, + GenericTensorAccessorR const &input_b, + positive_int seq_length, + std::optional a_seq_length_dim, + std::optional b_seq_length_dim) { + + auto [m, n, k, batch] = + get_params(input_a.shape.dims, input_b.shape.dims, output.shape.dims); + + auto get_raw_seq_len = [](std::optional seq_len) -> int { + return transform(seq_len, + [](positive_int x) { return x.int_from_positive_int(); }) + .value_or(-1); + }; + if (stream.is_gpu()) { gpu_forward_kernel( /*stream=*/stream.require_gpu(), /*handle=*/handle.require_for_gpu(), - /*output_ptr=*/output_ptr, - /*a_input_ptr=*/a_input_ptr, - /*b_input_ptr=*/b_input_ptr, - /*m=*/m, - /*n=*/n, - /*k=*/k, - /*batch=*/batch, - /*seq_length=*/seq_length, - /*a_seq_length_dim=*/a_seq_length_dim, - /*b_seq_length_dim=*/b_seq_length_dim); + /*output_ptr=*/output.get_float_ptr(), + /*a_input_ptr=*/input_a.get_float_ptr(), + /*b_input_ptr=*/input_b.get_float_ptr(), + /*m=*/m.int_from_positive_int(), + /*n=*/n.int_from_positive_int(), + /*k=*/k.int_from_positive_int(), + /*batch=*/batch.int_from_positive_int(), + /*seq_length=*/seq_length.int_from_positive_int(), + /*a_seq_length_dim=*/get_raw_seq_len(a_seq_length_dim), + /*b_seq_length_dim=*/get_raw_seq_len(b_seq_length_dim)); } else { ASSERT(stream.is_cpu()); ASSERT(handle.is_for_cpu()); cpu_forward_kernel( - /*output_ptr=*/output_ptr, - /*a_input_ptr=*/a_input_ptr, - /*b_input_ptr=*/b_input_ptr, - /*m=*/m, - /*n=*/n, - /*k=*/k, - /*batch=*/batch, + /*output=*/output, + /*input_a=*/input_a, + /*input_b=*/input_b, /*seq_length=*/seq_length, /*a_seq_length_dim=*/a_seq_length_dim, /*b_seq_length_dim=*/b_seq_length_dim); @@ -49,44 +77,43 @@ void forward_kernel(device_stream_t const &stream, void backward_kernel(device_stream_t const &stream, device_handle_t const &handle, - float const *o_ptr, - float const *o_grad_ptr, - float const *a_ptr, - float *a_grad_ptr, - float const *b_ptr, - float *b_grad_ptr, - int m, - int n, - int k, - int batch) { + GenericTensorAccessorR const &output, + GenericTensorAccessorR const &output_grad, + GenericTensorAccessorR const &input_a, + GenericTensorAccessorW const &input_a_grad, + GenericTensorAccessorR const &input_b, + GenericTensorAccessorW const &input_b_grad) { + TensorShape input_a_shape = require_same(input_a.shape, input_a_grad.shape); + TensorShape input_b_shape = require_same(input_b.shape, input_b_grad.shape); + TensorShape output_shape = require_same(output.shape, output_grad.shape); + + auto [m, n, k, batch] = + get_params(input_a_shape.dims, input_b_shape.dims, output_shape.dims); + if (stream.is_gpu()) { gpu_backward_kernel( /*stream=*/stream.require_gpu(), /*handle=*/handle.require_for_gpu(), - /*o_ptr=*/o_ptr, - /*o_grad_ptr=*/o_grad_ptr, - /*a_ptr=*/a_ptr, - 
/*a_grad_ptr=*/a_grad_ptr, - /*b_ptr=*/b_ptr, - /*b_grad_ptr=*/b_grad_ptr, - /*m=*/m, - /*n=*/n, - /*k=*/k, - /*batch=*/batch); + /*output_ptr=*/output.get_float_ptr(), + /*output_grad_ptr=*/output_grad.get_float_ptr(), + /*input_a_ptr=*/input_a.get_float_ptr(), + /*input_a_grad_ptr=*/input_a_grad.get_float_ptr(), + /*input_b_ptr=*/input_b.get_float_ptr(), + /*input_b_grad_ptr=*/input_b_grad.get_float_ptr(), + /*m=*/m.int_from_positive_int(), + /*n=*/n.int_from_positive_int(), + /*k=*/k.int_from_positive_int(), + /*batch=*/batch.int_from_positive_int()); } else { ASSERT(stream.is_cpu()); ASSERT(handle.is_for_cpu()); cpu_backward_kernel( - /*o_ptr=*/o_ptr, - /*o_grad_ptr=*/o_grad_ptr, - /*a_ptr=*/a_ptr, - /*a_grad_ptr=*/a_grad_ptr, - /*b_ptr=*/b_ptr, - /*b_grad_ptr=*/b_grad_ptr, - /*m=*/m, - /*n=*/n, - /*k=*/k, - /*batch=*/batch); + /*output=*/output, + /*output_grad=*/output_grad, + /*input_a=*/input_a, + /*input_a_grad=*/input_a_grad, + /*input_b=*/input_b, + /*input_b_grad=*/input_b_grad); } } diff --git a/lib/kernels/src/kernels/batch_matmul_kernels_cpu.cc b/lib/kernels/src/kernels/batch_matmul_kernels_cpu.cc index f139d42992..292841d19f 100644 --- a/lib/kernels/src/kernels/batch_matmul_kernels_cpu.cc +++ b/lib/kernels/src/kernels/batch_matmul_kernels_cpu.cc @@ -2,29 +2,21 @@ namespace FlexFlow::Kernels::BatchMatmul { -void cpu_forward_kernel(float *output_ptr, - float const *a_input_ptr, - float const *b_input_ptr, - int m, - int n, - int k, - int batch, - int seq_length, - int a_seq_length_dim, - int b_seq_length_dim) { +void cpu_forward_kernel(GenericTensorAccessorW const &output, + GenericTensorAccessorR const &input_a, + GenericTensorAccessorR const &input_b, + positive_int seq_length, + std::optional a_seq_length_dim, + std::optional b_seq_length_dim) { NOT_IMPLEMENTED(); } -void cpu_backward_kernel(float const *o_ptr, - float const *o_grad_ptr, - float const *a_ptr, - float *a_grad_ptr, - float const *b_ptr, - float *b_grad_ptr, - int m, - int n, - int k, - int batch) { +void cpu_backward_kernel(GenericTensorAccessorR const &output, + GenericTensorAccessorR const &output_grad, + GenericTensorAccessorR const &input_a, + GenericTensorAccessorW const &input_a_grad, + GenericTensorAccessorR const &input_b, + GenericTensorAccessorW const &input_b_grad) { NOT_IMPLEMENTED(); } diff --git a/lib/kernels/src/kernels/element_binary_kernels.cc b/lib/kernels/src/kernels/element_binary_kernels.cc index bea317dfec..1d8fbaaf77 100644 --- a/lib/kernels/src/kernels/element_binary_kernels.cc +++ b/lib/kernels/src/kernels/element_binary_kernels.cc @@ -1,6 +1,7 @@ #include "kernels/element_binary_kernels.h" #include "kernels/element_binary_kernels_cpu.h" #include "kernels/element_binary_kernels_gpu.h" +#include namespace FlexFlow::Kernels::ElementBinary { diff --git a/lib/kernels/src/kernels/embedding_kernels.cc b/lib/kernels/src/kernels/embedding_kernels.cc index 957d297b9e..cd6ad051bc 100644 --- a/lib/kernels/src/kernels/embedding_kernels.cc +++ b/lib/kernels/src/kernels/embedding_kernels.cc @@ -11,8 +11,8 @@ void forward_kernel(device_stream_t const &stream, DataType input_data_type, DataType output_data_type, std::optional aggr, - int in_dim, - int out_dim, + num_tensor_dims_t in_dim, + num_tensor_dims_t out_dim, int batch_size) { if (stream.is_gpu()) { gpu_forward_kernel( @@ -23,8 +23,8 @@ void forward_kernel(device_stream_t const &stream, /*input_data_type=*/input_data_type, /*output_data_type=*/output_data_type, /*aggr=*/aggr, - /*in_dim=*/in_dim, - /*out_dim=*/out_dim, + 
/*in_dim=*/in_dim.int_from_num_tensor_dims(), + /*out_dim=*/out_dim.int_from_num_tensor_dims(), /*batch_size=*/batch_size); } else { ASSERT(stream.is_cpu()); @@ -48,8 +48,8 @@ void backward_kernel(device_stream_t const &stream, DataType output_data_type, DataType input_data_type, std::optional aggr, - int in_dim, - int out_dim, + num_tensor_dims_t in_dim, + num_tensor_dims_t out_dim, int batch_size) { if (stream.is_gpu()) { gpu_backward_kernel( @@ -60,8 +60,8 @@ void backward_kernel(device_stream_t const &stream, /*output_data_type=*/output_data_type, /*input_data_type=*/input_data_type, /*aggr=*/aggr, - /*in_dim=*/in_dim, - /*out_dim=*/out_dim, + /*in_dim=*/in_dim.int_from_num_tensor_dims(), + /*out_dim=*/out_dim.int_from_num_tensor_dims(), /*batch_size=*/batch_size); } else { ASSERT(stream.is_cpu()); diff --git a/lib/kernels/src/kernels/embedding_kernels_cpu.cc b/lib/kernels/src/kernels/embedding_kernels_cpu.cc index f5df53e322..db1a696ebb 100644 --- a/lib/kernels/src/kernels/embedding_kernels_cpu.cc +++ b/lib/kernels/src/kernels/embedding_kernels_cpu.cc @@ -8,8 +8,8 @@ void cpu_forward_kernel(GenericTensorAccessorR const &input, DataType input_data_type, DataType output_data_type, std::optional aggr, - int in_dim, - int out_dim, + num_tensor_dims_t in_dim, + num_tensor_dims_t out_dim, int batch_size) { NOT_IMPLEMENTED(); } @@ -20,8 +20,8 @@ void cpu_backward_kernel(GenericTensorAccessorR const &output, DataType output_data_type, DataType input_data_type, std::optional aggr, - int in_dim, - int out_dim, + num_tensor_dims_t in_dim, + num_tensor_dims_t out_dim, int batch_size) { NOT_IMPLEMENTED(); } diff --git a/lib/kernels/src/kernels/format_accessor_contents.cc b/lib/kernels/src/kernels/format_accessor_contents.cc index cbdf2870dd..513158c7e6 100644 --- a/lib/kernels/src/kernels/format_accessor_contents.cc +++ b/lib/kernels/src/kernels/format_accessor_contents.cc @@ -14,7 +14,7 @@ struct Print1DCPUAccessorR { void operator()(GenericTensorAccessorR const &accessor, std::ostream &stream) { ASSERT(accessor.device_type == DeviceType::CPU); - nonnegative_int dims = get_num_dims(accessor.shape.dims); + nonnegative_int dims = get_num_dims(accessor.shape.dims).nonnegative_int_from_num_tensor_dims(); ASSERT(dims == 1_n); positive_int ncols = dim_at_idx(accessor.shape.dims, ff_dim_t{0_n}); @@ -47,7 +47,7 @@ struct Print2DCPUAccessorR { void operator()(GenericTensorAccessorR const &accessor, std::ostream &stream) { ASSERT(accessor.device_type == DeviceType::CPU); - nonnegative_int dims = get_num_dims(accessor.shape.dims); + nonnegative_int dims = get_num_dims(accessor.shape.dims).nonnegative_int_from_num_tensor_dims(); ASSERT(dims == 2_n); positive_int dim0_size = dim_at_idx(accessor.shape.dims, ff_dim_t{0_n}); positive_int dim1_size = dim_at_idx(accessor.shape.dims, ff_dim_t{1_n}); @@ -91,7 +91,7 @@ struct Print3DCPUAccessorR { void operator()(GenericTensorAccessorR const &accessor, std::ostream &stream) { ASSERT(accessor.device_type == DeviceType::CPU); - nonnegative_int dims = get_num_dims(accessor.shape.dims); + nonnegative_int dims = get_num_dims(accessor.shape.dims).nonnegative_int_from_num_tensor_dims(); ASSERT(dims == 3_n); positive_int dim0_size = dim_at_idx(accessor.shape.dims, ff_dim_t{0_n}); @@ -150,7 +150,7 @@ struct Print4DCPUAccessorR { void operator()(GenericTensorAccessorR const &accessor, std::ostream &stream) { ASSERT(accessor.device_type == DeviceType::CPU); - nonnegative_int dims = get_num_dims(accessor.shape.dims); + nonnegative_int dims = 
get_num_dims(accessor.shape.dims).nonnegative_int_from_num_tensor_dims(); ASSERT(dims == 4_n); positive_int dim0_size = dim_at_idx(accessor.shape.dims, ff_dim_t{0_n}); @@ -248,7 +248,7 @@ std::string format_accessor_r_contents(GenericTensorAccessorR const &accessor) { GenericTensorAccessorR cpu_accessor = copy_tensor_accessor_r_to_cpu_if_necessary(accessor, cpu_allocator); - int num_dims = get_num_dims(cpu_accessor.shape.dims).unwrap_nonnegative(); + int num_dims = get_num_dims(cpu_accessor.shape.dims).int_from_num_tensor_dims(); switch (num_dims) { case 1: return format_1d_accessor_r_contents(cpu_accessor); @@ -268,7 +268,7 @@ std::string format_accessor_w_contents(GenericTensorAccessorW const &accessor) { GenericTensorAccessorW cpu_accessor = copy_tensor_accessor_w_to_cpu_if_necessary(accessor, cpu_allocator); - int num_dims = get_num_dims(cpu_accessor.shape.dims).unwrap_nonnegative(); + int num_dims = get_num_dims(cpu_accessor.shape.dims).int_from_num_tensor_dims(); switch (num_dims) { case 1: return format_1d_accessor_w_contents(cpu_accessor); diff --git a/lib/kernels/src/kernels/legion_dim.cc b/lib/kernels/src/kernels/legion_dim.cc index f3fa67387a..077f7a789c 100644 --- a/lib/kernels/src/kernels/legion_dim.cc +++ b/lib/kernels/src/kernels/legion_dim.cc @@ -26,15 +26,15 @@ legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value) { } legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, - nonnegative_int num_dimensions) { - return legion_dim_t{nonnegative_int{num_dimensions.unwrap_nonnegative() - + num_tensor_dims_t num_dimensions) { + return legion_dim_t{nonnegative_int{num_dimensions.int_from_num_tensor_dims() - ff_dim.value.unwrap_nonnegative() - 1}}; ; } ff_dim_t ff_dim_from_legion_dim(legion_dim_t legion_dim, - nonnegative_int num_dimensions) { - return ff_dim_t{nonnegative_int{num_dimensions.unwrap_nonnegative() - + num_tensor_dims_t num_dimensions) { + return ff_dim_t{nonnegative_int{num_dimensions.int_from_num_tensor_dims() - legion_dim.value.unwrap_nonnegative() - 1}}; } diff --git a/lib/kernels/src/kernels/pool_2d_kernels.cc b/lib/kernels/src/kernels/pool_2d_kernels.cc index 6ebfc68c86..f8f5571716 100644 --- a/lib/kernels/src/kernels/pool_2d_kernels.cc +++ b/lib/kernels/src/kernels/pool_2d_kernels.cc @@ -1,6 +1,7 @@ #include "kernels/pool_2d_kernels.h" #include "kernels/pool_2d_kernels_cpu.h" #include "kernels/pool_2d_kernels_gpu.h" +#include namespace FlexFlow::Kernels::Pool2D { diff --git a/lib/kernels/src/kernels/reduce_kernels.cc b/lib/kernels/src/kernels/reduce_kernels.cc index bd3d6a8cd1..284d07dd96 100644 --- a/lib/kernels/src/kernels/reduce_kernels.cc +++ b/lib/kernels/src/kernels/reduce_kernels.cc @@ -1,6 +1,7 @@ #include "kernels/reduce_kernels.h" #include "kernels/reduce_kernels_cpu.h" #include "kernels/reduce_kernels_gpu.h" +#include namespace FlexFlow::Kernels::Reduce { diff --git a/lib/kernels/src/kernels/reverse_kernels_params.cc b/lib/kernels/src/kernels/reverse_kernels_params.cc index cf72fb3eef..162f67d782 100644 --- a/lib/kernels/src/kernels/reverse_kernels_params.cc +++ b/lib/kernels/src/kernels/reverse_kernels_params.cc @@ -7,17 +7,17 @@ namespace FlexFlow { ReverseKernelsParams compute_reverse_kernels_params(TensorDims const &output_dims, ReverseAttrs const &attrs) { - auto axis = attrs.axis; + ff_dim_t axis = attrs.axis; positive_int in_blk_size = 1_p; positive_int reverse_dim_size = 1_p; positive_int num_out_blks = 1_p; - for (nonnegative_int i : nonnegative_range(get_num_dims(output_dims))) { - if (i < axis.value) { - in_blk_size *= 
dim_at_idx(output_dims, ff_dim_t{i}); - } else if (i == axis.value) { - reverse_dim_size = dim_at_idx(output_dims, ff_dim_t{i}); + for (ff_dim_t i : tensor_dims_range(get_num_dims(output_dims))) { + if (i < axis) { + in_blk_size *= dim_at_idx(output_dims, i); + } else if (i == axis) { + reverse_dim_size = dim_at_idx(output_dims, i); } else { - num_out_blks *= dim_at_idx(output_dims, ff_dim_t{i}); + num_out_blks *= dim_at_idx(output_dims, i); } } diff --git a/lib/kernels/src/perf_metrics.cc b/lib/kernels/src/perf_metrics.cc index 2036ddd35a..c9161e55b1 100644 --- a/lib/kernels/src/perf_metrics.cc +++ b/lib/kernels/src/perf_metrics.cc @@ -2,22 +2,6 @@ namespace FlexFlow { -PerfMetrics::PerfMetrics(double _start_time) - : start_time(_start_time), current_time(_start_time) {} - -PerfMetrics::PerfMetrics(int _train_all, - std::optional _train_correct, - std::optional _cce_loss, - std::optional _sparse_cce_loss, - std::optional _mse_loss, - std::optional _rmse_loss, - std::optional _mae_loss, - double _start_time_micro, - double _current_time_micro) - : train_all(_train_all), train_correct(_train_correct), cce_loss(_cce_loss), - mse_loss(_mse_loss), rmse_loss(_rmse_loss), mae_loss(_mae_loss), - start_time(_start_time_micro), current_time(_current_time_micro) {} - float get_throughput(PerfMetrics const &m) { return m.train_all / (m.current_time - m.start_time); } diff --git a/lib/kernels/test/src/test_gather_kernels.cc b/lib/kernels/test/src/test_gather_kernels.cc index d08058b063..05ae61b889 100644 --- a/lib/kernels/test/src/test_gather_kernels.cc +++ b/lib/kernels/test/src/test_gather_kernels.cc @@ -19,7 +19,7 @@ TEST_SUITE(FF_CUDA_TEST_SUITE) { TensorShape output_shape) { ff_dim_t dim = ff_dim_t{ nonnegative_int{ - get_num_dims(input_shape.dims).unwrap_nonnegative() - 1}, + get_num_dims(input_shape.dims).int_from_num_tensor_dims() - 1}, }; GatherPerDeviceState state = Kernels::Gather::gpu_init_kernel(managed_handle.raw_handle(), dim); @@ -79,7 +79,7 @@ TEST_SUITE(FF_CUDA_TEST_SUITE) { TensorShape output_shape) { ff_dim_t dim = ff_dim_t{ nonnegative_int{ - get_num_dims(input_shape.dims).unwrap_nonnegative() - 1}, + get_num_dims(input_shape.dims).int_from_num_tensor_dims() - 1}, }; GatherPerDeviceState state = Kernels::Gather::gpu_init_kernel(managed_handle.raw_handle(), dim); diff --git a/lib/kernels/test/src/test_transpose_kernel.cc b/lib/kernels/test/src/test_transpose_kernel.cc index 9d4809b2cf..04f090f4c7 100644 --- a/lib/kernels/test/src/test_transpose_kernel.cc +++ b/lib/kernels/test/src/test_transpose_kernel.cc @@ -6,9 +6,11 @@ using namespace ::FlexFlow; TEST_SUITE(FF_CUDA_TEST_SUITE) { TEST_CASE("Test Transpose Kernel Operations") { TransposeAttrs attrs = TransposeAttrs{ - FFOrdered{ - ff_dim_t{1_n}, - ff_dim_t{0_n}, + TensorDimPermutation{ + bidict{ + {ff_dim_t{1_n}, ff_dim_t{0_n}}, + {ff_dim_t{0_n}, ff_dim_t{1_n}}, + }, }, }; diff --git a/lib/local-execution/include/local-execution/atomic_task_binding.struct.toml b/lib/local-execution/include/local-execution/atomic_task_binding.struct.toml new file mode 100644 index 0000000000..002ac7be9c --- /dev/null +++ b/lib/local-execution/include/local-execution/atomic_task_binding.struct.toml @@ -0,0 +1,29 @@ +namespace = "FlexFlow" +name = "AtomicTaskBinding" +features = [ + "eq", + "hash", + "fmt", +] + +includes = [ + "", + "task-spec/training_tensor_slot_id_t.dtg.h", + "local-execution/atomic_training_tensor_guid_t.dtg.h", + "task-spec/slot_id_t.dtg.h", + "task-spec/concrete_arg_spec.h", +] + +src_includes = [ + 
"utils/ord/unordered_map.h", + "utils/hash/unordered_map.h", + "utils/fmt/unordered_map.h", +] + +[[fields]] +name = "tensor_bindings" +type = "std::unordered_map<::FlexFlow::training_tensor_slot_id_t, ::FlexFlow::atomic_training_tensor_guid_t>" + +[[fields]] +name = "arg_bindings" +type = "std::unordered_map<::FlexFlow::slot_id_t, ::FlexFlow::ConcreteArgSpec>" diff --git a/lib/local-execution/include/local-execution/atomic_task_invocation.struct.toml b/lib/local-execution/include/local-execution/atomic_task_invocation.struct.toml new file mode 100644 index 0000000000..fbc012abfa --- /dev/null +++ b/lib/local-execution/include/local-execution/atomic_task_invocation.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "AtomicTaskInvocation" +features = [ + "eq", + "fmt", + "hash" +] + +includes = [ + "local-execution/atomic_task_binding.dtg.h", + "task-spec/task_id_t.dtg.h" +] + + +[[fields]] +name = "task_id" +type = "::FlexFlow::task_id_t" + +[[fields]] +name = "binding" +type = "::FlexFlow::AtomicTaskBinding" diff --git a/lib/local-execution/include/local-execution/atomic_training_tensor_guid_t.struct.toml b/lib/local-execution/include/local-execution/atomic_training_tensor_guid_t.struct.toml new file mode 100644 index 0000000000..95f113d51f --- /dev/null +++ b/lib/local-execution/include/local-execution/atomic_training_tensor_guid_t.struct.toml @@ -0,0 +1,16 @@ +namespace = "FlexFlow" +name = "atomic_training_tensor_guid_t" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "utils/nonnegative_int/nonnegative_int.h" +] + +[[fields]] +name = "raw_index" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/local-execution/include/local-execution/execute_task_for_layer.h b/lib/local-execution/include/local-execution/execute_task_for_layer.h new file mode 100644 index 0000000000..d3b5f342fc --- /dev/null +++ b/lib/local-execution/include/local-execution/execute_task_for_layer.h @@ -0,0 +1,69 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_EXECUTE_TASK_FOR_LAYER_H +#define _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_EXECUTE_TASK_FOR_LAYER_H + +#include "local-execution/local_ready_to_launch_task.dtg.h" +#include "local-execution/local_task_registry.dtg.h" +#include "pcg/layer_guid_t.dtg.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "local-execution/local_atomic_tensor_backing.dtg.h" +#include "task-spec/runtime_arg_config.dtg.h" +#include "local-execution/local_tensor_backing.dtg.h" +#include "task-spec/training_symbolic_computation_graph.dtg.h" +#include "utils/units/milliseconds_t.h" + +namespace FlexFlow { + +LocalReadyToLaunchTask prepare_runtime_task_invocation( + RuntimeTaskInvocation const &, + LocalTensorBacking const &, + LocalAtomicTensorBacking const &, + Allocator &, + RuntimeArgConfig const &); + +std::optional execute_init_for_layer( + symbolic_layer_guid_t, + TrainingSymbolicComputationGraph const &, + LocalTensorBacking const &, + LocalAtomicTensorBacking const &, + Allocator &, + LocalTaskRegistry const &, + RuntimeArgConfig const &); + +std::optional execute_forward_for_layer( + symbolic_layer_guid_t, + TrainingSymbolicComputationGraph const &, + LocalTensorBacking const &, + LocalAtomicTensorBacking const &, + Allocator &, + LocalTaskRegistry const &, + RuntimeArgConfig const &); + +std::optional execute_backward_for_layer( + symbolic_layer_guid_t, + TrainingSymbolicComputationGraph const &, + LocalTensorBacking const &, + LocalAtomicTensorBacking const &, + Allocator &, + LocalTaskRegistry const 
&, + RuntimeArgConfig const &); + +void execute_compute_loss( + TrainingSymbolicComputationGraph const &, + LocalTensorBacking const &, + LocalAtomicTensorBacking const &, + LossAttrs const &, + Allocator &, + LocalTaskRegistry const &, + RuntimeArgConfig const &); + +void execute_update_for_layer( + symbolic_layer_guid_t, + TrainingSymbolicComputationGraph const &, + LocalTensorBacking const &, + LocalAtomicTensorBacking const &, + OptimizerAttrs const &, + Allocator &, + LocalTaskRegistry const &, + RuntimeArgConfig const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/local-execution/include/local-execution/local_args_backing.h b/lib/local-execution/include/local-execution/local_args_backing.h deleted file mode 100644 index 94748cf7ed..0000000000 --- a/lib/local-execution/include/local-execution/local_args_backing.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_LOCAL_ARGS_BACKING_H -#define _FLEXFLOW_LOCAL_EXECUTION_LOCAL_ARGS_BACKING_H - -#include "local-execution/local_args_backing.dtg.h" -#include "local-execution/local_task_argument_accessor.h" -#include "local-execution/local_task_registry.dtg.h" -#include "local-execution/local_tensor_backing.dtg.h" -#include "pcg/computation_graph.h" -#include "task-spec/per_device_op_state.h" -#include "task-spec/task_binding.h" -#include "task-spec/task_invocation.dtg.h" -#include "task-spec/training_computation_graph.dtg.h" -#include "task-spec/training_layer_plus_context.dtg.h" - -namespace FlexFlow { - -LocalArgsBacking make_local_computation_args_backing_with_empty_device_states( - RuntimeArgConfig const &); - -std::optional - get_per_device_op_state_if_exists(LocalArgsBacking const &, - layer_guid_t const &); - -std::unordered_map - construct_arg_slots_backing(TaskBinding const &, RuntimeArgConfig const &); - -std::optional - create_per_device_op_state(LocalTaskRegistry const &, - LocalTensorBacking const &, - RuntimeArgConfig const &, - Allocator &, - TrainingLayerPlusContext const &); - -TaskArgumentAccessor get_task_arg_accessor(LocalTensorBacking const &, - RuntimeArgConfig const &, - TaskInvocation const &, - Allocator &); - -LocalArgsBacking make_local_args_backing_for_computation_graph( - LocalTaskRegistry const &, - TrainingComputationGraph const &, - RuntimeArgConfig const &, - LocalTensorBacking const &, - Allocator &); - -} // namespace FlexFlow - -#endif diff --git a/lib/local-execution/include/local-execution/local_args_backing.struct.toml b/lib/local-execution/include/local-execution/local_args_backing.struct.toml deleted file mode 100644 index 449f883194..0000000000 --- a/lib/local-execution/include/local-execution/local_args_backing.struct.toml +++ /dev/null @@ -1,18 +0,0 @@ -namespace = "FlexFlow" -name = "LocalArgsBacking" -features = [] - -includes = [ - "task-spec/runtime_arg_config.dtg.h", - "task-spec/device_specific_device_states.dtg.h", - "pcg/layer_guid_t.dtg.h", - "", -] - -[[fields]] -name = "runtime_arg_config" -type = "::FlexFlow::RuntimeArgConfig" - -[[fields]] -name = "per_device_op_states" -type = "std::unordered_map<::FlexFlow::layer_guid_t, std::optional<::FlexFlow::DeviceSpecificDeviceStates>>" diff --git a/lib/local-execution/include/local-execution/local_atomic_tensor_backing.h b/lib/local-execution/include/local-execution/local_atomic_tensor_backing.h new file mode 100644 index 0000000000..e97148bafb --- /dev/null +++ b/lib/local-execution/include/local-execution/local_atomic_tensor_backing.h @@ -0,0 +1,24 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_LOCAL_ATOMIC_TENSOR_BACKING_H +#define 
_FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_LOCAL_ATOMIC_TENSOR_BACKING_H + +#include "kernels/allocation.h" +#include "local-execution/local_atomic_tensor_backing.dtg.h" +#include "local-execution/tensor_slot_backing.dtg.h" +#include "task-spec/runtime_arg_config.dtg.h" +#include "local-execution/atomic_task_invocation.dtg.h" +#include "task-spec/task_argument_accessor.h" + +namespace FlexFlow { + +std::unordered_map + construct_tensor_slots_backing_for_binding(LocalAtomicTensorBacking const &, + AtomicTaskBinding const &); + +TaskArgumentAccessor get_task_arg_accessor_for_atomic_task_invocation( + LocalAtomicTensorBacking const &, + AtomicTaskInvocation const &, + Allocator &); + +} // namespace FlexFlow + +#endif diff --git a/lib/local-execution/include/local-execution/local_atomic_tensor_backing.struct.toml b/lib/local-execution/include/local-execution/local_atomic_tensor_backing.struct.toml new file mode 100644 index 0000000000..1333df6f49 --- /dev/null +++ b/lib/local-execution/include/local-execution/local_atomic_tensor_backing.struct.toml @@ -0,0 +1,20 @@ +namespace = "FlexFlow" +name = "LocalAtomicTensorBacking" +features = [ + "eq", + "fmt", +] + +includes = [ + "kernels/accessor.h", + "local-execution/atomic_training_tensor_guid_t.dtg.h", +] + +src_includes = [ + "utils/fmt/unordered_map.h", +] + + +[[fields]] +name = "accessor_from_atomic_tensor_map" +type = "std::unordered_map<::FlexFlow::atomic_training_tensor_guid_t, ::FlexFlow::GenericTensorAccessorW>" diff --git a/lib/local-execution/include/local-execution/local_device_states_backing.h b/lib/local-execution/include/local-execution/local_device_states_backing.h new file mode 100644 index 0000000000..ca22010662 --- /dev/null +++ b/lib/local-execution/include/local-execution/local_device_states_backing.h @@ -0,0 +1,27 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_LOCAL_DEVICE_STATES_BACKING_H +#define _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_LOCAL_DEVICE_STATES_BACKING_H + +#include "local-execution/local_device_states_backing.dtg.h" +#include "local-execution/local_task_argument_accessor.h" +#include "local-execution/local_task_registry.dtg.h" +#include "local-execution/local_tensor_backing.dtg.h" +#include "pcg/computation_graph.h" +#include "task-spec/per_device_op_state.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.dtg.h" + +namespace FlexFlow { + +LocalDeviceStatesBacking make_local_device_states_backing_for_computation_graph( + LocalTaskRegistry const &, + std::unordered_map const &, + RuntimeArgConfig const &runtime_arg_config, + LocalTensorBacking const &, + Allocator &); + +std::optional + get_per_device_op_state_if_exists(LocalDeviceStatesBacking const &, + layer_guid_t const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/local-execution/include/local-execution/local_device_states_backing.struct.toml b/lib/local-execution/include/local-execution/local_device_states_backing.struct.toml new file mode 100644 index 0000000000..184b5741c6 --- /dev/null +++ b/lib/local-execution/include/local-execution/local_device_states_backing.struct.toml @@ -0,0 +1,13 @@ +namespace = "FlexFlow" +name = "LocalDeviceStatesBacking" +features = [] + +includes = [ + "task-spec/device_specific_per_device_op_state.dtg.h", + "pcg/layer_guid_t.dtg.h", + "", +] + +[[fields]] +name = "per_device_op_states" +type = "std::unordered_map<::FlexFlow::layer_guid_t, std::optional<::FlexFlow::DeviceSpecificPerDeviceOpState>>" diff --git 
a/lib/local-execution/include/local-execution/local_ready_to_launch_task.struct.toml b/lib/local-execution/include/local-execution/local_ready_to_launch_task.struct.toml new file mode 100644 index 0000000000..b944d3e12e --- /dev/null +++ b/lib/local-execution/include/local-execution/local_ready_to_launch_task.struct.toml @@ -0,0 +1,16 @@ +namespace = "FlexFlow" +name = "LocalReadyToLaunchTask" +features = [] + +includes = [ + "task-spec/task_id_t.dtg.h", + "task-spec/task_argument_accessor.h", +] + +[[fields]] +name = "task_id" +type = "::FlexFlow::task_id_t" + +[[fields]] +name = "task_arg_accessor" +type = "::FlexFlow::TaskArgumentAccessor" diff --git a/lib/local-execution/include/local-execution/local_task_argument_accessor.h b/lib/local-execution/include/local-execution/local_task_argument_accessor.h index 0ab66234eb..9142ae9992 100644 --- a/lib/local-execution/include/local-execution/local_task_argument_accessor.h +++ b/lib/local-execution/include/local-execution/local_task_argument_accessor.h @@ -2,9 +2,10 @@ #define _FLEXFLOW_LOCAL_EXECUTION_LOCAL_TASK_ARGUMENT_ACCESSOR_H #include "local-execution/tensor_slot_backing.dtg.h" +#include "pcg/device_id_t.dtg.h" #include "task-spec/runtime_arg_config.dtg.h" #include "task-spec/task_argument_accessor.h" -#include "task-spec/tensor_sub_slot_id_t.dtg.h" +#include "task-spec/training_tensor_slot_id_t.dtg.h" #include #include @@ -13,9 +14,10 @@ namespace FlexFlow { struct LocalTaskArgumentAccessor : public ITaskArgumentAccessor { explicit LocalTaskArgumentAccessor( Allocator const &allocator, - std::unordered_map const + std::unordered_map const &tensor_slots_backing, - std::unordered_map const &arg_slots_backing); + std::unordered_map const &arg_slots_backing, + size_t device_idx); LocalTaskArgumentAccessor(LocalTaskArgumentAccessor const &) = delete; LocalTaskArgumentAccessor(LocalTaskArgumentAccessor &&) = delete; @@ -24,19 +26,20 @@ struct LocalTaskArgumentAccessor : public ITaskArgumentAccessor { GenericTensorAccessor get_tensor(slot_id_t slot, Permissions priv, - TensorType tensor_type) const override; + TrainingTensorType tensor_type) const override; VariadicGenericTensorAccessor get_variadic_tensor( - slot_id_t slot, Permissions priv, TensorType tensor_type) const override; + slot_id_t slot, Permissions priv, TrainingTensorType tensor_type) const override; Allocator get_allocator() const override; - size_t get_device_idx() const override; + device_id_t get_device_idx() const override; private: Allocator allocator; - std::unordered_map + std::unordered_map tensor_slots_backing; std::unordered_map arg_slots_backing; + device_id_t device_idx; }; CHECK_RC_COPY_VIRTUAL_COMPLIANT(LocalTaskArgumentAccessor); diff --git a/lib/local-execution/include/local-execution/local_task_registry.h b/lib/local-execution/include/local-execution/local_task_registry.h index 142433ba53..6f955f9a21 100644 --- a/lib/local-execution/include/local-execution/local_task_registry.h +++ b/lib/local-execution/include/local-execution/local_task_registry.h @@ -6,18 +6,27 @@ #include "pcg/layer_attrs.dtg.h" #include "task-spec/op_task_type.dtg.h" #include "utils/units/milliseconds_t.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" namespace FlexFlow { LocalTaskRegistry construct_local_task_registry_for_layers( - std::unordered_map const &); + std::unordered_set const &); -std::optional try_get_registered_task( - LocalTaskRegistry const &, layer_guid_t const &, OpTaskType const &); +std::optional call_init_task_impl( + LocalTaskRegistry const 
&local_task_registry, + task_id_t task_id, + TaskArgumentAccessor const &arg_accessor); -std::optional call_task_impl(LocalTaskRegistry const &, - task_id_t const &task_id, - TaskArgumentAccessor const &acc); +std::optional call_fwb_task_impl( + LocalTaskRegistry const &local_task_registry, + task_id_t task_id, + TaskArgumentAccessor const &arg_accessor); + +void call_generic_task_impl( + LocalTaskRegistry const &local_task_registry, + task_id_t task_id, + TaskArgumentAccessor const &arg_accessor); } // namespace FlexFlow diff --git a/lib/local-execution/include/local-execution/local_task_registry.struct.toml b/lib/local-execution/include/local-execution/local_task_registry.struct.toml index 84abc7aa0c..e9b04e6ad7 100644 --- a/lib/local-execution/include/local-execution/local_task_registry.struct.toml +++ b/lib/local-execution/include/local-execution/local_task_registry.struct.toml @@ -8,8 +8,6 @@ features = [ includes = [ "task-spec/task_signature_impl.dtg.h", - "pcg/layer_guid_t.dtg.h", - "local-execution/operator_task_set.dtg.h" ] src_includes = [ @@ -17,10 +15,6 @@ src_includes = [ "utils/fmt/unordered_map.h", ] -[[fields]] -name = "task_sets" -type = "std::unordered_map<::FlexFlow::layer_guid_t, ::FlexFlow::OperatorTaskSet>" - [[fields]] name = "task_mapping" type = "std::unordered_map<::FlexFlow::task_id_t, ::FlexFlow::TaskSignatureAndImpl>" diff --git a/lib/local-execution/include/local-execution/local_tensor_backing.h b/lib/local-execution/include/local-execution/local_tensor_backing.h index 479ad4734a..876a86eca4 100644 --- a/lib/local-execution/include/local-execution/local_tensor_backing.h +++ b/lib/local-execution/include/local-execution/local_tensor_backing.h @@ -1,31 +1,32 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_LOCAL_TENSOR_BACKING_H -#define _FLEXFLOW_LOCAL_EXECUTION_LOCAL_TENSOR_BACKING_H +#ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_LOCAL_TENSOR_BACKING_H +#define _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_LOCAL_TENSOR_BACKING_H #include "kernels/accessor.h" #include "kernels/allocation.h" +#include "local-execution/atomic_task_invocation.dtg.h" #include "local-execution/local_tensor_backing.dtg.h" #include "local-execution/tensor_slot_backing.dtg.h" -#include "task-spec/task_binding.h" -#include "task-spec/training_computation_graph.dtg.h" -#include "task-spec/training_tensor_guid_t.dtg.h" +#include "task-spec/runtime_arg_config.dtg.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "task-spec/task_argument_accessor.h" +#include "task-spec/symbolic_training_tensor_guid_t.dtg.h" +#include "local-execution/atomic_training_tensor_guid_t.dtg.h" namespace FlexFlow { +AtomicTaskInvocation + lower_local_runtime_task_invocation_to_atomic_task_invocation( + LocalTensorBacking const &, + RuntimeTaskInvocation const &, + RuntimeArgConfig const &); + LocalTensorBacking construct_local_tensor_backing( - std::unordered_map const + std::unordered_map const &training_tensor_shapes, - std::unordered_map const - &preallocated_tensors, + // std::unordered_map const + // &preallocated_tensors, Allocator &); -GenericTensorAccessorW - get_accessor_for_training_tensor(LocalTensorBacking const &, - training_tensor_guid_t); - -std::unordered_map - construct_tensor_slots_backing_for_binding(LocalTensorBacking const &, - TaskBinding const &); - } // namespace FlexFlow #endif diff --git a/lib/local-execution/include/local-execution/local_tensor_backing.struct.toml b/lib/local-execution/include/local-execution/local_tensor_backing.struct.toml index 
48a7a7fa90..a4095b32ad 100644 --- a/lib/local-execution/include/local-execution/local_tensor_backing.struct.toml +++ b/lib/local-execution/include/local-execution/local_tensor_backing.struct.toml @@ -2,18 +2,22 @@ namespace = "FlexFlow" name = "LocalTensorBacking" features = [ "eq", + "ord", + "hash", "fmt", ] includes = [ - "kernels/accessor.h", - "task-spec/training_tensor_guid_t.dtg.h", + "task-spec/symbolic_training_tensor_guid_t.dtg.h", + "local-execution/atomic_training_tensor_guid_t.dtg.h", ] src_includes = [ "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", + "utils/ord/unordered_map.h", ] [[fields]] -name = "backing_for_training_tensor_map" -type = "std::unordered_map<::FlexFlow::training_tensor_guid_t, ::FlexFlow::GenericTensorAccessorW>" +name = "tensor_map" +type = "std::unordered_map<::FlexFlow::symbolic_training_tensor_guid_t, ::FlexFlow::atomic_training_tensor_guid_t>" diff --git a/lib/local-execution/include/local-execution/local_training_backing.h b/lib/local-execution/include/local-execution/local_training_backing.h index f2177016fa..53b5a65bc8 100644 --- a/lib/local-execution/include/local-execution/local_training_backing.h +++ b/lib/local-execution/include/local-execution/local_training_backing.h @@ -4,38 +4,17 @@ #include "local-execution/local_training_backing.dtg.h" #include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" #include "pcg/optimizer_attrs.dtg.h" -#include "task-spec/training_computation_graph.dtg.h" -#include "task-spec/training_tensor_guid_t.dtg.h" #include "utils/units/milliseconds_t.h" namespace FlexFlow { -LocalTrainingBacking make_local_training_backing_for_computation_graph( - Allocator &allocator, - std::unordered_map const - &preallocated_tensors, - TrainingComputationGraph const &training_computation_graph, - RuntimeArgConfig const &runtime_arg_config, - OptimizerAttrs const &optimizer_attrs); - -std::optional execute_forward(LocalTaskRegistry const &, - LocalTensorBacking const &, - LocalArgsBacking const &, - TrainingLayerPlusContext const &, - Allocator &); - -std::optional execute_backward(LocalTaskRegistry const &, - LocalTensorBacking const &, - LocalArgsBacking const &, - TrainingLayerPlusContext const &, - Allocator &); - -void compute_loss(LocalTrainingBacking const &, LossAttrs const &, Allocator &); - -void execute_update(LocalTrainingBacking const &, - layer_guid_t const &, - OptimizerAttrs const &, - Allocator &); +// LocalTrainingBacking make_local_training_backing_for_computation_graph( +// Allocator &allocator, +// std::unordered_map const +// &preallocated_tensors, +// TrainingComputationGraph const &training_computation_graph, +// RuntimeArgConfig const &runtime_arg_config, +// OptimizerAttrs const &optimizer_attrs); } // namespace FlexFlow diff --git a/lib/local-execution/include/local-execution/local_training_backing.struct.toml b/lib/local-execution/include/local-execution/local_training_backing.struct.toml index 7da8c3bed6..d7345ca75c 100644 --- a/lib/local-execution/include/local-execution/local_training_backing.struct.toml +++ b/lib/local-execution/include/local-execution/local_training_backing.struct.toml @@ -3,15 +3,15 @@ name = "LocalTrainingBacking" features = [] includes = [ - "task-spec/training_computation_graph.dtg.h", + "task-spec/training_symbolic_computation_graph_from_cg_conversion.dtg.h", "local-execution/local_task_registry.h", "local-execution/local_tensor_backing.h", - "local-execution/local_args_backing.h", + "local-execution/local_device_states_backing.h", ] [[fields]] name = 
"training_computation_graph" -type = "::FlexFlow::TrainingComputationGraph" +type = "::FlexFlow::TrainingSymbolicComputationGraphFromCgConversion" [[fields]] name = "local_task_registry" @@ -22,5 +22,9 @@ name = "local_tensor_backing" type = "::FlexFlow::LocalTensorBacking" [[fields]] -name = "local_args_backing" -type = "::FlexFlow::LocalArgsBacking" +name = "local_device_states_backing" +type = "::FlexFlow::LocalDeviceStatesBacking" + +[[fields]] +name = "runtime_arg_config" +type = "::FlexFlow::RuntimeArgConfig" diff --git a/lib/local-execution/include/local-execution/model_training_instance.h b/lib/local-execution/include/local-execution/model_training_instance.h index bfd279fde5..82d8af7385 100644 --- a/lib/local-execution/include/local-execution/model_training_instance.h +++ b/lib/local-execution/include/local-execution/model_training_instance.h @@ -1,29 +1,34 @@ #ifndef _FLEXFLOW_LOCAL_EXECUTION_MODEL_TRAINING_INSTANCE_H #define _FLEXFLOW_LOCAL_EXECUTION_MODEL_TRAINING_INSTANCE_H +#include "local-execution/local_atomic_tensor_backing.dtg.h" #include "local-execution/local_training_backing.h" #include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" #include "pcg/tensor_guid_t.dtg.h" -#include "task-spec/loss_tensor_guid_t.dtg.h" +#include "task-spec/training_symbolic_computation_graph.dtg.h" namespace FlexFlow { struct ModelTrainingInstance { ModelTrainingInstance(Allocator const &, - LocalTrainingBacking const &, LossAttrs const &, OptimizerAttrs const &); - Allocator allocator; - LocalTrainingBacking training_backing; - LossAttrs loss_attrs; - OptimizerAttrs optimizer_attrs; - public: std::unordered_map> forward(); std::unordered_map> backward(); void update(); GenericTensorAccessorR get_loss_tensor_accessor() const; + +private: + Allocator allocator; + LossAttrs loss_attrs; + OptimizerAttrs optimizer_attrs; + TrainingSymbolicComputationGraphFromCgConversion symbolic_cg; + LocalTensorBacking local_tensor_backing; + LocalAtomicTensorBacking local_atomic_tensor_backing; + LocalTaskRegistry local_task_registry; + RuntimeArgConfig runtime_arg_config; }; } // namespace FlexFlow diff --git a/lib/local-execution/include/local-execution/tasks.h b/lib/local-execution/include/local-execution/tasks.h deleted file mode 100644 index aae3b3fe44..0000000000 --- a/lib/local-execution/include/local-execution/tasks.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_TASKS_H -#define _FLEXFLOW_LOCAL_EXECUTION_TASKS_H - -#include "task-spec/task_id_t.dtg.h" -#include -#include -#include - -namespace FlexFlow { -// PYTHON_TOP_LEVEL_TASK_ID = 11111, - -void register_flexflow_internal_tasks(); - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/training_computation_graph.struct.toml b/lib/local-execution/include/local-execution/training_computation_graph.struct.toml similarity index 56% rename from lib/task-spec/include/task-spec/training_computation_graph.struct.toml rename to lib/local-execution/include/local-execution/training_computation_graph.struct.toml index 1e294df7eb..4479a43d55 100644 --- a/lib/task-spec/include/task-spec/training_computation_graph.struct.toml +++ b/lib/local-execution/include/local-execution/training_computation_graph.struct.toml @@ -6,8 +6,8 @@ includes = [ "pcg/computation_graph.h", "", "pcg/tensor_guid_t.dtg.h", - "task-spec/training_tensor_group.dtg.h", - "task-spec/loss_tensor_guid_t.dtg.h", + "task-spec/symbolic_training_tensor_group.dtg.h", + "task-spec/symbolic_loss_tensor_guid_t.dtg.h", ] [[fields]] @@ -15,8 +15,8 @@ name = 
"computation_graph" type = "::FlexFlow::ComputationGraph" [[fields]] -name = "training_tensor_group_for_tensor" -type = "std::unordered_map" +name = "symbolic_training_tensor_group_for_tensor" +type = "std::unordered_map" [[fields]] name = "logit_tensor" @@ -24,4 +24,4 @@ type = "::FlexFlow::tensor_guid_t" [[fields]] name = "label_tensor" -type = "::FlexFlow::loss_tensor_guid_t" +type = "::FlexFlow::atomic_loss_tensor_guid_t" diff --git a/lib/local-execution/src/local-execution/execute_task_for_layer.cc b/lib/local-execution/src/local-execution/execute_task_for_layer.cc new file mode 100644 index 0000000000..6e0618a9b1 --- /dev/null +++ b/lib/local-execution/src/local-execution/execute_task_for_layer.cc @@ -0,0 +1,212 @@ +#include "local-execution/execute_task_for_layer.h" +#include "local-execution/local_atomic_tensor_backing.h" +#include "local-execution/local_atomic_tensor_backing.h" +#include "local-execution/local_ready_to_launch_task.dtg.h" +#include "local-execution/local_task_registry.h" +#include "local-execution/local_tensor_backing.h" +#include "task-spec/fwb_op_task_type.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "task-spec/training_symbolic_computation_graph.h" +#include "utils/containers/flatmap.h" + +namespace FlexFlow { + +LocalReadyToLaunchTask prepare_runtime_task_invocation( + RuntimeTaskInvocation const &runtime_task_invocation, + LocalTensorBacking const &local_tensor_backing, + LocalAtomicTensorBacking const &local_atomic_tensor_backing, + Allocator &allocator, + RuntimeArgConfig const &runtime_arg_config) { + + AtomicTaskInvocation atomic_task_invocation = + lower_local_runtime_task_invocation_to_atomic_task_invocation( + local_tensor_backing, + runtime_task_invocation, + runtime_arg_config); + + TaskArgumentAccessor task_arg_accessor = + get_task_arg_accessor_for_atomic_task_invocation( + local_atomic_tensor_backing, + atomic_task_invocation, + allocator); + + return LocalReadyToLaunchTask{ + atomic_task_invocation.task_id, + task_arg_accessor, + }; +} + +std::optional execute_init_for_layer( + symbolic_layer_guid_t symbolic_layer_guid, + TrainingSymbolicComputationGraph const &g, + LocalTensorBacking const &tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + RuntimeArgConfig const &runtime_arg_config) { + + SymbolicCgOpAttrsAndTrainingSignatureWithShapes attrs_and_signature = + get_attrs_and_signature_for_layer(g, symbolic_layer_guid); + + RuntimeTaskInvocation runtime_task_invocation = ({ + std::optional maybe_runtime_task_invocation = + get_init_runtime_task_invocation_for_layer( + symbolic_layer_guid, + attrs_and_signature); + if (!maybe_runtime_task_invocation.has_value()) { + return std::nullopt; + } + maybe_runtime_task_invocation.value(); + }); + + LocalReadyToLaunchTask + prepared_task = prepare_runtime_task_invocation( + runtime_task_invocation, + tensor_backing, + atomic_tensor_backing, + allocator, + runtime_arg_config); + + + std::optional per_device_op_state = + call_init_task_impl( + task_registry, + prepared_task.task_id, + prepared_task.task_arg_accessor); + + return per_device_op_state; +} + +static std::optional execute_fwb_for_layer( + symbolic_layer_guid_t symbolic_layer_guid, + TrainingSymbolicComputationGraph const &g, + LocalTensorBacking const &local_tensor_backing, + LocalAtomicTensorBacking const &local_atomic_tensor_backing, + Allocator &allocator, + LocalTaskRegistry const &local_task_registry, + RuntimeArgConfig const 
&runtime_arg_config, + FwbOpTaskType task_type) { + + SymbolicCgOpAttrsAndTrainingSignatureWithShapes attrs_and_signature = + get_attrs_and_signature_for_layer(g, symbolic_layer_guid); + + OpTaskType op_task_type = assert_unwrap( + op_task_type_from_fwb_op_task_type(task_type)); + + RuntimeTaskInvocation runtime_task_invocation = ({ + std::optional maybe_runtime_task_invocation = + get_runtime_task_invocation_for_layer_and_type( + symbolic_layer_guid, + attrs_and_signature, + op_task_type); + if (!maybe_runtime_task_invocation.has_value()) { + return std::nullopt; + } + maybe_runtime_task_invocation.value(); + }); + + LocalReadyToLaunchTask + prepared_task = prepare_runtime_task_invocation( + runtime_task_invocation, + local_tensor_backing, + local_atomic_tensor_backing, + allocator, + runtime_arg_config); + + std::optional execution_time = + call_fwb_task_impl( + local_task_registry, + prepared_task.task_id, + prepared_task.task_arg_accessor); + + return execution_time; +} + +std::optional execute_forward_for_layer( + symbolic_layer_guid_t layer, + TrainingSymbolicComputationGraph const &graph, + LocalTensorBacking const &tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + RuntimeArgConfig const &runtime_arg_config) { + + return execute_fwb_for_layer(layer, graph, tensor_backing, atomic_tensor_backing, allocator, task_registry, runtime_arg_config, FwbOpTaskType::FWD); +} + +std::optional execute_backward_for_layer( + symbolic_layer_guid_t layer, + TrainingSymbolicComputationGraph const &graph, + LocalTensorBacking const &tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + RuntimeArgConfig const &runtime_arg_config) { + + return execute_fwb_for_layer(layer, graph, tensor_backing, atomic_tensor_backing, allocator, task_registry, runtime_arg_config, FwbOpTaskType::BWD); +} + +void execute_compute_loss( + TrainingSymbolicComputationGraph const &g, + LocalTensorBacking const &tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + LossAttrs const &loss_attrs, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + RuntimeArgConfig const &runtime_arg_config) { + + symbolic_forward_tensor_guid_t loss_fwd_tensor = get_forward_symbolic_tensor_guid_for_symbolic_tensor_guid(g.logit_tensor); + symbolic_gradient_tensor_guid_t loss_grad_tensor = get_gradient_symbolic_tensor_guid_for_symbolic_tensor_guid(g.logit_tensor); + + RuntimeTaskInvocation invocation = get_compute_loss_runtime_task_invocation(loss_attrs, + loss_fwd_tensor, + loss_grad_tensor, + g.label_tensor); + + LocalReadyToLaunchTask + prepared_task = prepare_runtime_task_invocation( + invocation, + tensor_backing, + atomic_tensor_backing, + allocator, + runtime_arg_config); + + call_generic_task_impl(task_registry, + prepared_task.task_id, + prepared_task.task_arg_accessor); +} + +void execute_update_for_layer( + symbolic_layer_guid_t symbolic_layer_guid, + TrainingSymbolicComputationGraph const &graph, + LocalTensorBacking const &tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + OptimizerAttrs const &optimizer_attrs, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + RuntimeArgConfig const &runtime_arg_config) { + + SymbolicTrainingLayerAttrsPlusContext attrs_plus_context = + get_symbolic_training_layer_attrs_plus_context(graph, symbolic_layer_guid); + + RuntimeTaskInvocation invocation = ({ + 
std::optional maybe_invocation = get_update_runtime_task_invocation_for_layer(attrs_plus_context, optimizer_attrs); + if (!maybe_invocation.has_value()) { + return; + } + maybe_invocation.value(); + }); + + LocalReadyToLaunchTask + prepared_task = prepare_runtime_task_invocation( + invocation, + tensor_backing, + atomic_tensor_backing, + allocator, + runtime_arg_config); + + call_generic_task_impl(task_registry, + prepared_task.task_id, + prepared_task.task_arg_accessor); +} + +} // namespace FlexFlow diff --git a/lib/local-execution/src/local-execution/local_args_backing.cc b/lib/local-execution/src/local-execution/local_args_backing.cc deleted file mode 100644 index eb1c7b067e..0000000000 --- a/lib/local-execution/src/local-execution/local_args_backing.cc +++ /dev/null @@ -1,109 +0,0 @@ -#include "local-execution/local_args_backing.h" -#include "local-execution/local_task_registry.h" -#include "local-execution/local_tensor_backing.h" -#include "op-attrs/parallel_tensor_shape.h" -#include "task-spec/op_task_to_task_invocation.h" -#include "task-spec/task_signature_impl.h" -#include "task-spec/training_computation_graph.h" -#include "task-spec/training_layer_plus_context.h" -#include "utils/containers/contains_key.h" -#include "utils/containers/generate_map.h" -#include "utils/containers/map_values.h" -#include "utils/containers/try_at.h" -#include "utils/overload.h" - -namespace FlexFlow { - -std::optional get_per_device_op_state_if_exists( - LocalArgsBacking const &local_args_backing, - layer_guid_t const &layer_guid) { - - return local_args_backing.per_device_op_states.at(layer_guid); -} - -std::unordered_map - construct_arg_slots_backing(TaskBinding const &binding, - RuntimeArgConfig const &runtime_arg_config) { - return map_values( - binding.get_arg_bindings(), [&](TaskArgSpec const &arg_binding) { - return arg_binding.template visit( - overload{[&](RuntimeArgRefSpec const &s) { - return lower_to_concrete_arg_spec(s, runtime_arg_config); - }, - [](ConcreteArgSpec const &s) { return s; }}); - }); - ; -} - -std::optional - create_per_device_op_state(LocalTaskRegistry const &local_task_registry, - LocalTensorBacking const &tensor_backing, - RuntimeArgConfig const &runtime_arg_config, - Allocator &allocator, - TrainingLayerPlusContext const &training_layer) { - std::optional maybe_registered_task = try_get_registered_task( - local_task_registry, training_layer.layer_guid, OpTaskType::INIT); - - ASSERT(maybe_registered_task.has_value()); - - registered_task_t registered_task = maybe_registered_task.value(); - if (registered_task.is_noop_task()) { - return std::nullopt; - } - - TaskInvocation invocation = lower_to_task_invocation( - /*op_task_invocation=*/get_init_op_task_invocation( - training_layer.layer_attrs.op_attrs), - /*training_layer=*/training_layer, - /*device_specific_device_states=*/std::nullopt); - - TaskArgumentAccessor accessor = get_task_arg_accessor( - tensor_backing, runtime_arg_config, invocation, allocator); - TaskSignatureAndImpl task_sig_impl = - local_task_registry.task_mapping.at(invocation.task_id); - auto fn = - task_sig_impl.impl_function.get().function_ptr; - DeviceSpecificDeviceStates device_state = fn(accessor); - return device_state; -} - -TaskArgumentAccessor - get_task_arg_accessor(LocalTensorBacking const &local_tensor_backing, - RuntimeArgConfig const &runtime_arg_config, - TaskInvocation const &invocation, - Allocator &allocator) { - std::unordered_map - tensor_slots_backing = construct_tensor_slots_backing_for_binding( 
local_tensor_backing, invocation.binding); - std::unordered_map arg_slots_backing = - construct_arg_slots_backing(invocation.binding, runtime_arg_config); - return TaskArgumentAccessor::create( - allocator, tensor_slots_backing, arg_slots_backing); -} - -LocalArgsBacking make_local_args_backing_for_computation_graph( - LocalTaskRegistry const &task_registry, - TrainingComputationGraph const &training_computation_graph, - RuntimeArgConfig const &runtime_arg_config, - LocalTensorBacking const &local_tensor_backing, - Allocator &allocator) { - std::unordered_map> - per_device_op_states = generate_map( - topological_ordering(training_computation_graph.computation_graph), - [&](layer_guid_t const &layer_guid) { - return create_per_device_op_state( - task_registry, - local_tensor_backing, - runtime_arg_config, - allocator, - get_training_layer_plus_context(training_computation_graph, - layer_guid)); - }); - - return LocalArgsBacking{ - runtime_arg_config, - per_device_op_states, - }; -} - -} // namespace FlexFlow diff --git a/lib/local-execution/src/local-execution/local_atomic_tensor_backing.cc b/lib/local-execution/src/local-execution/local_atomic_tensor_backing.cc new file mode 100644 index 0000000000..d2f466ff58 --- /dev/null +++ b/lib/local-execution/src/local-execution/local_atomic_tensor_backing.cc @@ -0,0 +1,36 @@ +#include "local-execution/local_atomic_tensor_backing.h" +#include "local-execution/local_task_argument_accessor.h" +#include "utils/containers/map_values.h" + +namespace FlexFlow { + +std::unordered_map + construct_tensor_slots_backing_for_binding(LocalAtomicTensorBacking const &tensor_backing, + AtomicTaskBinding const &binding) { + return map_values( + binding.tensor_bindings, + [&](atomic_training_tensor_guid_t t) -> TensorSlotBacking { + return TensorSlotBacking{ + tensor_backing.accessor_from_atomic_tensor_map.at(t), + }; + }); +} + +TaskArgumentAccessor get_task_arg_accessor_for_atomic_task_invocation( + LocalAtomicTensorBacking const &local_tensor_backing, + AtomicTaskInvocation const &invocation, + Allocator &allocator) { + + std::unordered_map + tensor_slots_backing = construct_tensor_slots_backing_for_binding( + local_tensor_backing, invocation.binding); + + std::unordered_map arg_slots_backing = + invocation.binding.arg_bindings; + + return TaskArgumentAccessor::create( + allocator, tensor_slots_backing, arg_slots_backing, 0); +} + + +} // namespace FlexFlow diff --git a/lib/local-execution/src/local-execution/local_cost_estimator.cc b/lib/local-execution/src/local-execution/local_cost_estimator.cc index 6517dbfdbc..4f84863a3f 100644 --- a/lib/local-execution/src/local-execution/local_cost_estimator.cc +++ b/lib/local-execution/src/local-execution/local_cost_estimator.cc @@ -11,10 +11,6 @@ #include "pcg/computation_graph/layer_added_result.dtg.h" #include "pcg/machine_view.dtg.h" #include "pcg/parallel_tensor_attrs.h" -#include "task-spec/forward_tensor_source.h" -#include "task-spec/gradient_tensor_source.h" -#include "task-spec/optimizer_tensor_source.h" -#include "task-spec/training_computation_graph.h" #include "utils/containers/concat_vectors.h" #include "utils/containers/get_only.h" #include "utils/containers/sum.h" @@ -26,140 +22,144 @@ namespace FlexFlow { LocalCostEstimator::LocalCostEstimator(RuntimeArgConfig const &config) : runtime_arg_config(config) {} -static TrainingComputationGraph - create_computation_graph_for_local_cost_estimation( - PCGOperatorAttrs const &op, - OptimizerAttrs const &optimizer_attrs, - std::vector const &inputs, - std::vector 
const &weights, - std::vector const &outputs) { - ComputationGraph computation_graph = make_empty_computation_graph(); - - std::vector input_tensors; - for (ParallelTensorShape const &input : inputs) { - LayerAddedResult inputs_layer = add_layer( - computation_graph, - LayerAttrs{ComputationGraphOpAttrs{InputAttrs{get_piece_shape(input)}}, - std::nullopt}, - {}, - {}); - input_tensors.push_back(get_only(inputs_layer.outputs)); - } - - std::vector weight_tensors; - for (ParallelTensorShape const &weight : weights) { - LayerAddedResult weights_layer = - add_layer(computation_graph, - LayerAttrs{ComputationGraphOpAttrs{WeightAttrs{ - get_piece_shape(weight), - InitializerAttrs{ZeroInitializerAttrs{}}}}, - std::nullopt}, - {}, - {}); - weight_tensors.push_back(get_only(weights_layer.outputs)); - } - - // create operator layer - LayerAddedResult operator_layer = add_layer( - computation_graph, - LayerAttrs{compgraph_op_attrs_from_pcg_op_attrs(op), "operator"}, - input_tensors, - weight_tensors); - - ForwardTensorSource forward_tensor_source; - GradientTensorSource gradient_tensor_source; - OptimizerTensorSource optimizer_tensor_source; - LossTensorSource loss_tensor_source; - - TrainingComputationGraph training_cg = generate_training_computation_graph( - /*computation_graph=*/computation_graph, - /*optimizer_attrs=*/optimizer_attrs, - /*logit_tensor=*/operator_layer.outputs.at(0), - /*forward_tensor_source=*/forward_tensor_source, - /*gradient_tensor_source=*/gradient_tensor_source, - /*optimizer_tensor_source=*/optimizer_tensor_source, - /*loss_tensor_source=*/loss_tensor_source); - - return training_cg; -} - -OpCostMetrics LocalCostEstimator::estimate_cost( - OpCostEstimateKey const &op_cost_estimate_key) const { - - PCGOperatorAttrs op = op_cost_estimate_key.op_attrs; - std::vector inputs = op_cost_estimate_key.input_shapes; - std::vector weights = op_cost_estimate_key.weight_shapes; - std::vector outputs = op_cost_estimate_key.output_shapes; - MachineView mv = op_cost_estimate_key.machine_view; - - if (is_parallel_op(op) || op.has() || op.has() || - op.has()) { - return OpCostMetrics{ - /*forward_runtime=*/0_ms, - /*backward_runtime=*/0_ms, - /*memory=*/0_bytes, - }; - } - - TrainingComputationGraph training_cg = - create_computation_graph_for_local_cost_estimation( - /*op=*/op, - /*optimizer_attrs=*/op_cost_estimate_key.optimizer_attrs, - /*inputs=*/inputs, - /*weights=*/weights, - /*outputs=*/outputs); - - // allocate memory - std::shared_ptr tracked_allocator_ptr = - std::make_shared(create_local_allocator_for_device_type( - runtime_arg_config.kernel_device_type)); - Allocator allocator = Allocator(tracked_allocator_ptr); - - LocalTrainingBacking local_backing = - make_local_training_backing_for_computation_graph( - /*allocator=*/allocator, - /*preallocated_tensors=*/{}, - /*training_computation_graph=*/training_cg, - /*runtime_arg_config=*/this->runtime_arg_config, - /*optimizer_attrs=*/op_cost_estimate_key.optimizer_attrs); - - // execute layer - layer_guid_t operator_layer_guid = - get_layer_by_name(training_cg.computation_graph, "operator"); - - milliseconds_t fwd = execute_forward(local_backing.local_task_registry, - local_backing.local_tensor_backing, - local_backing.local_args_backing, - get_training_layer_plus_context( - training_cg, operator_layer_guid), - allocator) - .value(); - milliseconds_t bwd = execute_backward(local_backing.local_task_registry, - local_backing.local_tensor_backing, - local_backing.local_args_backing, - get_training_layer_plus_context( - training_cg, 
operator_layer_guid), - allocator) - .value(); - - return OpCostMetrics{ - /*forward_runtime=*/fwd, - /*backward_runtime=*/bwd, - /*memory=*/tracked_allocator_ptr->get_current_mem_usage(), - }; -} - -milliseconds_t LocalCostEstimator::estimate_cost( - TensorSetMovement const &tensor_set_movement) const { - // TODO: model communication cost analytically - // https://github.com/flexflow/FlexFlow/issues/1414 - - NOT_IMPLEMENTED(); -} - -CostEstimator - get_local_cost_estimator(RuntimeArgConfig const &runtime_arg_config) { - return CostEstimator::create(runtime_arg_config); -} +// TODO(@lockshaw)(#pr) +// static TrainingComputationGraph +// create_computation_graph_for_local_cost_estimation( +// PCGOperatorAttrs const &op, +// OptimizerAttrs const &optimizer_attrs, +// std::vector const &inputs, +// std::vector const &weights, +// std::vector const &outputs) { +// ComputationGraph computation_graph = make_empty_computation_graph(); +// +// std::vector input_tensors; +// for (ParallelTensorShape const &input : inputs) { +// LayerAddedResult inputs_layer = add_layer( +// computation_graph, +// LayerAttrs{ComputationGraphOpAttrs{InputAttrs{get_piece_shape(input)}}, +// std::nullopt}, +// {}, +// {}); +// input_tensors.push_back(get_only(inputs_layer.outputs)); +// } +// +// std::vector weight_tensors; +// for (ParallelTensorShape const &weight : weights) { +// LayerAddedResult weights_layer = +// add_layer(computation_graph, +// LayerAttrs{ComputationGraphOpAttrs{WeightAttrs{ +// get_piece_shape(weight), +// InitializerAttrs{ZeroInitializerAttrs{}}}}, +// std::nullopt}, +// {}, +// {}); +// weight_tensors.push_back(get_only(weights_layer.outputs)); +// } +// +// // create operator layer +// LayerAddedResult operator_layer = add_layer( +// computation_graph, +// LayerAttrs{ +// /*op_attrs=*/compgraph_op_attrs_from_pcg_op_attrs(op).value(), +// /*name=*/"operator", +// }, +// input_tensors, +// weight_tensors); +// +// ForwardTensorSource forward_tensor_source; +// GradientTensorSource gradient_tensor_source; +// OptimizerTensorSource optimizer_tensor_source; +// LossTensorSource loss_tensor_source; +// +// TrainingComputationGraph training_cg = generate_training_computation_graph( +// /*computation_graph=*/computation_graph, +// /*optimizer_attrs=*/optimizer_attrs, +// /*logit_tensor=*/operator_layer.outputs.at(0), +// /*forward_tensor_source=*/forward_tensor_source, +// /*gradient_tensor_source=*/gradient_tensor_source, +// /*optimizer_tensor_source=*/optimizer_tensor_source, +// /*loss_tensor_source=*/loss_tensor_source); +// +// return training_cg; +// } + +// OpCostMetrics LocalCostEstimator::estimate_cost( +// OpCostEstimateKey const &op_cost_estimate_key) const { +// +// PCGOperatorAttrs op = op_cost_estimate_key.op_attrs; +// std::vector inputs = op_cost_estimate_key.input_shapes; +// std::vector weights = op_cost_estimate_key.weight_shapes; +// std::vector outputs = op_cost_estimate_key.output_shapes; +// MachineView mv = op_cost_estimate_key.machine_view; +// +// if (is_parallel_op(op) || op.has() || op.has() || +// op.has()) { +// return OpCostMetrics{ +// /*forward_runtime=*/0_ms, +// /*backward_runtime=*/0_ms, +// /*memory=*/0_bytes, +// }; +// } +// +// TrainingComputationGraph training_cg = +// create_computation_graph_for_local_cost_estimation( +// /*op=*/op, +// /*optimizer_attrs=*/op_cost_estimate_key.optimizer_attrs, +// /*inputs=*/inputs, +// /*weights=*/weights, +// /*outputs=*/outputs); +// +// // allocate memory +// std::shared_ptr tracked_allocator_ptr = +// 
std::make_shared(create_local_allocator_for_device_type( +// runtime_arg_config.kernel_device_type)); +// Allocator allocator = Allocator(tracked_allocator_ptr); +// +// LocalTrainingBacking local_backing = +// make_local_training_backing_for_computation_graph( +// /*allocator=*/allocator, +// /*preallocated_tensors=*/{}, +// /*training_computation_graph=*/training_cg, +// /*runtime_arg_config=*/this->runtime_arg_config, +// /*optimizer_attrs=*/op_cost_estimate_key.optimizer_attrs); +// +// // execute layer +// layer_guid_t operator_layer_guid = +// get_layer_by_name(training_cg.computation_graph, "operator"); +// +// milliseconds_t fwd = execute_forward(local_backing.local_task_registry, +// local_backing.local_tensor_backing, +// local_backing.local_args_backing, +// get_training_layer_plus_context( +// training_cg, operator_layer_guid), +// allocator) +// .value(); +// milliseconds_t bwd = execute_backward(local_backing.local_task_registry, +// local_backing.local_tensor_backing, +// local_backing.local_args_backing, +// get_training_layer_plus_context( +// training_cg, operator_layer_guid), +// allocator) +// .value(); +// +// return OpCostMetrics{ +// /*forward_runtime=*/fwd, +// /*backward_runtime=*/bwd, +// /*memory=*/tracked_allocator_ptr->get_current_mem_usage(), +// }; +// } +// +// milliseconds_t LocalCostEstimator::estimate_cost( +// TensorSetMovement const &tensor_set_movement) const { +// // TODO: model communication cost analytically +// // https://github.com/flexflow/FlexFlow/issues/1414 +// +// NOT_IMPLEMENTED(); +// } +// +// CostEstimator +// get_local_cost_estimator(RuntimeArgConfig const &runtime_arg_config) { +// return CostEstimator::create(runtime_arg_config); +// } } // namespace FlexFlow diff --git a/lib/local-execution/src/local-execution/local_device_states_backing.cc b/lib/local-execution/src/local-execution/local_device_states_backing.cc new file mode 100644 index 0000000000..0ce9b84e14 --- /dev/null +++ b/lib/local-execution/src/local-execution/local_device_states_backing.cc @@ -0,0 +1,46 @@ +#include "local-execution/local_device_states_backing.h" +#include "local-execution/local_task_registry.h" +#include "local-execution/local_tensor_backing.h" +#include "local-execution/local_training_backing.h" +#include "task-spec/task_signature_impl.h" +#include "utils/containers/generate_map.h" +#include "utils/overload.h" +#include "utils/containers/keys.h" + +namespace FlexFlow { + +// LocalDeviceStatesBacking make_local_device_states_backing_for_computation_graph( +// LocalTaskRegistry const &task_registry, +// std::unordered_map const &layers, +// std::unordered_map const &op_attrs, +// RuntimeArgConfig const &runtime_arg_config, +// LocalTensorBacking const &local_tensor_backing, +// Allocator &allocator) { +// +// std::unordered_map> +// per_device_op_states = generate_map( +// keys(layers), +// [&](layer_guid_t const &layer_guid) -> std::optional { +// return create_per_device_op_state( +// task_registry, +// local_tensor_backing, +// runtime_arg_config, +// allocator, +// op_attrs, +// layers.at(layer_guid)); +// }); +// +// return LocalDeviceStatesBacking{ +// per_device_op_states, +// }; +// } + + +// std::optional get_per_device_op_state_if_exists( +// LocalArgsBacking const &local_args_backing, +// layer_guid_t const &layer_guid) { +// +// return local_args_backing.per_device_op_states.at(layer_guid); +// } + +} // namespace FlexFlow diff --git a/lib/local-execution/src/local-execution/local_task_registry.cc 
b/lib/local-execution/src/local-execution/local_task_registry.cc index d482736a5b..a33781b1bb 100644 --- a/lib/local-execution/src/local-execution/local_task_registry.cc +++ b/lib/local-execution/src/local-execution/local_task_registry.cc @@ -38,27 +38,47 @@ LocalTaskRegistry construct_local_task_registry_for_layers( }; } -std::optional - try_get_registered_task(LocalTaskRegistry const &task_registry, - layer_guid_t const &layer_guid, - OpTaskType const &op_task_type) { - if (!contains_key(task_registry.task_sets, layer_guid)) { +std::optional call_init_task_impl( + LocalTaskRegistry const &local_task_registry, + registered_task_t registered_task, + TaskArgumentAccessor const &arg_accessor) { + + if (registered_task.is_noop_task()) { return std::nullopt; } - return get_task_for_task_type(task_registry.task_sets.at(layer_guid), - op_task_type); + task_id_t task_id = registered_task.require_real_task(); + + TaskSignatureAndImpl task_sig_impl = + local_task_registry.task_mapping.at(task_id); + + auto fn = + task_sig_impl.impl_function.get().function_ptr; + + std::optional device_state = fn(arg_accessor); + + return device_state; } std::optional - call_task_impl(LocalTaskRegistry const &task_registry, + call_fwb_task_impl(LocalTaskRegistry const &task_registry, task_id_t const &task_id, TaskArgumentAccessor const &acc) { TaskSignatureAndImpl task_sig_impl = task_registry.task_mapping.at(task_id); auto fn = task_sig_impl.impl_function.get().function_ptr; - return transform( - fn(acc), [](float running_time) { return milliseconds_t{running_time}; }); + + return fn(acc); +} + +void call_generic_task_impl(LocalTaskRegistry const &task_registry, + task_id_t const &task_id, + TaskArgumentAccessor const &acc) { + TaskSignatureAndImpl task_sig_impl = task_registry.task_mapping.at(task_id); + auto fn = + task_sig_impl.impl_function.get().function_ptr; + + fn(acc); } } // namespace FlexFlow diff --git a/lib/local-execution/src/local-execution/local_tensor_backing.cc b/lib/local-execution/src/local-execution/local_tensor_backing.cc index be8e44736c..5d5a3172f9 100644 --- a/lib/local-execution/src/local-execution/local_tensor_backing.cc +++ b/lib/local-execution/src/local-execution/local_tensor_backing.cc @@ -2,73 +2,60 @@ #include "op-attrs/parallel_tensor_shape.h" #include "pcg/computation_graph.h" #include "pcg/optimizer_attrs.h" -#include "task-spec/slot_grad_id.dtg.h" -#include "task-spec/training_computation_graph.h" +#include "task-spec/fwb_tensor_slot_id_t.dtg.h" #include "utils/containers/contains_key.h" #include "utils/containers/generate_map.h" #include "utils/containers/is_submapeq_of.h" #include "utils/containers/is_subseteq_of.h" #include "utils/containers/keys.h" #include "utils/containers/map_values.h" -#include "utils/containers/merge_maps.h" #include "utils/containers/set_minus.h" #include "utils/containers/set_of.h" #include "utils/overload.h" namespace FlexFlow { -LocalTensorBacking construct_local_tensor_backing( - std::unordered_map const - &training_tensor_shapes, - std::unordered_map const - &preallocated, - Allocator &allocator) { - - ASSERT(is_subseteq_of(keys(preallocated), keys(training_tensor_shapes))); - - std::unordered_set to_allocate = - set_minus(keys(training_tensor_shapes), keys(preallocated)); - - std::unordered_map allocated = - generate_map(to_allocate, [&](training_tensor_guid_t t) { - TensorShape shape = training_tensor_shapes.at(t); - return allocator.allocate_tensor(shape); - }); - - std::unordered_map - backing_for_training_tensor_map = - 
merge_disjoint_maps(allocated, preallocated); - - ASSERT(is_submapeq_of(preallocated, backing_for_training_tensor_map)); - - ASSERT(keys(backing_for_training_tensor_map) == keys(training_tensor_shapes), - backing_for_training_tensor_map.size(), - training_tensor_shapes.size(), - keys(preallocated)); - - return LocalTensorBacking{ - backing_for_training_tensor_map, - }; -} - -GenericTensorAccessorW get_accessor_for_training_tensor( - LocalTensorBacking const &local_tensor_backing, - training_tensor_guid_t training_tensor) { - return local_tensor_backing.backing_for_training_tensor_map.at( - training_tensor); +// LocalTensorBacking construct_local_tensor_backing( +// std::unordered_map const +// &training_tensor_shapes, +// std::unordered_map const +// &preallocated, +// Allocator &allocator) { +// +// ASSERT(is_subseteq_of(keys(preallocated), keys(training_tensor_shapes))); +// +// std::unordered_set to_allocate = +// set_minus(keys(training_tensor_shapes), keys(preallocated)); +// +// std::unordered_map allocated = +// generate_map(to_allocate, [&](training_tensor_guid_t t) { +// TensorShape shape = training_tensor_shapes.at(t); +// return allocator.allocate_tensor(shape); +// }); +// +// std::unordered_map +// backing_for_training_tensor_map = +// merge_disjoint_maps(allocated, preallocated); +// +// ASSERT(is_submapeq_of(preallocated, backing_for_training_tensor_map)); +// +// ASSERT(keys(backing_for_training_tensor_map) == keys(training_tensor_shapes), +// backing_for_training_tensor_map.size(), +// training_tensor_shapes.size(), +// keys(preallocated)); +// +// return LocalTensorBacking{ +// backing_for_training_tensor_map, +// }; +// } + +AtomicTaskInvocation + lower_local_runtime_task_invocation_to_atomic_task_invocation( + LocalTensorBacking const &, + RuntimeTaskInvocation const &, + RuntimeArgConfig const &) { + NOT_IMPLEMENTED(); } -std::unordered_map - construct_tensor_slots_backing_for_binding( - LocalTensorBacking const &local_tensor_backing, - TaskBinding const &binding) { - - return map_values( - binding.get_tensor_bindings(), [&](training_tensor_guid_t t) { - return TensorSlotBacking{ - get_accessor_for_training_tensor(local_tensor_backing, t), - }; - }); -} } // namespace FlexFlow diff --git a/lib/local-execution/src/local-execution/local_training_backing.cc b/lib/local-execution/src/local-execution/local_training_backing.cc index 1aac8506f2..424566fac7 100644 --- a/lib/local-execution/src/local-execution/local_training_backing.cc +++ b/lib/local-execution/src/local-execution/local_training_backing.cc @@ -20,7 +20,7 @@ namespace FlexFlow { LocalTrainingBacking make_local_training_backing_for_computation_graph( Allocator &allocator, - std::unordered_map const + std::unordered_map const &preallocated, TrainingComputationGraph const &training_computation_graph, RuntimeArgConfig const &runtime_arg_config, @@ -54,96 +54,36 @@ LocalTrainingBacking make_local_training_backing_for_computation_graph( }; } -std::optional - execute_forward(LocalTaskRegistry const &local_task_registry, - LocalTensorBacking const &local_tensor_backing, - LocalArgsBacking const &local_args_backing, - TrainingLayerPlusContext const &training_layer, - Allocator &allocator) { - - std::optional maybe_registered_task = try_get_registered_task( - local_task_registry, training_layer.layer_guid, OpTaskType::BWD); - - ASSERT(maybe_registered_task.has_value()); - - registered_task_t registered_task = maybe_registered_task.value(); - if (registered_task.is_noop_task()) { - return std::nullopt; - } - - 
std::optional device_state = - get_per_device_op_state_if_exists(local_args_backing, - training_layer.layer_guid); - - TaskInvocation invocation = lower_to_task_invocation( - /*op_task_invocation=*/get_forward_op_task_invocation( - training_layer.layer_attrs.op_attrs), - /*training_layer=*/training_layer, - /*device_specific_device_states=*/device_state); - - TaskArgumentAccessor accessor = - get_task_arg_accessor(local_tensor_backing, - local_args_backing.runtime_arg_config, - invocation, - allocator); - return call_task_impl(local_task_registry, invocation.task_id, accessor); -} - -void compute_loss(LocalTrainingBacking const &local_training_backing, - LossAttrs const &loss_attrs, - Allocator &allocator) { - - TrainingComputationGraph training_cg = - local_training_backing.training_computation_graph; - tensor_guid_t logit_tensor = training_cg.logit_tensor; - loss_tensor_guid_t label_tensor = training_cg.label_tensor; - - TaskInvocation loss_invocation = backward( - loss_attrs, - get_forward_tensor_guid_for_tensor_guid(training_cg, logit_tensor), - get_gradient_tensor_guid_for_tensor_guid(training_cg, logit_tensor), - label_tensor); - // TODO: https://github.com/flexflow/flexflow-train/issues/1442 - // assert(is_invocation_valid(get_loss_bwd_signature(), loss_invocation)); - TaskArgumentAccessor loss_accessor = get_task_arg_accessor( - local_training_backing.local_tensor_backing, - local_training_backing.local_args_backing.runtime_arg_config, - loss_invocation, - allocator); - TaskImplFunction loss_impl_fn = get_loss_bwd_task_impl(); - loss_impl_fn.get().function_ptr(loss_accessor); -} - -std::optional - execute_backward(LocalTaskRegistry const &local_task_registry, - LocalTensorBacking const &local_tensor_backing, - LocalArgsBacking const &local_args_backing, - TrainingLayerPlusContext const &training_layer, - Allocator &allocator) { +std::optional + create_per_device_op_state(LocalTaskRegistry const &local_task_registry, + LocalTensorBacking const &tensor_backing, + RuntimeArgConfig const &runtime_arg_config, + Allocator &allocator, + layer_guid_t layer_id, + ComputationGraphOpAttrs const &op_attrs, + SymbolicLayerTrainingTensorGroupSignatureWithShapes const &layer_signature) { std::optional maybe_registered_task = try_get_registered_task( - local_task_registry, training_layer.layer_guid, OpTaskType::BWD); + local_task_registry, layer_id, OpTaskType::INIT); ASSERT(maybe_registered_task.has_value()); - registered_task_t registered_task = maybe_registered_task.value(); - if (registered_task.is_noop_task()) { - return std::nullopt; - } - std::optional device_state = - get_per_device_op_state_if_exists(local_args_backing, - training_layer.layer_guid); - TaskInvocation invocation = lower_to_task_invocation( - get_backward_op_task_invocation(training_layer.layer_attrs.op_attrs), - training_layer, - device_state); - TaskArgumentAccessor accessor = - get_task_arg_accessor(local_tensor_backing, - local_args_backing.runtime_arg_config, - invocation, - allocator); - return call_task_impl(local_task_registry, invocation.task_id, accessor); + OpTaskInvocation op_init_task_invocation = + get_init_op_task_invocation(op_attrs); + + TaskInvocation invocation = lower_op_task_invocation_to_task_invocation( + /*op_task_invocation=*/op_init_task_invocation, + /*layer_signature=*/layer_signature, + /*device_specific_device_states=*/std::nullopt); + + TaskArgumentAccessor accessor = get_task_arg_accessor_for_invocation( + tensor_backing, runtime_arg_config, invocation, allocator); + + return 
call_init_task_impl( + local_task_registry, + registered_task, + accessor); } void execute_update(LocalTrainingBacking const &local_training_backing, diff --git a/lib/local-execution/src/local-execution/model_training_instance.cc b/lib/local-execution/src/local-execution/model_training_instance.cc index be2791a365..baf31d3b1c 100644 --- a/lib/local-execution/src/local-execution/model_training_instance.cc +++ b/lib/local-execution/src/local-execution/model_training_instance.cc @@ -1,18 +1,23 @@ #include "local-execution/model_training_instance.h" +#include "local-execution/execute_task_for_layer.h" +#include "local-execution/local_atomic_tensor_backing.h" #include "pcg/computation_graph.h" #include "pcg/optimizer_attrs.h" #include "task-spec/training_computation_graph.h" +#include "task-spec/training_symbolic_computation_graph.h" +#include "utils/containers/flatmap.h" #include "utils/containers/reversed.h" +#include "local-execution/local_ready_to_launch_task.dtg.h" namespace FlexFlow { ModelTrainingInstance::ModelTrainingInstance( Allocator const &allocator, - LocalTrainingBacking const &local_training_backing, LossAttrs const &loss_attrs, OptimizerAttrs const &optimizer_attrs) - : allocator(allocator), training_backing(local_training_backing), - loss_attrs(loss_attrs), optimizer_attrs(optimizer_attrs) {} + : allocator(allocator), + loss_attrs(loss_attrs), + optimizer_attrs(optimizer_attrs) {} std::unordered_map> ModelTrainingInstance::forward() { @@ -20,17 +25,19 @@ std::unordered_map> std::unordered_map> per_layer_elapsed_time; - for (layer_guid_t const &layer_guid : - topological_ordering(this->training_backing.training_computation_graph - .computation_graph)) { - std::optional elapsed_time = execute_forward( - this->training_backing.local_task_registry, - this->training_backing.local_tensor_backing, - this->training_backing.local_args_backing, - get_training_layer_plus_context( - this->training_backing.training_computation_graph, layer_guid), - this->allocator); + for (symbolic_layer_guid_t symbolic_layer_guid : + symbolic_cg_topological_ordering(this->symbolic_cg.training_symbolic_computation_graph)) { + std::optional elapsed_time = execute_forward_for_layer( + symbolic_layer_guid, + this->symbolic_cg.training_symbolic_computation_graph, + this->local_tensor_backing, + this->local_atomic_tensor_backing, + this->allocator, + this->local_task_registry, + this->runtime_arg_config); + + layer_guid_t layer_guid = this->symbolic_cg.layer_mapping.at_r(symbolic_layer_guid); per_layer_elapsed_time.insert({layer_guid, elapsed_time}); } @@ -39,33 +46,44 @@ std::unordered_map> std::unordered_map> ModelTrainingInstance::backward() { - compute_loss(this->training_backing, this->loss_attrs, this->allocator); + execute_compute_loss(this->symbolic_cg.training_symbolic_computation_graph, + this->local_tensor_backing, + this->local_atomic_tensor_backing, + this->optimizer_attrs, + this->allocator, + this->runtime_arg_config); std::unordered_map> per_layer_elapsed_time; - for (layer_guid_t const &layer_guid : reversed(topological_ordering( - this->training_backing.training_computation_graph - .computation_graph))) { - std::optional elapsed_time = execute_backward( - this->training_backing.local_task_registry, - this->training_backing.local_tensor_backing, - this->training_backing.local_args_backing, - get_training_layer_plus_context( - this->training_backing.training_computation_graph, layer_guid), - this->allocator); + + for (symbolic_layer_guid_t symbolic_layer_guid : 
reversed(symbolic_cg_topological_ordering( + this->symbolic_cg.training_symbolic_computation_graph))) { + + std::optional elapsed_time = execute_backward_for_layer( + symbolic_layer_guid, + this->symbolic_cg.training_symbolic_computation_graph, + this->local_tensor_backing, + this->local_atomic_tensor_backing, + this->allocator, + this->local_task_registry, + this->runtime_arg_config); + + layer_guid_t layer_guid = this->symbolic_cg.layer_mapping.at_r(symbolic_layer_guid); per_layer_elapsed_time.insert({layer_guid, elapsed_time}); } return per_layer_elapsed_time; } void ModelTrainingInstance::update() { - for (layer_guid_t const &layer_guid : - topological_ordering(this->training_backing.training_computation_graph - .computation_graph)) { - execute_update(this->training_backing, - layer_guid, - this->optimizer_attrs, - this->allocator); + for (symbolic_layer_guid_t symbolic_layer_guid : + symbolic_cg_topological_ordering(this->symbolic_cg.training_symbolic_computation_graph)) { + execute_update_for_layer(symbolic_layer_guid, + this->symbolic_cg.training_symbolic_computation_graph, + this->local_tensor_backing, + this->local_atomic_tensor_backing, + this->allocator, + this->local_task_registry, + this->runtime_arg_config); } this->optimizer_attrs = get_optimizer_attrs_for_next_iter(this->optimizer_attrs); diff --git a/lib/local-execution/src/local_task_argument_accessor.cc b/lib/local-execution/src/local_task_argument_accessor.cc index 207305a8db..8e2e34bca8 100644 --- a/lib/local-execution/src/local_task_argument_accessor.cc +++ b/lib/local-execution/src/local_task_argument_accessor.cc @@ -8,11 +8,13 @@ namespace FlexFlow { LocalTaskArgumentAccessor::LocalTaskArgumentAccessor( Allocator const &allocator, - std::unordered_map const + std::unordered_map const &tensor_slots_backing, - std::unordered_map const &arg_slots_backing) + std::unordered_map const &arg_slots_backing, + size_t device_idx) : allocator(allocator), tensor_slots_backing(tensor_slots_backing), - arg_slots_backing(arg_slots_backing){}; + arg_slots_backing(arg_slots_backing), device_idx(device_idx) +{ }; ConcreteArgSpec const & LocalTaskArgumentAccessor::get_concrete_arg(slot_id_t name) const { @@ -20,9 +22,9 @@ ConcreteArgSpec const & } GenericTensorAccessor LocalTaskArgumentAccessor::get_tensor( - slot_id_t slot, Permissions priv, TensorType tensor_type) const { - tensor_sub_slot_id_t slot_tensor_type = - tensor_sub_slot_id_t{slot, tensor_type}; + slot_id_t slot, Permissions priv, TrainingTensorType tensor_type) const { + training_tensor_slot_id_t slot_tensor_type = + training_tensor_slot_id_t{slot, tensor_type}; GenericTensorAccessorW tensor_backing = this->tensor_slots_backing.at(slot_tensor_type).require_single(); if (priv == Permissions::RO) { @@ -37,9 +39,9 @@ GenericTensorAccessor LocalTaskArgumentAccessor::get_tensor( } VariadicGenericTensorAccessor LocalTaskArgumentAccessor::get_variadic_tensor( - slot_id_t slot, Permissions priv, TensorType tensor_type) const { - tensor_sub_slot_id_t slot_tensor_type = - tensor_sub_slot_id_t{slot, tensor_type}; + slot_id_t slot, Permissions priv, TrainingTensorType tensor_type) const { + training_tensor_slot_id_t slot_tensor_type = + training_tensor_slot_id_t{slot, tensor_type}; std::vector variadic_tensor_backing = this->tensor_slots_backing.at(slot_tensor_type).require_variadic(); if (priv == Permissions::RO) { @@ -62,7 +64,7 @@ Allocator LocalTaskArgumentAccessor::get_allocator() const { } size_t LocalTaskArgumentAccessor::get_device_idx() const { - return 0; + return 
this->device_idx; } } // namespace FlexFlow diff --git a/lib/local-execution/src/per_device_op_state.cc b/lib/local-execution/src/per_device_op_state.cc index a959f4a8c9..b58849b930 100644 --- a/lib/local-execution/src/per_device_op_state.cc +++ b/lib/local-execution/src/per_device_op_state.cc @@ -4,7 +4,7 @@ namespace FlexFlow { PerDeviceOpState get_device_state_from_device_specific( - DeviceSpecificDeviceStates const &device_specific, size_t device_idx) { + DeviceSpecificPerDeviceOpState const &device_specific, size_t device_idx) { return device_specific.visit( [&](auto const &x) { return PerDeviceOpState{*(x.get(device_idx))}; }); } diff --git a/lib/local-execution/src/task_binding.cc b/lib/local-execution/src/task_binding.cc deleted file mode 100644 index 3894fb8d34..0000000000 --- a/lib/local-execution/src/task_binding.cc +++ /dev/null @@ -1,111 +0,0 @@ -#include "task-spec/task_binding.h" -#include "pcg/tensor_guid_t.dtg.h" -#include "task-spec/training_tensor_guid_t.dtg.h" -#include "utils/containers/contains_key.h" -#include "utils/fmt/unordered_map.h" -#include "utils/hash/tuple.h" -#include "utils/hash/unordered_map.h" - -namespace FlexFlow { - -TaskBinding::TaskBinding() : tensor_bindings(), arg_bindings() {} - -TaskBinding::TaskBinding( - std::unordered_map const - &tensor_bindings, - std::unordered_map const &arg_bindings) - : tensor_bindings(tensor_bindings), arg_bindings(arg_bindings) {} - -void TaskBinding::bind(int name, forward_tensor_guid_t const &binding) { - this->bind(slot_id_t{name}, binding); -} - -void TaskBinding::bind(slot_id_t name, forward_tensor_guid_t const &binding) { - this->tensor_bindings.insert({tensor_sub_slot_id_t{name, TensorType::FORWARD}, - training_tensor_guid_t{binding}}); -} - -void TaskBinding::bind_grad(int name, gradient_tensor_guid_t const &binding) { - this->bind_grad(slot_id_t{name}, binding); -} - -void TaskBinding::bind_grad(slot_id_t name, - gradient_tensor_guid_t const &binding) { - this->tensor_bindings.insert( - {tensor_sub_slot_id_t{name, TensorType::GRADIENT}, - training_tensor_guid_t{binding}}); -} - -void TaskBinding::bind_optimizer(int name, - optimizer_tensor_guid_t const &binding) { - this->bind_optimizer(slot_id_t{name}, binding); -} - -void TaskBinding::bind_optimizer(slot_id_t name, - optimizer_tensor_guid_t const &binding) { - this->tensor_bindings.insert( - {tensor_sub_slot_id_t{name, TensorType::OPTIMIZER}, - training_tensor_guid_t{binding}}); -} - -void TaskBinding::bind_loss(int name, loss_tensor_guid_t const &binding) { - this->bind_loss(slot_id_t{name}, binding); -} - -void TaskBinding::bind_loss(slot_id_t name, loss_tensor_guid_t const &binding) { - this->tensor_bindings.insert({tensor_sub_slot_id_t{name, TensorType::LOSS}, - training_tensor_guid_t{binding}}); -} - -void TaskBinding::insert_arg_spec(slot_id_t name, TaskArgSpec const &arg_spec) { - assert(!contains_key(this->arg_bindings, name)); - this->arg_bindings.insert({name, arg_spec}); -} - -bool TaskBinding::operator==(TaskBinding const &other) const { - return this->tie() == other.tie(); -} - -bool TaskBinding::operator!=(TaskBinding const &other) const { - return this->tie() != other.tie(); -} - -std::tuple< - std::unordered_map const &, - std::unordered_map const &> - TaskBinding::tie() const { - return std::tie(this->tensor_bindings, this->arg_bindings); -} - -std::unordered_map const & - TaskBinding::get_tensor_bindings() const { - return this->tensor_bindings; -} - -std::unordered_map const & - TaskBinding::get_arg_bindings() const { - return 
this->arg_bindings; -} - -std::string format_as(TaskBinding const &x) { - std::ostringstream oss; - oss << " using namespace ::FlexFlow; diff --git a/lib/local-execution/test/src/local-execution/local_task_argument_accessor.cc b/lib/local-execution/test/src/local-execution/local_task_argument_accessor.cc index 482795b278..c917a8e99e 100644 --- a/lib/local-execution/test/src/local-execution/local_task_argument_accessor.cc +++ b/lib/local-execution/test/src/local-execution/local_task_argument_accessor.cc @@ -1,8 +1,8 @@ #include "local-execution/local_task_argument_accessor.h" -#include "doctest/doctest.h" #include "kernels/local_cpu_allocator.h" #include "task-spec/task_signature_impl.h" #include "utils/fmt/variant.h" +#include using namespace ::FlexFlow; @@ -36,24 +36,24 @@ TEST_SUITE(FF_TEST_SUITE) { VARIADIC_TENSORS, }; - std::unordered_map + std::unordered_map tensor_slots_backing = { { - tensor_sub_slot_id_t{slot_id_t{INPUT}, TensorType::FORWARD}, + training_tensor_slot_id_t{slot_id_t{INPUT}, TrainingTensorType::FORWARD}, TensorSlotBacking{input}, }, { - tensor_sub_slot_id_t{slot_id_t{INPUT}, TensorType::GRADIENT}, + training_tensor_slot_id_t{slot_id_t{INPUT}, TrainingTensorType::GRADIENT}, TensorSlotBacking{input_grad}, }, { - tensor_sub_slot_id_t{slot_id_t{VARIADIC_TENSORS}, - TensorType::FORWARD}, + training_tensor_slot_id_t{slot_id_t{VARIADIC_TENSORS}, + TrainingTensorType::FORWARD}, TensorSlotBacking{variadic_tensors}, }, { - tensor_sub_slot_id_t{slot_id_t{VARIADIC_TENSORS}, - TensorType::GRADIENT}, + training_tensor_slot_id_t{slot_id_t{VARIADIC_TENSORS}, + TrainingTensorType::GRADIENT}, TensorSlotBacking{variadic_tensors_grad}, }, }; @@ -62,69 +62,70 @@ TEST_SUITE(FF_TEST_SUITE) { /*allocator=*/allocator, /*tensor_slots_backing=*/tensor_slots_backing, /*arg_slots_backing=*/{}, + /*device_idx=*/0, }; SUBCASE("get_tensor") { - SUBCASE("get_tensor(slot_id_t, Permissions::RO, TensorType::FORWARD)") { + SUBCASE("get_tensor(slot_id_t, Permissions::RO, TrainingTensorType::FORWARD)") { GenericTensorAccessor correct = GenericTensorAccessor{ read_only_accessor_from_write_accessor(input)}; GenericTensorAccessor result = acc.get_tensor( - slot_id_t{INPUT}, Permissions::RO, TensorType::FORWARD); + slot_id_t{INPUT}, Permissions::RO, TrainingTensorType::FORWARD); CHECK(correct == result); } - SUBCASE("get_tensor(slot_id_t, Permissions::RO, TensorType::GRADIENT)") { + SUBCASE("get_tensor(slot_id_t, Permissions::RO, TrainingTensorType::GRADIENT)") { GenericTensorAccessor correct = GenericTensorAccessor{ read_only_accessor_from_write_accessor(input_grad)}; GenericTensorAccessor result = acc.get_tensor( - slot_id_t{INPUT}, Permissions::RO, TensorType::GRADIENT); + slot_id_t{INPUT}, Permissions::RO, TrainingTensorType::GRADIENT); CHECK(correct == result); } - SUBCASE("get_tensor(slot_id_t, Permissions::WO, TensorType::FORWARD)") { + SUBCASE("get_tensor(slot_id_t, Permissions::WO, TrainingTensorType::FORWARD)") { GenericTensorAccessor correct = GenericTensorAccessor{input}; GenericTensorAccessor result = acc.get_tensor( - slot_id_t{INPUT}, Permissions::WO, TensorType::FORWARD); + slot_id_t{INPUT}, Permissions::WO, TrainingTensorType::FORWARD); CHECK(correct == result); } - SUBCASE("get_tensor(slot_id_t, Permissions::WO, TensorType::GRADIENT)") { + SUBCASE("get_tensor(slot_id_t, Permissions::WO, TrainingTensorType::GRADIENT)") { GenericTensorAccessor correct = GenericTensorAccessor{input_grad}; GenericTensorAccessor result = acc.get_tensor( - slot_id_t{INPUT}, Permissions::WO, TensorType::GRADIENT); + 
slot_id_t{INPUT}, Permissions::WO, TrainingTensorType::GRADIENT); CHECK(correct == result); } - SUBCASE("get_tensor(slot_id_t, Permissions::RW, TensorType::FORWARD)") { + SUBCASE("get_tensor(slot_id_t, Permissions::RW, TrainingTensorType::FORWARD)") { GenericTensorAccessor correct = GenericTensorAccessor{input}; GenericTensorAccessor result = acc.get_tensor( - slot_id_t{INPUT}, Permissions::RW, TensorType::FORWARD); + slot_id_t{INPUT}, Permissions::RW, TrainingTensorType::FORWARD); CHECK(correct == result); } - SUBCASE("get_tensor(slot_id_t, Permissions::RW, TensorType::GRADIENT)") { + SUBCASE("get_tensor(slot_id_t, Permissions::RW, TrainingTensorType::GRADIENT)") { GenericTensorAccessor correct = GenericTensorAccessor{input_grad}; GenericTensorAccessor result = acc.get_tensor( - slot_id_t{INPUT}, Permissions::RW, TensorType::GRADIENT); + slot_id_t{INPUT}, Permissions::RW, TrainingTensorType::GRADIENT); CHECK(correct == result); } } SUBCASE("get_variadic_tensor") { SUBCASE("get_variadic_tensor(slot_id_t, Permissions::RO, " - "TensorType::FORWARD)") { + "TrainingTensorType::FORWARD)") { VariadicGenericTensorAccessor correct = VariadicGenericTensorAccessor{std::vector{ read_only_accessor_from_write_accessor(variadic_tensors.at(0)), read_only_accessor_from_write_accessor( variadic_tensors.at(1))}}; VariadicGenericTensorAccessor result = acc.get_variadic_tensor( - slot_id_t{VARIADIC_TENSORS}, Permissions::RO, TensorType::FORWARD); + slot_id_t{VARIADIC_TENSORS}, Permissions::RO, TrainingTensorType::FORWARD); CHECK(result == correct); } SUBCASE("get_variadic_tensor(slot_id_t, Permissions::RO, " - "TensorType::GRADIENT)") { + "TrainingTensorType::GRADIENT)") { VariadicGenericTensorAccessor correct = VariadicGenericTensorAccessor{std::vector{ read_only_accessor_from_write_accessor( @@ -132,43 +133,43 @@ TEST_SUITE(FF_TEST_SUITE) { read_only_accessor_from_write_accessor( variadic_tensors_grad.at(1))}}; VariadicGenericTensorAccessor result = acc.get_variadic_tensor( - slot_id_t{VARIADIC_TENSORS}, Permissions::RO, TensorType::GRADIENT); + slot_id_t{VARIADIC_TENSORS}, Permissions::RO, TrainingTensorType::GRADIENT); CHECK(result == correct); } SUBCASE("get_variadic_tensor(slot_id_t, Permissions::WO, " - "TensorType::FORWARD)") { + "TrainingTensorType::FORWARD)") { VariadicGenericTensorAccessor correct = VariadicGenericTensorAccessor{variadic_tensors}; VariadicGenericTensorAccessor result = acc.get_variadic_tensor( - slot_id_t{VARIADIC_TENSORS}, Permissions::WO, TensorType::FORWARD); + slot_id_t{VARIADIC_TENSORS}, Permissions::WO, TrainingTensorType::FORWARD); CHECK(result == correct); } SUBCASE("get_variadic_tensor(slot_id_t, Permissions::WO, " - "TensorType::GRADIENT)") { + "TrainingTensorType::GRADIENT)") { VariadicGenericTensorAccessor correct = VariadicGenericTensorAccessor{variadic_tensors_grad}; VariadicGenericTensorAccessor result = acc.get_variadic_tensor( - slot_id_t{VARIADIC_TENSORS}, Permissions::WO, TensorType::GRADIENT); + slot_id_t{VARIADIC_TENSORS}, Permissions::WO, TrainingTensorType::GRADIENT); CHECK(result == correct); } SUBCASE("get_variadic_tensor(slot_id_t, Permissions::WO, " - "TensorType::FORWARD)") { + "TrainingTensorType::FORWARD)") { VariadicGenericTensorAccessor correct = VariadicGenericTensorAccessor{variadic_tensors}; VariadicGenericTensorAccessor result = acc.get_variadic_tensor( - slot_id_t{VARIADIC_TENSORS}, Permissions::RW, TensorType::FORWARD); + slot_id_t{VARIADIC_TENSORS}, Permissions::RW, TrainingTensorType::FORWARD); CHECK(result == correct); } 
SUBCASE("get_variadic_tensor(slot_id_t, Permissions::WO, " - "TensorType::GRADIENT)") { + "TrainingTensorType::GRADIENT)") { VariadicGenericTensorAccessor correct = VariadicGenericTensorAccessor{variadic_tensors_grad}; VariadicGenericTensorAccessor result = acc.get_variadic_tensor( - slot_id_t{VARIADIC_TENSORS}, Permissions::RW, TensorType::GRADIENT); + slot_id_t{VARIADIC_TENSORS}, Permissions::RW, TrainingTensorType::GRADIENT); CHECK(result == correct); } } diff --git a/lib/local-execution/test/src/local-execution/local_task_registry.cc b/lib/local-execution/test/src/local-execution/local_task_registry.cc index 27cd74b2a6..5dc66c8ebc 100644 --- a/lib/local-execution/test/src/local-execution/local_task_registry.cc +++ b/lib/local-execution/test/src/local-execution/local_task_registry.cc @@ -206,8 +206,12 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Partial task does not exist") { ComputationGraphOpAttrs bmm_attrs = ComputationGraphOpAttrs{ - BatchMatmulAttrs{/*a_seq_length_dim=*/10_n, - /*b_seq_length_dim=*/20_n}}; + BatchMatmulAttrs{ + /*a_seq_length_dim=*/10_p, + /*b_seq_length_dim=*/20_p, + }, + }; + LocalTaskRegistry task_registry = construct_local_task_registry_for_layers({ {layer_guid, LayerAttrs{bmm_attrs, std::nullopt}}, diff --git a/lib/local-execution/test/src/local-execution/local_tensor_backing.cc b/lib/local-execution/test/src/local-execution/local_tensor_backing.cc index 2f5bf493d6..7cee014142 100644 --- a/lib/local-execution/test/src/local-execution/local_tensor_backing.cc +++ b/lib/local-execution/test/src/local-execution/local_tensor_backing.cc @@ -200,21 +200,21 @@ TEST_SUITE(FF_TEST_SUITE) { GenericTensorAccessorW t4_accessor = allocator.allocate_tensor(tensor_shape); - tensor_sub_slot_id_t tensor_slot_1_forward = tensor_sub_slot_id_t{ + training_tensor_slot_id_t tensor_slot_1_forward = training_tensor_slot_id_t{ slot_id_t{TENSOR_SLOT_1}, - TensorType::FORWARD, + TrainingTensorType::FORWARD, }; - tensor_sub_slot_id_t tensor_slot_1_gradient = tensor_sub_slot_id_t{ + training_tensor_slot_id_t tensor_slot_1_gradient = training_tensor_slot_id_t{ slot_id_t{TENSOR_SLOT_1}, - TensorType::GRADIENT, + TrainingTensorType::GRADIENT, }; - tensor_sub_slot_id_t tensor_slot_2_forward = tensor_sub_slot_id_t{ + training_tensor_slot_id_t tensor_slot_2_forward = training_tensor_slot_id_t{ slot_id_t{TENSOR_SLOT_2}, - TensorType::FORWARD, + TrainingTensorType::FORWARD, }; - tensor_sub_slot_id_t tensor_slot_3_forward = tensor_sub_slot_id_t{ + training_tensor_slot_id_t tensor_slot_3_forward = training_tensor_slot_id_t{ slot_id_t{TENSOR_SLOT_3}, - TensorType::FORWARD, + TrainingTensorType::FORWARD, }; LocalTensorBacking local_tensor_backing = LocalTensorBacking{ @@ -262,10 +262,10 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; - std::unordered_map result = + std::unordered_map result = construct_tensor_slots_backing_for_binding(local_tensor_backing, task_binding); - std::unordered_map correct = { + std::unordered_map correct = { { tensor_slot_1_forward, TensorSlotBacking{t1_accessor}, diff --git a/lib/local-execution/test/src/local-execution/loss_functions.cc b/lib/local-execution/test/src/local-execution/loss_functions.cc index e5fffb980c..a40421d078 100644 --- a/lib/local-execution/test/src/local-execution/loss_functions.cc +++ b/lib/local-execution/test/src/local-execution/loss_functions.cc @@ -1,4 +1,3 @@ -#include "doctest/doctest.h" #include "internal/test_utils.h" #include "kernels/local_cuda_allocator.h" #include "kernels/managed_ff_stream.h" @@ -15,6 +14,7 @@ #include "task-spec/runtime_arg_config.h" 
#include "task-spec/training_computation_graph.h" #include "utils/containers/get_only.h" +#include using namespace ::FlexFlow; diff --git a/lib/local-pcg-execution/CMakeLists.txt b/lib/local-pcg-execution/CMakeLists.txt new file mode 100644 index 0000000000..5fadff777b --- /dev/null +++ b/lib/local-pcg-execution/CMakeLists.txt @@ -0,0 +1,21 @@ +ff_add_library( + NAME + local-pcg-execution + SRC_PATTERNS + src/*.cc + PUBLIC_INCLUDE + include/ + PRIVATE_INCLUDE + src/ + DEPS + op-attrs + utils + kernels + task-spec + local-execution + pcg + spdlog + compiler +) + +add_subdirectory(test) diff --git a/lib/local-pcg-execution/include/local-pcg-execution/execute_tasks_for_parallel_layer.h b/lib/local-pcg-execution/include/local-pcg-execution/execute_tasks_for_parallel_layer.h new file mode 100644 index 0000000000..76fd361828 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/execute_tasks_for_parallel_layer.h @@ -0,0 +1,60 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_EXECUTE_TASKS_FOR_PARALLEL_LAYER_H +#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_EXECUTE_TASKS_FOR_PARALLEL_LAYER_H + +#include "compiler/mapped_operator_task_group.h" +#include "local-execution/local_atomic_tensor_backing.dtg.h" +#include "local-execution/local_ready_to_launch_task.dtg.h" +#include "local-execution/local_task_registry.dtg.h" +#include "local-pcg-execution/local_parallel_tensor_backing.dtg.h" +#include "local-pcg-execution/mapped_per_device_op_states_group.h" +#include "local-pcg-execution/mapped_runtime_task_group.h" +#include "local-pcg-execution/task_group_execution_times.dtg.h" +#include "task-spec/runtime_arg_config.dtg.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "task-spec/training_symbolic_computation_graph.dtg.h" + +namespace FlexFlow { + +std::unordered_map prepare_parallel_runtime_task_invocations( + RuntimeTaskInvocation const &, + LocalParallelTensorBacking const &, + LocalAtomicTensorBacking const &, + Allocator &, + RuntimeArgConfig const &, + MappedRuntimeTaskGroup const &); + +std::optional execute_init_for_parallel_layer( + symbolic_layer_guid_t, + TrainingSymbolicComputationGraph const &, + LocalParallelTensorBacking const &, + LocalAtomicTensorBacking const &, + Allocator &, + LocalTaskRegistry const &, + RuntimeArgConfig const &, + MappedRuntimeTaskGroup const &); + +std::optional execute_forward_for_parallel_layer( + symbolic_layer_guid_t, + TrainingSymbolicComputationGraph const &, + LocalParallelTensorBacking const &, + LocalAtomicTensorBacking const &, + Allocator &, + LocalTaskRegistry const &, + RuntimeArgConfig const &, + MappedRuntimeTaskGroup const &); + +std::optional execute_forward_for_parallel_layer( + symbolic_layer_guid_t, + TrainingSymbolicComputationGraph const &, + LocalParallelTensorBacking const &, + LocalAtomicTensorBacking const &, + Allocator &, + LocalTaskRegistry const &, + RuntimeArgConfig const &, + MappedRuntimeTaskGroup const &); + + + +} // namespace FlexFlow + +#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.h b/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.h new file mode 100644 index 0000000000..95321d53b0 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.h @@ -0,0 +1,40 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PARALLEL_TENSOR_BACKING_H +#define 
_FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PARALLEL_TENSOR_BACKING_H + +#include "kernels/allocation.h" +#include "local-execution/atomic_task_invocation.dtg.h" +#include "local-execution/tensor_slot_backing.dtg.h" +#include "local-pcg-execution/local_parallel_tensor_backing.dtg.h" +#include "local-pcg-execution/mapped_runtime_task_group.h" +#include "local-pcg-execution/parallel_tensor_accessors_w.dtg.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "task-spec/runtime_arg_config.dtg.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "task-spec/training_tensor_slot_id_t.dtg.h" + +namespace FlexFlow { + +std::unordered_map + lower_parallel_runtime_task_invocation_to_atomic_task_invocation_group( + LocalParallelTensorBacking const &, + RuntimeTaskInvocation const &, + RuntimeArgConfig const &, + MappedRuntimeTaskGroup const &); + +AtomicTaskInvocation + lower_parallel_runtime_task_invocation_to_atomic_task_invocation( + LocalParallelTensorBacking const &, + RuntimeTaskInvocation const &, + RuntimeArgConfig const &, + MachineSpaceCoordinate const &, + RuntimeAtomicTaskShardBinding const &); + +// LocalParallelTensorBacking construct_local_parallel_tensor_backing( +// std::unordered_map const &training_ptensor_shapes, +// std::unordered_map const &preallocated_ptensors, +// Allocator &); + +} // namespace FlexFlow + +#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.struct.toml new file mode 100644 index 0000000000..b937718b43 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.struct.toml @@ -0,0 +1,23 @@ +namespace = "FlexFlow" +name = "LocalParallelTensorBacking" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "task-spec/symbolic_training_tensor_guid_t.dtg.h", + "local-pcg-execution/training_parallel_tensor_shard_group.dtg.h", +] + +src_includes = [ + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", + "utils/ord/unordered_map.h", +] + +[[fields]] +name = "parallel_tensor_map" +type = "std::unordered_map<::FlexFlow::symbolic_training_tensor_guid_t, ::FlexFlow::TrainingParallelTensorShardGroup>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.h b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.h new file mode 100644 index 0000000000..a39f91d591 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.h @@ -0,0 +1,18 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PCG_ARGS_BACKING_H +#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PCG_ARGS_BACKING_H + +#include "local-pcg-execution/local_pcg_args_backing.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" +#include "task-spec/symbolic_layer_guid_t.dtg.h" +#include +#include + +namespace FlexFlow { + +std::unordered_map> + get_op_states_for_machine_space_coord(LocalPcgArgsBacking const &, MachineSpaceCoordinate const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.struct.toml new file mode 100644 index 0000000000..f35448810f 
--- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.struct.toml @@ -0,0 +1,20 @@ +namespace = "FlexFlow" +name = "LocalPcgArgsBacking" +features = [] + +includes = [ + "task-spec/runtime_arg_config.dtg.h", + "task-spec/device_specific_device_states.dtg.h", + "local-pcg-execution/parallel_layer_instance_id_t.dtg.h", + "", + "", + "local-pcg-execution/mapped_per_device_op_states_group.h", +] + +[[fields]] +name = "runtime_arg_config" +type = "::FlexFlow::RuntimeArgConfig" + +[[fields]] +name = "per_device_op_states" +type = "std::unordered_map<::FlexFlow::symbolic_layer_guid_t, std::optional<::FlexFlow::MappedPerDeviceOpStatesGroup>>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.h b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.h new file mode 100644 index 0000000000..5ebc12c10b --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.h @@ -0,0 +1,40 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PCG_TRAINING_BACKING_H +#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PCG_TRAINING_BACKING_H + +#include "local-pcg-execution/local_pcg_training_backing.dtg.h" +#include "utils/units/milliseconds_t.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" +#include "task-spec/training_parallel_layer_plus_context.dtg.h" + +namespace FlexFlow { + +LocalPcgTrainingBacking make_local_pcg_training_backing_for_pcg( + Allocator &allocator, + std::unordered_map + const &preallocated_tensors, + TrainingParallelComputationGraph const &training_pcg, + RuntimeArgConfig const &runtime_arg_config, + OptimizerAttrs const &optimizer_attrs, + MachineComputeSpecification const &machine_compute_specification); + +std::optional> + execute_forward(LocalTaskRegistry const &, + LocalParallelTensorBacking const &, + LocalPcgArgsBacking const &, + TrainingParallelLayerPlusContext const &, + Allocator &); + +std::optional> + execute_backward(); + +void compute_loss(LocalPcgTrainingBacking const &, LossAttrs const &, Allocator &); + +void execute_update(LocalPcgTrainingBacking const &, + parallel_layer_guid_t const &, + OptimizerAttrs const &, + Allocator &); + +} // namespace FlexFlow + +#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.struct.toml new file mode 100644 index 0000000000..637ee1559c --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.struct.toml @@ -0,0 +1,31 @@ +namespace = "FlexFlow" +name = "LocalPcgTrainingBacking" +features = [] + +includes = [ + "task-spec/training_parallel_computation_graph.dtg.h", + "local-execution/local_task_registry.dtg.h", + "local-pcg-execution/local_parallel_tensor_backing.dtg.h", + "local-pcg-execution/local_pcg_args_backing.dtg.h", + "pcg/machine_compute_specification.dtg.h", +] + +[[fields]] +name = "training_pcg" +type = "::FlexFlow::TrainingParallelComputationGraph" + +[[fields]] +name = "local_task_registry" +type = "::FlexFlow::LocalTaskRegistry" + +[[fields]] +name = "local_parallel_tensor_backing" +type = "::FlexFlow::LocalParallelTensorBacking" + +[[fields]] +name = "local_parallel_args_backing" +type = "::FlexFlow::LocalPcgArgsBacking" + +[[fields]] +name = "machine_compute_specification" +type = 
"::FlexFlow::MachineComputeSpecification" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/mapped_per_device_op_states_group.h b/lib/local-pcg-execution/include/local-pcg-execution/mapped_per_device_op_states_group.h new file mode 100644 index 0000000000..e22f7a3ac6 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/mapped_per_device_op_states_group.h @@ -0,0 +1,46 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_MAPPED_PER_DEVICE_OP_STATES_GROUP_H +#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_MAPPED_PER_DEVICE_OP_STATES_GROUP_H + +#include "compiler/mapped_operator_task_group.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" +#include "utils/bidict/bidict.h" +#include "compiler/mapped_task_signature_tensor_key.dtg.h" + +namespace FlexFlow { + +struct MappedPerDeviceOpStatesGroup { + MappedPerDeviceOpStatesGroup() = delete; + + explicit MappedPerDeviceOpStatesGroup(std::unordered_map const &per_device_op_states); + + [[nodiscard]] bool operator==(MappedPerDeviceOpStatesGroup const &) const; + [[nodiscard]] bool operator!=(MappedPerDeviceOpStatesGroup const &) const; + + [[nodiscard]] std::unordered_map const &get_per_device_op_states() const; + +private: + std::unordered_map shard_bindings; + +private: + [[nodiscard]] std::tuple< + decltype(shard_bindings) const & + > tie() const; + + friend struct ::std::hash; +}; + +std::string format_as(::FlexFlow::MappedPerDeviceOpStatesGroup const &); +std::ostream &operator<<(std::ostream &, ::FlexFlow::MappedPerDeviceOpStatesGroup const &); + +} // namespace FlexFlow + +namespace std { + +template <> +struct hash<::FlexFlow::MappedPerDeviceOpStatesGroup> { + size_t operator()(::FlexFlow::MappedPerDeviceOpStatesGroup const &) const; +}; + +} // namespace std +#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/mapped_runtime_task_group.h b/lib/local-pcg-execution/include/local-pcg-execution/mapped_runtime_task_group.h new file mode 100644 index 0000000000..3b08d3035f --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/mapped_runtime_task_group.h @@ -0,0 +1,53 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_MAPPED_RUNTIME_TASK_GROUP_H +#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_MAPPED_RUNTIME_TASK_GROUP_H + +#include "compiler/mapped_operator_task_group.h" +#include "local-pcg-execution/runtime_atomic_task_shard_binding.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "task-spec/fwb_op_task_type.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.dtg.h" +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +struct MappedRuntimeTaskGroup { + MappedRuntimeTaskGroup() = delete; + + explicit MappedRuntimeTaskGroup(bidict const &shard_bindings); + + [[nodiscard]] bool operator==(MappedRuntimeTaskGroup const &) const; + [[nodiscard]] bool operator!=(MappedRuntimeTaskGroup const &) const; + + [[nodiscard]] bidict const &get_shard_bindings() const; + +private: + bidict shard_bindings; + +private: + [[nodiscard]] std::tuple< + decltype(shard_bindings) const & + > tie() const; + + friend struct ::std::hash; +}; + +std::string format_as(::FlexFlow::MappedRuntimeTaskGroup const &); +std::ostream &operator<<(std::ostream &, ::FlexFlow::MappedRuntimeTaskGroup const &); + +MappedRuntimeTaskGroup + lower_mapped_operator_task_group_to_mapped_runtime_task_group(MappedOperatorTaskGroup 
const &, + SymbolicLayerTrainingTensorGroupSignature const &, + FwbOpTaskType); + +} // namespace FlexFlow + +namespace std { + +template <> +struct hash<::FlexFlow::MappedRuntimeTaskGroup> { + size_t operator()(::FlexFlow::MappedRuntimeTaskGroup const &) const; +}; + +} // namespace std + +#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_forward_tensor_group.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/parallel_forward_tensor_group.struct.toml new file mode 100644 index 0000000000..2ee5c375e0 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/parallel_forward_tensor_group.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "ParallelForwardTensorGroup" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", + "rapidcheck", +] + +includes = [ + "task-spec/forward_tensor_guid_t.dtg.h", + "op-attrs/parallel_tensor_space_coordinate.dtg.h", + "utils/bidict/bidict.h", +] + +[[fields]] +name = "forward_training_tensors_by_coord" +type = "::FlexFlow::bidict<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::forward_tensor_guid_t>" + diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_layer_instance_id_t.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/parallel_layer_instance_id_t.struct.toml new file mode 100644 index 0000000000..f8ac484591 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/parallel_layer_instance_id_t.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "parallel_layer_instance_id_t" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h", + "pcg/gpu_id_t.dtg.h", +] + +[[fields]] +name = "parallel_layer_guid" +type = "::FlexFlow::parallel_layer_guid_t" + +[[fields]] +name = "gpu_id" +type = "::FlexFlow::gpu_id_t" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_loss_tensor_group.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/parallel_loss_tensor_group.struct.toml new file mode 100644 index 0000000000..ab6f5debe7 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/parallel_loss_tensor_group.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "ParallelLossTensorGroup" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", + "rapidcheck", +] + +includes = [ + "task-spec/loss_tensor_guid_t.dtg.h", + "op-attrs/parallel_tensor_space_coordinate.dtg.h", + "utils/bidict/bidict.h", +] + +[[fields]] +name = "loss_training_tensors_by_coord" +type = "::FlexFlow::bidict<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::loss_tensor_guid_t>" + diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_model_training_instance.h b/lib/local-pcg-execution/include/local-pcg-execution/parallel_model_training_instance.h new file mode 100644 index 0000000000..c0374f454d --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/parallel_model_training_instance.h @@ -0,0 +1,43 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_PARALLEL_MODEL_TRAINING_INSTANCE_H +#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_PARALLEL_MODEL_TRAINING_INSTANCE_H + +#include "compiler/mapped_parallel_computation_graph.dtg.h" +#include "kernels/allocation.h" +#include "local-execution/local_atomic_tensor_backing.dtg.h" +#include "local-execution/local_task_registry.dtg.h" +#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" +#include 
"pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "task-spec/runtime_arg_config.dtg.h" +#include "task-spec/training_symbolic_computation_graph_from_pcg_conversion.dtg.h" +#include "local-pcg-execution/local_parallel_tensor_backing.dtg.h" +#include "local-pcg-execution/task_group_execution_times.dtg.h" + +namespace FlexFlow { + +struct ParallelModelTrainingInstance { + ParallelModelTrainingInstance(Allocator const &, + LossAttrs const &, + OptimizerAttrs const &); + +public: + std::unordered_map> forward(); + std::unordered_map> backward(); + void update(); + GenericTensorAccessorR get_loss_tensor_accessor() const; + +private: + Allocator allocator; + LossAttrs loss_attrs; + OptimizerAttrs optimizer_attrs; + TrainingSymbolicComputationGraphFromPcgConversion symbolic_cg; + MappedParallelComputationGraph mapped_pcg; + LocalParallelTensorBacking local_tensor_backing; + LocalAtomicTensorBacking local_atomic_tensor_backing; + LocalTaskRegistry local_task_registry; + RuntimeArgConfig runtime_arg_config; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_tensor_accessors_w.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/parallel_tensor_accessors_w.struct.toml new file mode 100644 index 0000000000..60c8af7e09 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/parallel_tensor_accessors_w.struct.toml @@ -0,0 +1,13 @@ +namespace = "FlexFlow" +name = "ParallelTensorAccessorsW" +features = [] + +includes = [ + "", + "op-attrs/parallel_tensor_space_coordinate.dtg.h", + "kernels/accessor.h", +] + +[[fields]] +name = "shard_map" +type = "std::unordered_map<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::GenericTensorAccessorW>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.h b/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.h new file mode 100644 index 0000000000..c2933ebd2f --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.h @@ -0,0 +1,26 @@ +#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_RUNTIME_ATOMIC_TASK_SHARD_BINDING_H +#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_RUNTIME_ATOMIC_TASK_SHARD_BINDING_H + +#include "compiler/operator_atomic_task_shard_binding.dtg.h" +#include "local-pcg-execution/runtime_atomic_task_shard_binding.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.dtg.h" +#include "task-spec/fwb_op_task_type.dtg.h" + +namespace FlexFlow { + +RuntimeAtomicTaskShardBinding + lower_op_shard_binding_to_fwd_pass_runtime_shard_binding(OperatorAtomicTaskShardBinding const &, + SymbolicLayerTrainingTensorGroupSignature const &); + +RuntimeAtomicTaskShardBinding + lower_op_shard_binding_to_bwd_pass_runtime_shard_binding(OperatorAtomicTaskShardBinding const &, + SymbolicLayerTrainingTensorGroupSignature const &); + +RuntimeAtomicTaskShardBinding + lower_op_shard_binding_to_runtime_shard_binding(OperatorAtomicTaskShardBinding const &, + SymbolicLayerTrainingTensorGroupSignature const &, + FwbOpTaskType); + +} // namespace FlexFlow + +#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.struct.toml new file mode 100644 index 0000000000..a2640ebc13 --- /dev/null +++ 
b/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.struct.toml @@ -0,0 +1,24 @@ +namespace = "FlexFlow" +name = "RuntimeAtomicTaskShardBinding" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "op-attrs/parallel_tensor_space_coordinate.dtg.h", + "", + "task-spec/symbolic_training_tensor_guid_t.dtg.h", +] + +src_includes = [ + "utils/hash/unordered_map.h", + "utils/fmt/unordered_map.h", + "utils/ord/unordered_map.h", +] + +[[fields]] +name = "raw_binding" +type = "std::unordered_map<::FlexFlow::symbolic_training_tensor_guid_t, ::FlexFlow::ParallelTensorSpaceCoordinate>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/task_group_execution_times.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/task_group_execution_times.struct.toml new file mode 100644 index 0000000000..584e183908 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/task_group_execution_times.struct.toml @@ -0,0 +1,26 @@ +namespace = "FlexFlow" +name = "TaskGroupExecutionTimes" +features = [ + "eq", + "ord", + "hash", + "json", + "rapidcheck", + "fmt", +] + +includes = [ + "", + "utils/units/milliseconds_t.h", + "pcg/machine_space_coordinate.dtg.h", +] + +src_includes = [ + "utils/hash/unordered_map.h", + "utils/fmt/unordered_map.h", + "utils/ord/unordered_map.h", +] + +[[fields]] +name = "execution_times" +type = "std::unordered_map<::FlexFlow::MachineSpaceCoordinate, ::FlexFlow::milliseconds_t>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/training_operator_task_signature.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/training_operator_task_signature.struct.toml new file mode 100644 index 0000000000..9ad75d6965 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/training_operator_task_signature.struct.toml @@ -0,0 +1,33 @@ +namespace = "FlexFlow" +name = "TrainingOperatorTaskSignature" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", + "rapidcheck", +] + +includes = [ + "task-spec/training_tensor_guid_t.dtg.h", + "", +] + +src_includes = [ + "utils/hash/vector.h", + "utils/fmt/vector.h", + "utils/ord/vector.h", +] + +[[fields]] +name = "inputs" +type = "std::vector<::FlexFlow::training_tensor_guid_t>" + +[[fields]] +name = "weights" +type = "std::vector<::FlexFlow::training_tensor_guid_t>" + +[[fields]] +name = "outputs" +type = "std::vector<::FlexFlow::training_tensor_guid_t>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_layer_plus_context.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_layer_plus_context.struct.toml new file mode 100644 index 0000000000..edf664d442 --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_layer_plus_context.struct.toml @@ -0,0 +1,29 @@ +namespace = "FlexFlow" +name = "TrainingParallelLayerPlusContext" +features = [] + +includes = [ + "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h", + "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h", + "task-spec/training_parallel_tensor_group_with_attrs.dtg.h", +] + +[[fields]] +name = "parallel_layer_guid" +type = "::FlexFlow::parallel_layer_guid_t" + +[[fields]] +name = "parallel_layer_attrs" +type = "::FlexFlow::ParallelLayerAttrs" + +[[fields]] +name = "input_parallel_tensor_groups" +type = "std::vector<::FlexFlow::TrainingParallelTensorGroupWithAttrs>" + +[[fields]] +name = "weight_tensor_groups" +type = 
"std::vector<::FlexFlow::TrainingParallelTensorGroupWithAttrs>" + +[[fields]] +name = "output_tensor_groups" +type = "std::vector<::FlexFlow::TrainingParallelTensorGroupWithAttrs>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_tensor_shard_group.struct.toml b/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_tensor_shard_group.struct.toml new file mode 100644 index 0000000000..4b53c8ec2b --- /dev/null +++ b/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_tensor_shard_group.struct.toml @@ -0,0 +1,20 @@ +namespace = "FlexFlow" +name = "TrainingParallelTensorShardGroup" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", + "rapidcheck", +] + +includes = [ + "", + "op-attrs/parallel_tensor_space_coordinate.dtg.h", + "local-execution/atomic_training_tensor_guid_t.dtg.h", +] + +[[fields]] +name = "shard_map" +type = "std::unordered_map<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::atomic_training_tensor_guid_t>" diff --git a/lib/local-pcg-execution/src/local-pcg-execution/execute_tasks_for_parallel_layer.cc b/lib/local-pcg-execution/src/local-pcg-execution/execute_tasks_for_parallel_layer.cc new file mode 100644 index 0000000000..3e6c940d50 --- /dev/null +++ b/lib/local-pcg-execution/src/local-pcg-execution/execute_tasks_for_parallel_layer.cc @@ -0,0 +1,198 @@ +#include "local-pcg-execution/execute_tasks_for_parallel_layer.h" +#include "local-execution/local_atomic_tensor_backing.h" +#include "local-execution/local_task_registry.h" +#include "local-pcg-execution/local_parallel_tensor_backing.h" +#include "local-pcg-execution/task_group_execution_times.dtg.h" +#include "task-spec/fwb_op_task_type.h" +#include "task-spec/training_symbolic_computation_graph.h" +#include "utils/containers/flatmap.h" +#include "utils/containers/lift_optional_through_map.h" +#include "utils/containers/map_values.h" +#include "utils/containers/values.h" +#include "utils/containers/all_of.h" + +namespace FlexFlow { + +std::unordered_map prepare_parallel_runtime_task_invocations( + RuntimeTaskInvocation const &runtime_task_invocation, + LocalParallelTensorBacking const ¶llel_tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + Allocator &allocator, + RuntimeArgConfig const &runtime_arg_config, + MappedRuntimeTaskGroup const &task_group) { + + std::unordered_map + atomic_task_invocations = + lower_parallel_runtime_task_invocation_to_atomic_task_invocation_group( + parallel_tensor_backing, + runtime_task_invocation, + runtime_arg_config, + task_group); + + return map_values(atomic_task_invocations, + [&](AtomicTaskInvocation const &atomic_task_invocation) + -> LocalReadyToLaunchTask + { + TaskArgumentAccessor task_arg_accessor = + get_task_arg_accessor_for_atomic_task_invocation( + atomic_tensor_backing, + atomic_task_invocation, + allocator); + + return LocalReadyToLaunchTask{ + atomic_task_invocation.task_id, + task_arg_accessor, + }; + }); +} + + std::optional execute_init_for_parallel_layer( + symbolic_layer_guid_t symbolic_layer_guid, + TrainingSymbolicComputationGraph const &g, + LocalParallelTensorBacking const ¶llel_tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + RuntimeArgConfig const &runtime_arg_config, + MappedRuntimeTaskGroup const &task_group) { + + SymbolicCgOpAttrsAndTrainingSignatureWithShapes attrs_and_signature = + get_attrs_and_signature_for_layer(g, symbolic_layer_guid); + + 
RuntimeTaskInvocation runtime_task_invocation = ({ + std::optional maybe_runtime_task_invocation = + get_init_runtime_task_invocation_for_layer( + symbolic_layer_guid, + attrs_and_signature); + if (!maybe_runtime_task_invocation.has_value()) { + return std::nullopt; + } + maybe_runtime_task_invocation.value(); + }); + + std::unordered_map + prepared_tasks = prepare_parallel_runtime_task_invocations( + runtime_task_invocation, + parallel_tensor_backing, + atomic_tensor_backing, + allocator, + runtime_arg_config, + task_group); + + std::unordered_map> op_state_by_shard = + map_values(prepared_tasks, + [&](LocalReadyToLaunchTask const &prepared_task) -> std::optional { + return call_init_task_impl( + task_registry, + prepared_task.task_id, + prepared_task.task_arg_accessor); + }); + + return transform( + lift_optional_through_map(op_state_by_shard), + [](std::unordered_map const &m) { + return MappedPerDeviceOpStatesGroup{m}; + }); +} + + +static std::optional execute_fwb_for_parallel_layer( + symbolic_layer_guid_t symbolic_layer_guid, + TrainingSymbolicComputationGraph const &g, + LocalParallelTensorBacking const ¶llel_tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + RuntimeArgConfig const &runtime_arg_config, + MappedRuntimeTaskGroup const &task_group, + FwbOpTaskType fwb_task_type) { + + SymbolicCgOpAttrsAndTrainingSignatureWithShapes attrs_and_signature = + get_attrs_and_signature_for_layer(g, symbolic_layer_guid); + + OpTaskType op_task_type = assert_unwrap( + op_task_type_from_fwb_op_task_type(fwb_task_type)); + + RuntimeTaskInvocation runtime_task_invocation = ({ + std::optional maybe_runtime_task_invocation = + get_runtime_task_invocation_for_layer_and_type( + symbolic_layer_guid, + attrs_and_signature, + op_task_type); + if (!maybe_runtime_task_invocation.has_value()) { + return std::nullopt; + } + maybe_runtime_task_invocation.value(); + }); + + std::unordered_map + prepared_tasks = prepare_parallel_runtime_task_invocations( + runtime_task_invocation, + parallel_tensor_backing, + atomic_tensor_backing, + allocator, + runtime_arg_config, + task_group); + + + std::unordered_map> timing_by_shard = + map_values(prepared_tasks, + [&](LocalReadyToLaunchTask const &prepared_task) -> std::optional { + return call_fwb_task_impl( + task_registry, + prepared_task.task_id, + prepared_task.task_arg_accessor); + }); + + return transform( + lift_optional_through_map(timing_by_shard), + [](std::unordered_map const &m) { + return TaskGroupExecutionTimes{m}; + }); +} + +std::optional execute_forward_for_parallel_layer( + symbolic_layer_guid_t symbolic_layer_guid, + TrainingSymbolicComputationGraph const &g, + LocalParallelTensorBacking const ¶llel_tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + RuntimeArgConfig const &runtime_arg_config, + MappedRuntimeTaskGroup const &task_group) { + + return execute_fwb_for_parallel_layer( + symbolic_layer_guid, + g, + parallel_tensor_backing, + atomic_tensor_backing, + allocator, + task_registry, + runtime_arg_config, + task_group, + FwbOpTaskType::FWD); +} + +std::optional execute_backward_for_parallel_layer( + symbolic_layer_guid_t symbolic_layer_guid, + TrainingSymbolicComputationGraph const &g, + LocalParallelTensorBacking const ¶llel_tensor_backing, + LocalAtomicTensorBacking const &atomic_tensor_backing, + Allocator &allocator, + LocalTaskRegistry const &task_registry, + 
RuntimeArgConfig const &runtime_arg_config, + MappedRuntimeTaskGroup const &task_group) { + + return execute_fwb_for_parallel_layer( + symbolic_layer_guid, + g, + parallel_tensor_backing, + atomic_tensor_backing, + allocator, + task_registry, + runtime_arg_config, + task_group, + FwbOpTaskType::BWD); +} + +} // namespace FlexFlow diff --git a/lib/local-pcg-execution/src/local-pcg-execution/local_parallel_tensor_backing.cc b/lib/local-pcg-execution/src/local-pcg-execution/local_parallel_tensor_backing.cc new file mode 100644 index 0000000000..4fb9f8490a --- /dev/null +++ b/lib/local-pcg-execution/src/local-pcg-execution/local_parallel_tensor_backing.cc @@ -0,0 +1,81 @@ +#include "local-pcg-execution/local_parallel_tensor_backing.h" +#include "local-pcg-execution/local_pcg_args_backing.dtg.h" +#include "local-pcg-execution/runtime_atomic_task_shard_binding.dtg.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" +#include "task-spec/lower_op_task_invocation_to_runtime_task_invocation.h" +#include "utils/containers/map_values.h" +#include "utils/containers/map_values2.h" +#include "utils/containers/try_at.h" +#include "local-pcg-execution/local_pcg_args_backing.h" + +namespace FlexFlow { + +std::unordered_map + lower_parallel_runtime_task_invocation_to_atomic_task_invocation_group( + LocalParallelTensorBacking const ¶llel_tensor_backing, + LocalPcgArgsBacking const ¶llel_args_backing, + RuntimeTaskInvocation const &runtime_task_invocation, + MappedRuntimeTaskGroup const &runtime_task_group) { + + std::unordered_map + shard_bindings = runtime_task_group.get_shard_bindings().as_unordered_map(); + + return + map_values2( + shard_bindings, + [&](MachineSpaceCoordinate const &machine_space_coord, + RuntimeAtomicTaskShardBinding const &shard_binding) + -> AtomicTaskInvocation + { + return lower_parallel_runtime_task_invocation_to_atomic_task_invocation( + parallel_tensor_backing, + runtime_task_invocation, + parallel_args_backing.runtime_arg_config, + get_op_states_for_machine_space_coord(parallel_args_backing, machine_space_coord), + machine_space_coord, + shard_binding); + }); +} + + +AtomicTaskInvocation + lower_parallel_runtime_task_invocation_to_atomic_task_invocation( + LocalParallelTensorBacking const ¶llel_tensor_backing, + RuntimeTaskInvocation const &invocation, + RuntimeArgConfig const &runtime_arg_config, + std::unordered_map> const &per_device_op_states, + MachineSpaceCoordinate const &machine_space_coord, + RuntimeAtomicTaskShardBinding const &shard_binding) { + + std::unordered_map + tensor_bindings = map_values(invocation.binding.get_tensor_bindings(), + [&](symbolic_training_tensor_guid_t t) + -> atomic_training_tensor_guid_t + { + return parallel_tensor_backing.parallel_tensor_map.at(t); + }); + + auto get_op_state_for_layer = [&](symbolic_layer_guid_t l) -> std::optional { + return per_device_op_states.at(l); + }; + + std::unordered_map + arg_bindings = map_values(invocation.binding.get_arg_bindings(), + [&](RuntimeArgSpec const &arg_spec) -> ConcreteArgSpec + { + return lower_runtime_arg_ref_spec_to_concrete_arg_spec( + arg_spec, + runtime_arg_config, + get_op_state_for_layer); + }); + + return AtomicTaskInvocation{ + invocation.task_id, + AtomicTaskBinding{ + tensor_bindings, + arg_bindings, + }, + }; +} + +} // namespace FlexFlow diff --git a/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc b/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc new file mode 100644 index 0000000000..a43f960d9e --- /dev/null +++ 
b/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc @@ -0,0 +1,61 @@ +#include "local-pcg-execution/local_pcg_args_backing.h" + +namespace FlexFlow { + +std::unordered_map> + get_op_states_for_machine_space_coord(LocalPcgArgsBacking const &args_backing, MachineSpaceCoordinate const &coord) { + + return map_values( + args_backing.per_device_op_states, + [&](std::optional const &m_g) { + return transform( + m_g, + [&](MappedPerDeviceOpStatesGroup const &g) { + return g.get_per_device_op_states().at_l(coord); + }); + }); +} + + +// +// +// TaskArgumentAccessor +// get_task_arg_accessor(LocalParallelTensorBacking const &local_parallel_tensor_backing, +// RuntimeArgConfig const &runtime_arg_config, +// TaskInvocation const &invocation, +// Allocator &allocator) { +// std::unordered_map +// tensor_slots_backing = construct_tensor_slots_backing_for_binding( +// local_tensor_backing, invocation.binding); +// +// std::unordered_map arg_slots_backing = +// construct_arg_slots_backing(invocation.binding, runtime_arg_config); +// +// return TaskArgumentAccessor::create( +// allocator, tensor_slots_backing, arg_slots_backing, ); +// } +// +// LocalPcgArgsBacking make_local_pcg_args_backing_for_parallel_computation_graph( +// LocalTaskRegistry const &task_registry, +// TrainingParallelComputationGraph const &training_pcg, +// RuntimeArgConfig const &runtime_arg_config, +// LocalParallelTensorBacking const &local_parallel_tensor_backing, +// Allocator &allocator) { +// +// std::unordered_map> +// per_device_op_states = generate_map( +// get_parallel_layers(training_pcg.pcg), +// [&](parallel_layer_instance_id const ¶llel_layer_guid) { +// return create_per_device_op_state( +// task_registry, +// local_tensor_backing, +// runtime_arg_config, +// allocator, +// get_training_layer_plus_context(training_computation_graph, +// layer_guid)); +// }); +// +// } + + +} // namespace FlexFlow diff --git a/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_training_backing.cc b/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_training_backing.cc new file mode 100644 index 0000000000..e006e09e89 --- /dev/null +++ b/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_training_backing.cc @@ -0,0 +1,54 @@ +#include "local-pcg-execution/local_pcg_training_backing.h" +#include "local-execution/local_task_registry.h" + +namespace FlexFlow { + +LocalPcgTrainingBacking make_local_pcg_training_backing_for_pcg( + Allocator &allocator, + std::unordered_map + const &preallocated_tensors, + TrainingParallelComputationGraph const &training_pcg, + RuntimeArgConfig const &runtime_arg_config, + OptimizerAttrs const &optimizer_attrs, + MachineComputeSpecification const &machine_compute_specification) { + + NOT_IMPLEMENTED(); +} + +std::optional> + execute_forward(LocalTaskRegistry const &local_task_registry, + LocalParallelTensorBacking const &, + LocalPcgArgsBacking const &, + TrainingParallelLayerPlusContext const &training_parallel_layer, + Allocator &) { + + // std::optional maybe_registered_task = try_get_registered_task( + // local_task_registry, training_parallel_layer.parallel_layer_guid, OpTaskType::FWD); + // + // ASSERT(maybe_registered_task.has_value()); + // + // registered_task_t registered_task = maybe_registered_task.value(); + // if (registered_task.is_noop_task()) { + // return std::nullopt; + // } + + NOT_IMPLEMENTED(); +} + +std::optional> + execute_backward() { + NOT_IMPLEMENTED(); +} + +void compute_loss(LocalPcgTrainingBacking const &, LossAttrs const &, Allocator &) 
{ + NOT_IMPLEMENTED(); +} + +void execute_update(LocalPcgTrainingBacking const &, + parallel_layer_guid_t const &, + OptimizerAttrs const &, + Allocator &) { + NOT_IMPLEMENTED(); +} + +} // namespace FlexFlow diff --git a/lib/local-pcg-execution/src/local-pcg-execution/mapped_per_device_op_states_group.cc b/lib/local-pcg-execution/src/local-pcg-execution/mapped_per_device_op_states_group.cc new file mode 100644 index 0000000000..3a18c8755a --- /dev/null +++ b/lib/local-pcg-execution/src/local-pcg-execution/mapped_per_device_op_states_group.cc @@ -0,0 +1,117 @@ +#include "local-pcg-execution/mapped_per_device_op_states_group.h" +#include "op-attrs/get_operator_task_space.h" +#include "op-attrs/operator_task_space.h" +#include "op-attrs/parallel_tensor_space_coordinate.h" +#include "pcg/machine_view.h" +#include "utils/bidict/generate_bidict.h" +#include "utils/containers/require_all_same.h" +#include "utils/containers/transform.h" +#include "utils/containers/vector_of.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/containers/are_all_distinct.h" +#include "utils/hash/tuple.h" + +namespace FlexFlow { + +MappedPerDeviceOpStatesGroup::MappedPerDeviceOpStatesGroup( + bidict const &per_device_op_states) + : per_device_op_states(per_device_op_states) +{ + auto check_arity = [&](TensorRole tensor_role) -> nonnegative_int { + std::unordered_set arities = + transform(shard_bindings.right_values(), + [&](OperatorAtomicTaskShardBinding const &s) -> nonnegative_int { + return num_elements(ptensor_space_coords_for_role(s, tensor_role)); + }); + + return require_all_same(arities).value_or(0_n); + }; + + nonnegative_int num_inputs = check_arity(TensorRole::INPUT); + nonnegative_int num_weights = check_arity(TensorRole::WEIGHT); + nonnegative_int num_outputs = check_arity(TensorRole::OUTPUT); + + std::unordered_set all_keys = + all_keys_for_signature_arities( + /*num_inputs=*/num_inputs, + /*num_weights=*/num_weights, + /*num_outputs=*/num_outputs); + + for (TaskSignatureTensorKey const &key : all_keys) { + std::vector signatures_for_key = vector_of(shard_bindings.right_values()); + + std::vector coords_for_key = + transform(signatures_for_key, + [&](OperatorAtomicTaskShardBinding const &signature) { + return ptensor_space_coord_for_key(signature, key); + }); + + ASSERT(are_all_distinct(coords_for_key)); + + std::vector coord_dims_for_key = + transform(coords_for_key, + [](ParallelTensorSpaceCoordinate const &c) { + return ptensor_coord_num_dims(c); + }); + + require_all_same(coord_dims_for_key); + } +} + +bool MappedPerDeviceOpStatesGroup::operator==(MappedPerDeviceOpStatesGroup const &other) const { + return this->tie() == other.tie(); +} + +bool MappedPerDeviceOpStatesGroup::operator!=(MappedPerDeviceOpStatesGroup const &other) const { + return this->tie() != other.tie(); +} + +std::tuple< + bidict const & +> MappedPerDeviceOpStatesGroup::tie() const { + + return std::tie(this->shard_bindings); +} + +bidict const &MappedPerDeviceOpStatesGroup::get_shard_bindings() const { + return this->shard_bindings; +} + +std::string format_as(::FlexFlow::MappedPerDeviceOpStatesGroup const &m) { + return fmt::format("", m.get_shard_bindings()); +} + +std::ostream &operator<<(std::ostream &s, ::FlexFlow::MappedPerDeviceOpStatesGroup const &x) { + return (s << fmt::to_string(x)); +} + +MappedPerDeviceOpStatesGroup + mapped_operator_task_group_from_machine_view( + ComputationGraphOpAttrs const &op_attrs, + std::vector const &inputs_dim_degrees, + MachineView const &machine_view) { + 
OperatorTaskSpace op_task_space = get_operator_task_space(op_attrs, inputs_dim_degrees); + + return MappedPerDeviceOpStatesGroup{ + generate_bidict(get_machine_space_coordinates(op_task_space, machine_view), + [&](MachineSpaceCoordinate const &machine_space_coord) { + return operator_atomic_task_shard_binding_from_machine_view( + op_attrs, + inputs_dim_degrees, + machine_view, + machine_space_coord); + }), + }; +} + + +} // namespace FlexFlow + +namespace std { + +size_t hash<::FlexFlow::MappedPerDeviceOpStatesGroup>::operator()(::FlexFlow::MappedPerDeviceOpStatesGroup const &x) const { + return ::FlexFlow::get_std_hash(x.tie()); +} + +} // namespace std diff --git a/lib/local-pcg-execution/src/local-pcg-execution/mapped_runtime_task_group.cc b/lib/local-pcg-execution/src/local-pcg-execution/mapped_runtime_task_group.cc new file mode 100644 index 0000000000..004b0f9a51 --- /dev/null +++ b/lib/local-pcg-execution/src/local-pcg-execution/mapped_runtime_task_group.cc @@ -0,0 +1,117 @@ +#include "local-pcg-execution/mapped_runtime_task_group.h" +#include "compiler/operator_atomic_task_shard_binding.h" +#include "compiler/task_signature_tensor_key.h" +#include "local-pcg-execution/runtime_atomic_task_shard_binding.dtg.h" +#include "local-pcg-execution/runtime_atomic_task_shard_binding.h" +#include "op-attrs/get_operator_task_space.h" +#include "op-attrs/operator_task_space.h" +#include "op-attrs/parallel_tensor_space_coordinate.h" +#include "pcg/machine_view.h" +#include "utils/bidict/algorithms/transform_values.h" +#include "utils/bidict/generate_bidict.h" +#include "utils/containers/require_all_same.h" +#include "compiler/task_signature_tensor_key.dtg.h" +#include "utils/containers/transform.h" +#include "utils/containers/vector_of.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/containers/are_all_distinct.h" +#include "utils/hash/tuple.h" + +namespace FlexFlow { + +MappedRuntimeTaskGroup::MappedRuntimeTaskGroup( + bidict const &shard_bindings) + : shard_bindings(shard_bindings) +{ + auto check_arity = [&](TensorRole tensor_role) -> nonnegative_int { + std::unordered_set arities = + transform(shard_bindings.right_values(), + [&](RuntimeAtomicTaskShardBinding const &s) -> nonnegative_int { + return num_elements(ptensor_space_coords_for_role(s, tensor_role)); + }); + + return require_all_same(arities).value_or(0_n); + }; + + nonnegative_int num_inputs = check_arity(TensorRole::INPUT); + nonnegative_int num_weights = check_arity(TensorRole::WEIGHT); + nonnegative_int num_outputs = check_arity(TensorRole::OUTPUT); + + std::unordered_set all_keys = + all_keys_for_signature_arities( + /*num_inputs=*/num_inputs, + /*num_weights=*/num_weights, + /*num_outputs=*/num_outputs); + + for (TaskSignatureTensorKey const &key : all_keys) { + std::vector signatures_for_key = vector_of(shard_bindings.right_values()); + + std::vector coords_for_key = + transform(signatures_for_key, + [&](RuntimeAtomicTaskShardBinding const &signature) { + return ptensor_space_coord_for_key(signature, key); + }); + + ASSERT(are_all_distinct(coords_for_key)); + + std::vector coord_dims_for_key = + transform(coords_for_key, + [](ParallelTensorSpaceCoordinate const &c) { + return ptensor_coord_num_dims(c); + }); + + require_all_same(coord_dims_for_key); + } +} + +bool MappedRuntimeTaskGroup::operator==(MappedRuntimeTaskGroup const &other) const { + return this->tie() == other.tie(); +} + +bool MappedRuntimeTaskGroup::operator!=(MappedRuntimeTaskGroup const &other) const { + return this->tie() != other.tie(); +}
+ +std::tuple< + bidict const & +> MappedRuntimeTaskGroup::tie() const { + + return std::tie(this->shard_bindings); +} + +bidict const &MappedRuntimeTaskGroup::get_shard_bindings() const { + return this->shard_bindings; +} + +std::string format_as(::FlexFlow::MappedRuntimeTaskGroup const &m) { + return fmt::format("", m.get_shard_bindings()); +} + +std::ostream &operator<<(std::ostream &s, ::FlexFlow::MappedRuntimeTaskGroup const &x) { + return (s << fmt::to_string(x)); +} + +MappedRuntimeTaskGroup + lower_mapped_operator_task_group_to_mapped_runtime_task_group(MappedOperatorTaskGroup const &op_task_group, + SymbolicLayerTrainingTensorGroupSignature const &symbolic_layer_signature, + FwbOpTaskType task_type) { + return MappedRuntimeTaskGroup{ + transform_values( + op_task_group.get_shard_bindings(), + [&](RuntimeAtomicTaskShardBinding const &op_shard_binding) + -> RuntimeAtomicTaskShardBinding + { + return lower_op_shard_binding_to_runtime_shard_binding(op_shard_binding, symbolic_layer_signature, task_type); + }), + }; +} + +} // namespace FlexFlow + +namespace std { + +size_t hash<::FlexFlow::MappedRuntimeTaskGroup>::operator()(::FlexFlow::MappedRuntimeTaskGroup const &x) const { + return ::FlexFlow::get_std_hash(x.tie()); +} + +} // namespace std diff --git a/lib/local-pcg-execution/src/local-pcg-execution/runtime_atomic_task_shard_binding.cc b/lib/local-pcg-execution/src/local-pcg-execution/runtime_atomic_task_shard_binding.cc new file mode 100644 index 0000000000..9068560a25 --- /dev/null +++ b/lib/local-pcg-execution/src/local-pcg-execution/runtime_atomic_task_shard_binding.cc @@ -0,0 +1,81 @@ +#include "local-pcg-execution/runtime_atomic_task_shard_binding.h" +#include "compiler/operator_atomic_task_shard_binding.h" +#include "op-attrs/tensor_role.dtg.h" +#include "task-spec/fwb_tensor_type.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.h" +#include "utils/containers/map_from_keys_and_values.h" +#include "utils/containers/merge_disjoint_maps.h" +#include "utils/containers/transform.h" + +namespace FlexFlow { + +static std::unordered_map + get_tensor_shard_binding_for_type( + SymbolicLayerTrainingTensorGroupSignature const &signature, + OperatorAtomicTaskShardBinding const &shard_binding, + TensorRole tensor_role, + FwbTensorType tensor_type) { + + std::vector keys + = get_training_tensors_for_role_and_type(signature, tensor_role, tensor_type); + + std::vector pt_coords + = ptensor_space_coords_for_role(shard_binding, tensor_role); + + return map_from_keys_and_values( + /*keys=*/keys, + /*values=*/pt_coords); +}; + +RuntimeAtomicTaskShardBinding + lower_op_shard_binding_to_fwd_pass_runtime_shard_binding(OperatorAtomicTaskShardBinding const &op_shard_binding, + SymbolicLayerTrainingTensorGroupSignature const &signature) { + + auto get_bindings = [&](TensorRole tensor_role, FwbTensorType tensor_type) { + return get_tensor_shard_binding_for_type(signature, op_shard_binding, tensor_role, tensor_type); + }; + + return RuntimeAtomicTaskShardBinding{ + merge_disjoint_maps(std::vector{ + get_bindings(TensorRole::INPUT, FwbTensorType::FORWARD), + get_bindings(TensorRole::WEIGHT, FwbTensorType::FORWARD), + get_bindings(TensorRole::OUTPUT, FwbTensorType::FORWARD), + }), + }; +} + +RuntimeAtomicTaskShardBinding + lower_op_shard_binding_to_bwd_pass_runtime_shard_binding(OperatorAtomicTaskShardBinding const &op_shard_binding, + SymbolicLayerTrainingTensorGroupSignature const &signature) { + + auto get_bindings = [&](TensorRole tensor_role, FwbTensorType tensor_type) { + return 
get_tensor_shard_binding_for_type(signature, op_shard_binding, tensor_role, tensor_type); + }; + + return RuntimeAtomicTaskShardBinding{ + merge_disjoint_maps(std::vector{ + get_bindings(TensorRole::INPUT, FwbTensorType::FORWARD), + get_bindings(TensorRole::WEIGHT, FwbTensorType::FORWARD), + get_bindings(TensorRole::OUTPUT, FwbTensorType::FORWARD), + get_bindings(TensorRole::INPUT, FwbTensorType::GRADIENT), + get_bindings(TensorRole::WEIGHT, FwbTensorType::GRADIENT), + get_bindings(TensorRole::OUTPUT, FwbTensorType::GRADIENT), + }), + }; +} + +RuntimeAtomicTaskShardBinding + lower_op_shard_binding_to_runtime_shard_binding(OperatorAtomicTaskShardBinding const &shard_binding, + SymbolicLayerTrainingTensorGroupSignature const &signature, + FwbOpTaskType task_type) { + switch (task_type) { + case FwbOpTaskType::FWD: + return lower_op_shard_binding_to_fwd_pass_runtime_shard_binding(shard_binding, signature); + case FwbOpTaskType::BWD: + return lower_op_shard_binding_to_bwd_pass_runtime_shard_binding(shard_binding, signature); + default: + PANIC("Unhandled FwbOpTaskType", task_type); + } +} + +} // namespace FlexFlow diff --git a/lib/local-pcg-execution/test/CMakeLists.txt b/lib/local-pcg-execution/test/CMakeLists.txt new file mode 100644 index 0000000000..a7427fe351 --- /dev/null +++ b/lib/local-pcg-execution/test/CMakeLists.txt @@ -0,0 +1,16 @@ +ff_add_test_executable( + NAME + local-pcg-execution-tests + SRC_PATTERNS + src/*.cc + PRIVATE_INCLUDE + src/ + DEPS + doctest + utils-test-common + local-pcg-execution + kernels + op-attrs + task-spec +) + diff --git a/lib/local-pcg-execution/test/src/local-pcg-execution/local_pcg_training_backing.cc b/lib/local-pcg-execution/test/src/local-pcg-execution/local_pcg_training_backing.cc new file mode 100644 index 0000000000..429e955c1f --- /dev/null +++ b/lib/local-pcg-execution/test/src/local-pcg-execution/local_pcg_training_backing.cc @@ -0,0 +1,10 @@ +#include +#include "local-pcg-execution/local_pcg_training_backing.h" + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("LocalPcgTrainingBacking") { + CHECK_MESSAGE(false, "TODO: LocalPcgTrainingBacking"); + } +} diff --git a/lib/op-attrs/include/op-attrs/computation_graph_op_attrs.h b/lib/op-attrs/include/op-attrs/computation_graph_op_attrs.h index 52e6e12a8c..fd0707aa2e 100644 --- a/lib/op-attrs/include/op-attrs/computation_graph_op_attrs.h +++ b/lib/op-attrs/include/op-attrs/computation_graph_op_attrs.h @@ -9,7 +9,7 @@ namespace FlexFlow { OperatorType get_op_type(ComputationGraphOpAttrs const &); RecordFormatter as_dot(ComputationGraphOpAttrs const &); -ComputationGraphOpAttrs +std::optional compgraph_op_attrs_from_pcg_op_attrs(PCGOperatorAttrs const &); } // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/dim_ordered/dim_ordered.h b/lib/op-attrs/include/op-attrs/dim_ordered/dim_ordered.h deleted file mode 100644 index 5c47745209..0000000000 --- a/lib/op-attrs/include/op-attrs/dim_ordered/dim_ordered.h +++ /dev/null @@ -1,200 +0,0 @@ -#ifndef _FLEXFLOW_OPATTRS_INCLUDE_OPATTRS_FF_STACK_VECTOR_H -#define _FLEXFLOW_OPATTRS_INCLUDE_OPATTRS_FF_STACK_VECTOR_H - -#include "op-attrs/ff_dim_t.dtg.h" -#include "op-attrs/relative_ff_dim_t.dtg.h" -#include "utils/containers/range.h" -#include "utils/fmt/vector.h" -#include "utils/stack_vector/stack_vector.h" -#include - -namespace FlexFlow { - -template -struct DimOrdered { - DimOrdered() {} - - DimOrdered(std::initializer_list const &l) - : contents(l.begin(), l.end()) {} - - DimOrdered(std::vector const &contents) - : 
contents(contents.begin(), contents.end()) {} - - template - DimOrdered(It begin, It end) : contents(begin, end) {} - - template - DimOrdered(stack_vector const &contents) - : contents(contents.begin(), contents.end()) {} - - T const &at(Idx idx) const { - nonnegative_int raw = idx.value; - return this->contents.at(raw.unwrap_nonnegative()); - } - - T &at(Idx idx) { - nonnegative_int raw = idx.value; - return this->contents.at(raw.unwrap_nonnegative()); - } - - T const &operator[](Idx idx) const { - return this->at(idx); - } - - T &operator[](Idx idx) { - return this->at(idx); - } - - bool idx_is_valid(Idx const &idx) const { - nonnegative_int raw = idx.value; - return (raw < this->contents.size()); - } - - bool operator==(DimOrdered const &other) const { - return this->contents == other.contents; - } - - bool operator!=(DimOrdered const &other) const { - return this->contents != other.contents; - } - - using iterator = typename stack_vector::iterator; - using const_iterator = - typename stack_vector::const_iterator; - using reverse_iterator = - typename stack_vector::reverse_iterator; - using const_reverse_iterator = - typename stack_vector::const_reverse_iterator; - using value_type = T; - using pointer = value_type *; - using const_pointer = value_type const *; - using reference = value_type &; - using const_reference = value_type const &; - - iterator begin() { - return this->contents.begin(); - } - - const_iterator begin() const { - return this->cbegin(); - } - - const_iterator cbegin() const { - return this->contents.cbegin(); - } - - iterator end() { - return this->contents.end(); - } - - const_iterator end() const { - return this->cend(); - } - - const_iterator cend() const { - return this->contents.cend(); - } - - reverse_iterator rbegin() { - return this->contents.rbegin(); - } - - const_reverse_iterator rbegin() const { - return this->crbegin(); - } - - const_reverse_iterator crbegin() const { - return this->contents.crbegin(); - } - - reverse_iterator rend() { - return this->contents.rend(); - } - - const_reverse_iterator rend() const { - return this->crend(); - } - - const_reverse_iterator crend() const { - return this->contents.crend(); - } - - size_t size() const { - return this->contents.size(); - } - - size_t empty() const { - return this->contents.empty(); - } - - size_t num_dims() const { - return this->size(); - } - - friend struct ::std::hash; - -private: - stack_vector contents; -}; - -template -auto operator<(DimOrdered const &lhs, DimOrdered const &rhs) - -> std::enable_if_t, bool> { - return std::lexicographical_compare( - lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend()); -} - -template -std::string format_as(DimOrdered const &v) { - std::vector as_vec(v.cbegin(), v.cend()); - return fmt::format("", as_vec); -} - -template -std::ostream &operator<<(std::ostream &s, DimOrdered const &v) { - return (s << fmt::to_string(v)); -} - -} // namespace FlexFlow - -namespace nlohmann { -template -struct adl_serializer<::FlexFlow::DimOrdered> { - static ::FlexFlow::DimOrdered from_json(nlohmann::json const &j) { - return {j.template get>()}; - } - - static void to_json(nlohmann::json &j, - ::FlexFlow::DimOrdered const &x) { - j = std::vector{x.cbegin(), x.cend()}; - } -}; -} // namespace nlohmann - -namespace std { - -template -struct hash<::FlexFlow::DimOrdered> { - size_t operator()(::FlexFlow::DimOrdered const &t) const { - static_assert(::FlexFlow::is_hashable::value, - "Elements must be hashable"); - - return get_std_hash(t.contents); - } -}; - -} // namespace std - 
-namespace rc { - -template -struct Arbitrary<::FlexFlow::DimOrdered> { - static Gen<::FlexFlow::DimOrdered> arbitrary() { - return gen::construct<::FlexFlow::DimOrdered>( - gen::arbitrary<::FlexFlow::stack_vector>()); - } -}; - -} // namespace rc - -#endif diff --git a/lib/op-attrs/include/op-attrs/dim_ordered/slice.h b/lib/op-attrs/include/op-attrs/dim_ordered/slice.h deleted file mode 100644 index 76526447be..0000000000 --- a/lib/op-attrs/include/op-attrs/dim_ordered/slice.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_SLICE_H -#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_SLICE_H - -#include "op-attrs/dim_ordered/dim_ordered.h" -#include "utils/containers/slice.h" -#include "utils/containers/transform.h" -#include "utils/containers/vector_of.h" -#include "utils/optional.h" - -namespace FlexFlow { - -template -DimOrdered nonoverloaded_slice(DimOrdered const &d, - std::optional const &start, - std::optional const &end) { - auto to_raw_idx = [](std::optional const &idx) -> std::optional { - return transform(idx, [](Idx const &i) { return i.value; }); - }; - - return DimOrdered{ - slice(vector_of(d), to_raw_idx(start), to_raw_idx(end))}; -} -template -DimOrdered slice(DimOrdered const &d, - std::optional const &start = std::nullopt, - std::optional const &end = std::nullopt) { - return ff_dim_t_nonoverloaded_slice(d, start, end); -} - -} // namespace FlexFlow - -#endif diff --git a/lib/op-attrs/include/op-attrs/dim_ordered/transform.h b/lib/op-attrs/include/op-attrs/dim_ordered/transform.h deleted file mode 100644 index 4fd3df0abb..0000000000 --- a/lib/op-attrs/include/op-attrs/dim_ordered/transform.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_TRANSFORM_H -#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_TRANSFORM_H - -#include "op-attrs/dim_ordered/dim_ordered.h" -#include "utils/containers/vector_of.h" -#include "utils/containers/vector_transform.h" - -namespace FlexFlow { - -template -DimOrdered> - transform(DimOrdered const &d, F f) { - using Out = std::invoke_result_t; - - return DimOrdered{vector_transform(vector_of(d), f)}; -} - -} // namespace FlexFlow - -#endif diff --git a/lib/op-attrs/include/op-attrs/dim_ordered/zip.h b/lib/op-attrs/include/op-attrs/dim_ordered/zip.h deleted file mode 100644 index cc8b050f50..0000000000 --- a/lib/op-attrs/include/op-attrs/dim_ordered/zip.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_ZIP_H -#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_DIM_ORDERED_ZIP_H - -#include "op-attrs/dim_ordered/dim_ordered.h" -#include "utils/containers/vector_of.h" -#include "utils/containers/zip.h" - -namespace FlexFlow { - -template -DimOrdered> zip(DimOrdered const &lhs, - DimOrdered const &rhs) { - return DimOrdered>{ - zip(vector_of(lhs), vector_of(rhs))}; -} - -} // namespace FlexFlow - -#endif diff --git a/lib/op-attrs/include/op-attrs/ff_dim_t.h b/lib/op-attrs/include/op-attrs/ff_dim_t.h index 0979201f67..1411886eee 100644 --- a/lib/op-attrs/include/op-attrs/ff_dim_t.h +++ b/lib/op-attrs/include/op-attrs/ff_dim_t.h @@ -11,6 +11,8 @@ relative_ff_dim_t relative_ff_dim_t_from_ff_dim_t(ff_dim_t ff_dim); ff_dim_t add_to_ff_dim(ff_dim_t ff_dim, int value); +std::vector ff_dim_range(nonnegative_int num_elements); + } // namespace FlexFlow namespace rc { diff --git a/lib/op-attrs/include/op-attrs/ff_ordered/enumerate.h b/lib/op-attrs/include/op-attrs/ff_ordered/enumerate.h index bc8636615c..fbd828bf37 
100644 --- a/lib/op-attrs/include/op-attrs/ff_ordered/enumerate.h +++ b/lib/op-attrs/include/op-attrs/ff_ordered/enumerate.h @@ -3,7 +3,7 @@ #include "op-attrs/ff_ordered/ff_ordered.h" #include "utils/bidict/bidict.h" -#include "utils/containers/count.h" +#include "utils/containers/range.h" namespace FlexFlow { @@ -18,7 +18,7 @@ namespace FlexFlow { template std::map enumerate(FFOrdered const &ff_ordered) { std::map result; - for (int raw_ff_dim : count(ff_ordered.size())) { + for (int raw_ff_dim : range(ff_ordered.size())) { ff_dim_t ff_dim = ff_dim_t{nonnegative_int{raw_ff_dim}}; result.insert({ff_dim, ff_ordered.at(ff_dim)}); } diff --git a/lib/op-attrs/include/op-attrs/ff_ordered/get_idxs.h b/lib/op-attrs/include/op-attrs/ff_ordered/get_idxs.h index 5ff390d3fe..1ee9f6b51c 100644 --- a/lib/op-attrs/include/op-attrs/ff_ordered/get_idxs.h +++ b/lib/op-attrs/include/op-attrs/ff_ordered/get_idxs.h @@ -3,14 +3,15 @@ #include "op-attrs/ff_dim_t.h" #include "op-attrs/ff_ordered/ff_ordered.h" -#include "utils/containers/count.h" +#include "utils/containers/range.h" +#include "utils/containers/set_of.h" #include "utils/containers/transform.h" namespace FlexFlow { template -std::vector get_idxs(FFOrdered const &d) { - return transform(count(d.size()), +std::set get_idxs(FFOrdered const &d) { + return transform(set_of(range(d.size())), [](int i) { return ff_dim_t{nonnegative_int{i}}; }); } diff --git a/lib/op-attrs/include/op-attrs/ff_ordered/map_from_ff_ordered.h b/lib/op-attrs/include/op-attrs/ff_ordered/map_from_ff_ordered.h new file mode 100644 index 0000000000..9d789dd486 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/ff_ordered/map_from_ff_ordered.h @@ -0,0 +1,23 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_FF_ORDERED_MAP_FROM_FF_ORDERED_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_FF_ORDERED_MAP_FROM_FF_ORDERED_H + +#include "op-attrs/ff_ordered/ff_ordered.h" +#include "utils/nonnegative_int/num_elements.h" +#include "op-attrs/ff_dim_t.h" + +namespace FlexFlow { + +template +std::unordered_map map_from_ff_ordered(FFOrdered const &m) { + std::unordered_map result; + + for (ff_dim_t d : ff_dim_range(num_elements(m))) { + result.insert({d, m.at(d)}); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/get_incoming_tensor_roles.h b/lib/op-attrs/include/op-attrs/get_incoming_tensor_roles.h index b395736773..8b9272265f 100644 --- a/lib/op-attrs/include/op-attrs/get_incoming_tensor_roles.h +++ b/lib/op-attrs/include/op-attrs/get_incoming_tensor_roles.h @@ -8,9 +8,9 @@ namespace FlexFlow { std::vector - get_incoming_tensor_roles(ComputationGraphOpAttrs const &, int num_inputs); + get_incoming_tensor_roles(ComputationGraphOpAttrs const &, nonnegative_int num_inputs); std::vector - get_incoming_tensor_roles(PCGOperatorAttrs const &, int num_inputs); + get_incoming_tensor_roles(PCGOperatorAttrs const &, nonnegative_int num_inputs); } // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/get_operator_space_to_parallel_tensor_space_mappings.h b/lib/op-attrs/include/op-attrs/get_operator_space_to_parallel_tensor_space_mappings.h new file mode 100644 index 0000000000..8634341755 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/get_operator_space_to_parallel_tensor_space_mappings.h @@ -0,0 +1,44 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_GET_OPERATOR_SPACE_TO_PARALLEL_TENSOR_SPACE_MAPPINGS_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_GET_OPERATOR_SPACE_TO_PARALLEL_TENSOR_SPACE_MAPPINGS_H + +#include 
"op-attrs/computation_graph_op_attrs.dtg.h" +#include "op-attrs/incoming_tensor_role.dtg.h" +#include "op-attrs/num_ptensor_parallel_dims_t.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" +#include "op-attrs/tensor_role.dtg.h" +#include + +namespace FlexFlow { + +std::vector + get_operator_to_incoming_mappings(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees); + + +std::vector + get_operator_to_incoming_mappings_for_role(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees, + IncomingTensorRole role); + +std::vector + get_operator_to_input_mappings(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees); + +std::vector + get_operator_to_weight_mappings(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees); + +std::vector + get_operator_to_output_mappings(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees); + +std::vector + get_operator_to_ptensor_mappings_for_role(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees, + TensorRole role); + + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/get_operator_task_space.h b/lib/op-attrs/include/op-attrs/get_operator_task_space.h new file mode 100644 index 0000000000..9239333fe8 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/get_operator_task_space.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_GET_OPERATOR_TASK_SPACE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_GET_OPERATOR_TASK_SPACE_H + +#include "op-attrs/computation_graph_op_attrs.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" + +namespace FlexFlow { + +OperatorTaskSpace + get_operator_task_space(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/num_ptensor_parallel_dims_t.h b/lib/op-attrs/include/op-attrs/num_ptensor_parallel_dims_t.h new file mode 100644 index 0000000000..b64581dcca --- /dev/null +++ b/lib/op-attrs/include/op-attrs/num_ptensor_parallel_dims_t.h @@ -0,0 +1,70 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_NUM_PTENSOR_PARALLEL_DIMS_T_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_NUM_PTENSOR_PARALLEL_DIMS_T_H + +#include +#include +#include +#include +#include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/positive_int/positive_int.h" + +namespace FlexFlow { + +struct num_ptensor_parallel_dims_t { +public: + num_ptensor_parallel_dims_t() = delete; + explicit num_ptensor_parallel_dims_t(int); + explicit num_ptensor_parallel_dims_t(nonnegative_int); + explicit num_ptensor_parallel_dims_t(positive_int); + + bool operator<(num_ptensor_parallel_dims_t const &other) const; + bool operator==(num_ptensor_parallel_dims_t const &other) const; + bool operator>(num_ptensor_parallel_dims_t const &other) const; + bool operator<=(num_ptensor_parallel_dims_t const &other) const; + bool operator!=(num_ptensor_parallel_dims_t const &other) const; + bool operator>=(num_ptensor_parallel_dims_t const &other) const; + + int int_from_num_ptensor_parallel_dims() const; + nonnegative_int nonnegative_int_from_num_ptensor_parallel_dims() const; + positive_int positive_int_from_num_ptensor_parallel_dims() const; + +private: + int value; +private: + void check_invariant() const; +}; + +std::ostream &operator<<(std::ostream &, 
num_ptensor_parallel_dims_t const &); +std::string format_as(num_ptensor_parallel_dims_t const &); + +} // namespace FlexFlow + +namespace nlohmann { + +template <> +struct adl_serializer<::FlexFlow::num_ptensor_parallel_dims_t> { + static ::FlexFlow::num_ptensor_parallel_dims_t from_json(json const &j); + static void to_json(json &j, ::FlexFlow::num_ptensor_parallel_dims_t t); +}; + +} // namespace nlohmann + +namespace rc { + +template <> +struct Arbitrary<::FlexFlow::num_ptensor_parallel_dims_t> { + static Gen<::FlexFlow::num_ptensor_parallel_dims_t> arbitrary(); +}; + +} // namespace rc + +namespace std { + +template <> +struct hash<::FlexFlow::num_ptensor_parallel_dims_t> { + size_t operator()(::FlexFlow::num_ptensor_parallel_dims_t const &) const noexcept; +}; + +} // namespace std + +#endif diff --git a/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.h b/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.h new file mode 100644 index 0000000000..28c0b31af9 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_NUM_PTENSOR_SHARD_DIMS_T_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_NUM_PTENSOR_SHARD_DIMS_T_H + +#include "op-attrs/num_ptensor_shard_dims_t.dtg.h" +#include "op-attrs/num_ptensor_parallel_dims_t.h" + +namespace FlexFlow { + +num_ptensor_parallel_dims_t num_ptensor_parallel_dims_from_shard_dims(num_ptensor_shard_dims_t); +num_ptensor_shard_dims_t num_ptensor_shard_dims_from_parallel_dims(num_ptensor_parallel_dims_t); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.struct.toml b/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.struct.toml new file mode 100644 index 0000000000..03e85f5119 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.struct.toml @@ -0,0 +1,25 @@ +namespace = "FlexFlow" +name = "num_ptensor_shard_dims_t" +features = [ + "eq", + "ord", + "hash", + "fmt", + "json", +] + +docstring = """\ +A wrapper type describing the number of shard dims (i.e., not including replica dims) in a parallel tensor, +to prevent accidentally confusing the number of shard dims and the total number of parallel dims. + +The conversion to/from @ref num_ptensor_parallel_dims_t is trivial, and provided by the +functions @ref num_ptensor_parallel_dims_from_shard_dims and @ref num_ptensor_shard_dims_from_parallel_dims. 
+""" + +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + +[[fields]] +name = "value" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/op-attrs/include/op-attrs/num_tensor_dims_t.h b/lib/op-attrs/include/op-attrs/num_tensor_dims_t.h new file mode 100644 index 0000000000..9c48725bd5 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/num_tensor_dims_t.h @@ -0,0 +1,78 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_NUM_TENSOR_DIMS_T_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_NUM_TENSOR_DIMS_T_H + +#include "op-attrs/num_ptensor_shard_dims_t.dtg.h" +#include "op-attrs/num_ptensor_parallel_dims_t.h" +#include "op-attrs/ff_dim_t.dtg.h" +#include "op-attrs/relative_ff_dim_t.dtg.h" + +namespace FlexFlow { + +struct num_tensor_dims_t { +public: + num_tensor_dims_t() = delete; + explicit num_tensor_dims_t(nonnegative_int); + + bool operator<(num_tensor_dims_t other) const; + bool operator==(num_tensor_dims_t other) const; + bool operator>(num_tensor_dims_t other) const; + bool operator<=(num_tensor_dims_t other) const; + bool operator!=(num_tensor_dims_t other) const; + bool operator>=(num_tensor_dims_t other) const; + + bool operator<(nonnegative_int other) const; + bool operator==(nonnegative_int other) const; + bool operator>(nonnegative_int other) const; + bool operator<=(nonnegative_int other) const; + bool operator!=(nonnegative_int other) const; + bool operator>=(nonnegative_int other) const; + + friend bool operator<(nonnegative_int lhs, num_tensor_dims_t rhs); + friend bool operator==(nonnegative_int lhs, num_tensor_dims_t rhs); + friend bool operator>(nonnegative_int lhs, num_tensor_dims_t rhs); + friend bool operator<=(nonnegative_int lhs, num_tensor_dims_t rhs); + friend bool operator!=(nonnegative_int lhs, num_tensor_dims_t rhs); + friend bool operator>=(nonnegative_int lhs, num_tensor_dims_t rhs); + + bool operator<(int other) const; + bool operator==(int other) const; + bool operator>(int other) const; + bool operator<=(int other) const; + bool operator!=(int other) const; + bool operator>=(int other) const; + + friend bool operator<(int lhs, num_tensor_dims_t rhs); + friend bool operator==(int lhs, num_tensor_dims_t rhs); + friend bool operator>(int lhs, num_tensor_dims_t rhs); + friend bool operator<=(int lhs, num_tensor_dims_t rhs); + friend bool operator!=(int lhs, num_tensor_dims_t rhs); + friend bool operator>=(int lhs, num_tensor_dims_t rhs); + + nonnegative_int nonnegative_int_from_num_tensor_dims() const; + int int_from_num_tensor_dims() const; + +private: + nonnegative_int value; + +private: + void check_invariant() const; +}; + +nonnegative_int format_as(num_tensor_dims_t); +std::ostream &operator<<(std::ostream &, num_tensor_dims_t); + +num_tensor_dims_t + num_tensor_dims_from_num_ptensor_shard_dims(num_ptensor_shard_dims_t); + +num_tensor_dims_t num_tensor_dims_from_num_ptensor_parallel_dims(num_ptensor_parallel_dims_t); + +num_ptensor_shard_dims_t num_ptensor_shard_dims_from_num_tensor_dims(num_tensor_dims_t); + +num_ptensor_parallel_dims_t num_ptensor_parallel_dims_from_num_tensor_dims(num_tensor_dims_t); + +std::vector tensor_dims_range(num_tensor_dims_t); +std::vector relative_tensor_dims_range(num_tensor_dims_t); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/operator_attrs.h b/lib/op-attrs/include/op-attrs/operator_attrs.h index d94f7af4fb..a6a1c35ca7 100644 --- a/lib/op-attrs/include/op-attrs/operator_attrs.h +++ b/lib/op-attrs/include/op-attrs/operator_attrs.h @@ -1,5 +1,5 @@ -#ifndef 
_OPERATOR_PARAMS_H -#define _OPERATOR_PARAMS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_ATTRS_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_ATTRS_H #include "op-attrs/ops/attention.h" #include "op-attrs/ops/batch_matmul.h" @@ -9,7 +9,6 @@ #include "op-attrs/ops/combine.h" #include "op-attrs/ops/concat.h" #include "op-attrs/ops/conv_2d.h" -#include "op-attrs/ops/core.h" #include "op-attrs/ops/dropout.h" #include "op-attrs/ops/element_binary.h" #include "op-attrs/ops/element_unary.h" diff --git a/lib/op-attrs/include/op-attrs/operator_space_to_parallel_tensor_space_mapping.h b/lib/op-attrs/include/op-attrs/operator_space_to_parallel_tensor_space_mapping.h new file mode 100644 index 0000000000..8279fad706 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/operator_space_to_parallel_tensor_space_mapping.h @@ -0,0 +1,51 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_SPACE_TO_PARALLEL_TENSOR_SPACE_MAPPING_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_SPACE_TO_PARALLEL_TENSOR_SPACE_MAPPING_H + +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" +#include "op-attrs/parallel_tensor_space_coordinate.dtg.h" +#include "op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/task_space_coordinate.dtg.h" +#include "op-attrs/num_ptensor_parallel_dims_t.h" + +namespace FlexFlow { + +OperatorSpaceToParallelTensorSpaceMapping + empty_operator_space_to_ptensor_space_map(); + +OperatorTaskSpace + get_operator_task_space_for_mapping(OperatorSpaceToParallelTensorSpaceMapping const &); + +ParallelTensorDimDegrees + get_parallel_tensor_space_for_mapping(OperatorSpaceToParallelTensorSpaceMapping const &); + +OperatorSpaceToParallelTensorSpaceMapping + get_identity_mapping( + OperatorTaskSpace const &operator_task_space, + ParallelTensorDimDegrees const ¶llel_tensor_dim_degrees); + +OperatorSpaceToParallelTensorSpaceMapping + operator_ptensor_space_mapping_from_projection( + DimProjection const &projection, + OperatorTaskSpace const &op_task_space, + ParallelTensorDimDegrees const ¶llel_tensor_dim_degrees); + +OperatorSpaceToParallelTensorSpaceMapping + operator_ptensor_space_mapping_from_composition( + OperatorSpaceToParallelTensorSpaceMapping const &op_to_pt1_mapping, + ParallelTensorSpaceToParallelTensorSpaceMapping const &pt1_to_pt2_mapping); + +ParallelTensorSpaceCoordinate + ptensor_coord_for_task_space_coord( + OperatorSpaceToParallelTensorSpaceMapping const &mapping, + TaskSpaceCoordinate const &task_space_coord); + +TaskSpaceCoordinate + task_space_coord_for_ptensor_coord( + OperatorSpaceToParallelTensorSpaceMapping const &mapping, + ParallelTensorSpaceCoordinate const &tensor_space_coordinate); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/operator_space_to_parallel_tensor_space_mapping.struct.toml b/lib/op-attrs/include/op-attrs/operator_space_to_parallel_tensor_space_mapping.struct.toml new file mode 100644 index 0000000000..0f7880d052 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/operator_space_to_parallel_tensor_space_mapping.struct.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "OperatorSpaceToParallelTensorSpaceMapping" +features = [ + "eq", + "hash", + "fmt", +] + +includes = [ + "utils/orthotope/dim_domain_mapping.h", + "op-attrs/operator_task_space_dim_idx_t.dtg.h", + "op-attrs/parallel_tensor_dim_idx_t.dtg.h", +] + +[[fields]] +name = 
"raw_mapping" +type = "::FlexFlow::DimDomainMapping<::FlexFlow::operator_task_space_dim_idx_t, ::FlexFlow::parallel_tensor_dim_idx_t>" diff --git a/lib/op-attrs/include/op-attrs/operator_task_space.h b/lib/op-attrs/include/op-attrs/operator_task_space.h new file mode 100644 index 0000000000..34a4bc291c --- /dev/null +++ b/lib/op-attrs/include/op-attrs/operator_task_space.h @@ -0,0 +1,50 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_TASK_SPACE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_TASK_SPACE_H + +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/operator_task_space_dim_idx_t.dtg.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" +#include "op-attrs/task_space_coordinate.dtg.h" +#include "utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/dim_ordering.dtg.h" +#include "utils/orthotope/minimal_dim_domain.dtg.h" +#include + +namespace FlexFlow { + +OperatorTaskSpace trivial_op_task_space(); + +std::unordered_set + operator_task_space_get_dim_idxs(OperatorTaskSpace const &); + +std::unordered_set + get_task_space_coordinates(OperatorTaskSpace const &operator_task_space); + +bool operator_task_space_contains_coord(OperatorTaskSpace const &, + TaskSpaceCoordinate const &); + +TaskSpaceCoordinate + get_task_space_maximum_coordinate(OperatorTaskSpace const &operator_task_space); + +nonnegative_int op_task_space_num_dims(OperatorTaskSpace const &operator_task_space); +positive_int num_tasks(OperatorTaskSpace const &operator_task_space); + +positive_int op_task_space_dim_size_for_idx(OperatorTaskSpace const &, + operator_task_space_dim_idx_t); + +MinimalDimDomain + minimal_dim_domain_from_operator_task_space(OperatorTaskSpace const &); + +OperatorTaskSpace + operator_task_space_from_minimal_dim_domain(MinimalDimDomain const &); + +DimOrdering + get_operator_task_space_dim_ordering(); + +OperatorTaskSpace + get_operator_task_space_matching_parallel_tensor_dim_degrees( + ParallelTensorDimDegrees const &dim_degrees); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/operator_task_space.struct.toml b/lib/op-attrs/include/op-attrs/operator_task_space.struct.toml new file mode 100644 index 0000000000..e0ed0303ad --- /dev/null +++ b/lib/op-attrs/include/op-attrs/operator_task_space.struct.toml @@ -0,0 +1,19 @@ +namespace = "FlexFlow" +name = "OperatorTaskSpace" +features = [ + "eq", + "ord", + "hash", + "json", + "rapidcheck", + "fmt", +] + +includes = [ + "", + "utils/orthotope/minimal_orthotope.dtg.h", +] + +[[fields]] +name = "degrees" +type = "::FlexFlow::MinimalOrthotope" diff --git a/lib/op-attrs/include/op-attrs/operator_task_space_dim_idx_t.h b/lib/op-attrs/include/op-attrs/operator_task_space_dim_idx_t.h new file mode 100644 index 0000000000..30a6845734 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/operator_task_space_dim_idx_t.h @@ -0,0 +1,15 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_TASK_SPACE_DIM_IDX_T_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_TASK_SPACE_DIM_IDX_T_H + +#include "op-attrs/operator_task_space_dim_idx_t.dtg.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include + +namespace FlexFlow { + +std::set + operator_task_space_dim_idx_range(nonnegative_int end); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/operator_task_space_dim_idx_t.struct.toml b/lib/op-attrs/include/op-attrs/operator_task_space_dim_idx_t.struct.toml new file mode 100644 index 0000000000..95e4b72977 --- /dev/null +++ 
b/lib/op-attrs/include/op-attrs/operator_task_space_dim_idx_t.struct.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "operator_task_space_dim_idx_t" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", +] + +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + +[[fields]] +name = "raw_idx" +type = "::FlexFlow::nonnegative_int" diff --git a/lib/op-attrs/include/op-attrs/operator_task_space_to_operator_task_space_mapping.h b/lib/op-attrs/include/op-attrs/operator_task_space_to_operator_task_space_mapping.h new file mode 100644 index 0000000000..5de62e6739 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/operator_task_space_to_operator_task_space_mapping.h @@ -0,0 +1,30 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_TASK_SPACE_TO_OPERATOR_TASK_SPACE_MAPPING_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPERATOR_TASK_SPACE_TO_OPERATOR_TASK_SPACE_MAPPING_H + +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/operator_task_space_to_operator_task_space_mapping.dtg.h" +#include "op-attrs/task_space_coordinate.dtg.h" + +namespace FlexFlow { + +OperatorTaskSpaceToOperatorTaskSpaceMapping op_to_op_identity_mapping(OperatorTaskSpace const &, + OperatorTaskSpace const &); + +OperatorTaskSpace + op_mapping_get_src_space(OperatorTaskSpaceToOperatorTaskSpaceMapping const &); + +OperatorTaskSpace + op_mapping_get_dst_space(OperatorTaskSpaceToOperatorTaskSpaceMapping const &); + +bidict + op_to_op_get_coord_mapping(OperatorTaskSpaceToOperatorTaskSpaceMapping const &); + +OperatorTaskSpaceToOperatorTaskSpaceMapping + op_to_op_mapping_from_composition_through_tensor( + OperatorSpaceToParallelTensorSpaceMapping const &src_to_tensor_mapping, + OperatorSpaceToParallelTensorSpaceMapping const &dst_to_tensor_mapping); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/operator_task_space_to_operator_task_space_mapping.struct.toml b/lib/op-attrs/include/op-attrs/operator_task_space_to_operator_task_space_mapping.struct.toml new file mode 100644 index 0000000000..33ecabb708 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/operator_task_space_to_operator_task_space_mapping.struct.toml @@ -0,0 +1,16 @@ +namespace = "FlexFlow" +name = "OperatorTaskSpaceToOperatorTaskSpaceMapping" +features = [ + "eq", + "hash", + "fmt" +] + +includes = [ + "utils/orthotope/dim_domain_mapping.h", + "op-attrs/operator_task_space_dim_idx_t.dtg.h", +] + +[[fields]] +name = "raw_mapping" +type = "::FlexFlow::DimDomainMapping<::FlexFlow::operator_task_space_dim_idx_t, ::FlexFlow::operator_task_space_dim_idx_t>" diff --git a/lib/op-attrs/include/op-attrs/ops/attention.h b/lib/op-attrs/include/op-attrs/ops/attention.h index 5ca237561f..9407cc6942 100644 --- a/lib/op-attrs/include/op-attrs/ops/attention.h +++ b/lib/op-attrs/include/op-attrs/ops/attention.h @@ -1,12 +1,11 @@ -#ifndef _FLEXFLOW_ATTENTION_ATTRS_H -#define _FLEXFLOW_ATTENTION_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_ATTENTION_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_ATTENTION_H #include "op-attrs/incoming_tensor_role.dtg.h" #include "op-attrs/initializer_attrs.dtg.h" #include "op-attrs/ops/attention/multihead_attention_inputs.dtg.h" #include "op-attrs/ops/attention/multihead_attention_parallel_inputs.dtg.h" #include "op-attrs/ops/attention_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" #include @@ -123,7 
+122,6 @@ tl::expected, std::string> get_initializers( std::optional const &output_bias_initializer = std::nullopt); -CHECK_VALID_OP_ATTR(MultiHeadAttentionAttrs); } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h index 333da4fa29..f17757ac85 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul.h @@ -2,15 +2,12 @@ #define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_BATCH_MATMUL_H #include "op-attrs/ops/batch_matmul_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" #include namespace FlexFlow { -CHECK_VALID_OP_ATTR(BatchMatmulAttrs); - bool is_valid(BatchMatmulAttrs const &, ParallelTensorShape const &, ParallelTensorShape const &); diff --git a/lib/op-attrs/include/op-attrs/ops/batch_matmul_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/batch_matmul_attrs.struct.toml index 394dfb5fcc..0ec3f3e319 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_matmul_attrs.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/batch_matmul_attrs.struct.toml @@ -11,7 +11,7 @@ features = [ ] includes = [ - "utils/nonnegative_int/nonnegative_int.h", + "utils/positive_int/positive_int.h", "", ] @@ -23,8 +23,8 @@ src_includes = [ [[fields]] name = "a_seq_length_dim" -type = "std::optional<::FlexFlow::nonnegative_int>" +type = "std::optional<::FlexFlow::positive_int>" [[fields]] name = "b_seq_length_dim" -type = "std::optional<::FlexFlow::nonnegative_int>" +type = "std::optional<::FlexFlow::positive_int>" diff --git a/lib/op-attrs/include/op-attrs/ops/batch_norm.h b/lib/op-attrs/include/op-attrs/ops/batch_norm.h index bcf6794f38..35d6cb496d 100644 --- a/lib/op-attrs/include/op-attrs/ops/batch_norm.h +++ b/lib/op-attrs/include/op-attrs/ops/batch_norm.h @@ -4,10 +4,10 @@ #include "op-attrs/incoming_tensor_role.dtg.h" #include "op-attrs/initializer_attrs.dtg.h" #include "op-attrs/ops/batch_norm_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" +#include namespace FlexFlow { @@ -61,8 +61,6 @@ tl::expected, std::string> tl::expected, std::string> get_initializers(BatchNormAttrs const &attrs); -CHECK_VALID_OP_ATTR(BatchNormAttrs); - } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/broadcast.h b/lib/op-attrs/include/op-attrs/ops/broadcast.h index 4fd7d49234..9b6bd49418 100644 --- a/lib/op-attrs/include/op-attrs/ops/broadcast.h +++ b/lib/op-attrs/include/op-attrs/ops/broadcast.h @@ -2,15 +2,13 @@ #define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_BROADCAST_H #include "op-attrs/ops/broadcast_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" #include "utils/record_formatter.h" +#include namespace FlexFlow { -CHECK_VALID_OP_ATTR(BroadcastAttrs); - RecordFormatter as_dot(BroadcastAttrs const &); tl::expected get_output_shape(BroadcastAttrs const &, diff --git a/lib/op-attrs/include/op-attrs/ops/cast.h b/lib/op-attrs/include/op-attrs/ops/cast.h index 30818f046d..38a1e87a76 100644 --- a/lib/op-attrs/include/op-attrs/ops/cast.h +++ b/lib/op-attrs/include/op-attrs/ops/cast.h @@ -1,8 +1,7 @@ -#ifndef _FLEXFLOW_CAST_ATTRS_H -#define _FLEXFLOW_CAST_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_CAST_H +#define 
_FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_CAST_H #include "op-attrs/ops/cast_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" #include "utils/record_formatter.h" @@ -10,8 +9,6 @@ namespace FlexFlow { -CHECK_VALID_OP_ATTR(CastAttrs); - RecordFormatter as_dot(CastAttrs const &); tl::expected get_output_shape(CastAttrs const &, diff --git a/lib/op-attrs/include/op-attrs/ops/combine.h b/lib/op-attrs/include/op-attrs/ops/combine.h index d9ca314c2b..6839bc12e1 100644 --- a/lib/op-attrs/include/op-attrs/ops/combine.h +++ b/lib/op-attrs/include/op-attrs/ops/combine.h @@ -2,15 +2,12 @@ #define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_COMBINE_H #include "op-attrs/ops/combine_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "utils/record_formatter.h" #include namespace FlexFlow { -CHECK_VALID_OP_ATTR(CombineAttrs); - RecordFormatter as_dot(CombineAttrs const &); tl::expected diff --git a/lib/op-attrs/include/op-attrs/ops/concat.h b/lib/op-attrs/include/op-attrs/ops/concat.h index f07f06df85..1647553b96 100644 --- a/lib/op-attrs/include/op-attrs/ops/concat.h +++ b/lib/op-attrs/include/op-attrs/ops/concat.h @@ -2,14 +2,12 @@ #define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_CONCAT_H #include "op-attrs/ops/concat_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" +#include namespace FlexFlow { -CHECK_VALID_OP_ATTR(ConcatAttrs); - tl::expected get_output_shape(ConcatAttrs const &, std::vector const &); tl::expected diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d.h b/lib/op-attrs/include/op-attrs/ops/conv_2d.h index e4c7467de2..5ae4649571 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d.h @@ -1,17 +1,14 @@ -#ifndef _FLEXFLOW_CONV_2D_ATTRS_H -#define _FLEXFLOW_CONV_2D_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_CONV_2D_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_CONV_2D_H #include "op-attrs/incoming_tensor_role.dtg.h" #include "op-attrs/initializer_attrs.dtg.h" #include "op-attrs/ops/conv_2d_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.h" #include "op-attrs/tensor_shape.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(Conv2DAttrs); - std::vector get_conv2d_incoming_tensor_roles(Conv2DAttrs const &); diff --git a/lib/op-attrs/include/op-attrs/ops/core.h b/lib/op-attrs/include/op-attrs/ops/core.h deleted file mode 100644 index 611b53def5..0000000000 --- a/lib/op-attrs/include/op-attrs/ops/core.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _FLEXFLOW_OPATTRS_INCLUDE_OPATTRS_OPS_CORE_H -#define _FLEXFLOW_OPATTRS_INCLUDE_OPATTRS_OPS_CORE_H - -#include "utils/type_traits.h" - -namespace FlexFlow { - -#define CHECK_VALID_OP_ATTR(TYPENAME) CHECK_WELL_BEHAVED_VALUE_TYPE(TYPENAME) - -template -using is_valid_opattr = is_well_behaved_value_type; - -} // namespace FlexFlow - -#endif diff --git a/lib/op-attrs/include/op-attrs/ops/dropout.h b/lib/op-attrs/include/op-attrs/ops/dropout.h index 86e5db4d77..d5f3ae0c0d 100644 --- a/lib/op-attrs/include/op-attrs/ops/dropout.h +++ b/lib/op-attrs/include/op-attrs/ops/dropout.h @@ -1,10 +1,10 @@ -#ifndef _FLEXFLOW_DROPOUT_ATTRS_H -#define _FLEXFLOW_DROPOUT_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_DROPOUT_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_DROPOUT_H -#include "op-attrs/ops/core.h" #include 
"op-attrs/ops/dropout_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" +#include namespace FlexFlow { @@ -12,8 +12,6 @@ TensorShape get_output_shape(DropoutAttrs const &, TensorShape const &); tl::expected get_output_shape(DropoutAttrs const &, ParallelTensorShape const &); -CHECK_VALID_OP_ATTR(DropoutAttrs); - } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/element_binary.h b/lib/op-attrs/include/op-attrs/ops/element_binary.h index d51c3a3afa..be098c34bb 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_binary.h +++ b/lib/op-attrs/include/op-attrs/ops/element_binary.h @@ -1,21 +1,47 @@ -#ifndef _FLEXFLOW_ELEMENT_BINARY_ATTRS_H -#define _FLEXFLOW_ELEMENT_BINARY_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_ELEMENT_BINARY_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_ELEMENT_BINARY_H -#include "op-attrs/ops/core.h" +#include "op-attrs/num_ptensor_parallel_dims_t.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" #include "op-attrs/ops/element_binary_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.h" -#include namespace FlexFlow { -tl::expected get_output_shape( - ElementBinaryAttrs const &, TensorShape const &, TensorShape const &); -tl::expected - get_output_shape(ElementBinaryAttrs const &, - ParallelTensorShape const &, - ParallelTensorShape const &); +TensorShape get_output_shape(ElementBinaryAttrs const &, + TensorShape const &, + TensorShape const &); +ParallelTensorShape get_output_shape(ElementBinaryAttrs const &, + ParallelTensorShape const &, + ParallelTensorShape const &); + +ParallelTensorDimDegrees + get_output_parallel_dim_degrees(ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees); + +OperatorTaskSpace + get_operator_task_space(ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees); + + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_lhs_input_mapping( + ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees); + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_rhs_input_mapping( + ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees); + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_output_mapping( + ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees); -CHECK_VALID_OP_ATTR(ElementBinaryAttrs); } // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/ops/element_unary.h b/lib/op-attrs/include/op-attrs/ops/element_unary.h index 1a965b2c51..fefca9da19 100644 --- a/lib/op-attrs/include/op-attrs/ops/element_unary.h +++ b/lib/op-attrs/include/op-attrs/ops/element_unary.h @@ -1,8 +1,10 @@ -#ifndef _FLEXFLOW_ELEMENTARY_UNARY_ATTRS_H -#define _FLEXFLOW_ELEMENTARY_UNARY_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_ELEMENT_UNARY_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_ELEMENT_UNARY_H -#include "op-attrs/ops/core.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" #include "op-attrs/ops/element_unary_attrs.dtg.h" +#include 
"op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" #include @@ -11,12 +13,26 @@ namespace FlexFlow { ElementUnaryAttrs make_relu_attrs(); -tl::expected +TensorShape get_output_shape(ElementUnaryAttrs const &, TensorShape const &); -tl::expected +ParallelTensorShape get_output_shape(ElementUnaryAttrs const &, ParallelTensorShape const &); -CHECK_VALID_OP_ATTR(ElementUnaryAttrs); +ParallelTensorDimDegrees get_output_parallel_dim_degrees( + ElementUnaryAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + +OperatorTaskSpace get_operator_task_space(ElementUnaryAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_input_mapping( + ElementUnaryAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_output_mapping( + ElementUnaryAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + } // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/ops/embedding.h b/lib/op-attrs/include/op-attrs/ops/embedding.h index d44adf5f54..8bebf23488 100644 --- a/lib/op-attrs/include/op-attrs/ops/embedding.h +++ b/lib/op-attrs/include/op-attrs/ops/embedding.h @@ -1,8 +1,7 @@ -#ifndef _FLEXFLOW_EMBEDDING_ATTRS_H -#define _FLEXFLOW_EMBEDDING_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_EMBEDDING_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_EMBEDDING_H #include "op-attrs/initializer_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/ops/embedding_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.h" #include "op-attrs/tensor_shape.h" @@ -11,8 +10,6 @@ namespace FlexFlow { -CHECK_VALID_OP_ATTR(EmbeddingAttrs); - RecordFormatter as_dot(EmbeddingAttrs const &); tl::expected get_output_shape(EmbeddingAttrs const &, diff --git a/lib/op-attrs/include/op-attrs/ops/flat.h b/lib/op-attrs/include/op-attrs/ops/flat.h index 710cbdb44b..ac03d7c7a9 100644 --- a/lib/op-attrs/include/op-attrs/ops/flat.h +++ b/lib/op-attrs/include/op-attrs/ops/flat.h @@ -1,7 +1,6 @@ -#ifndef _FLEXFLOW_FLAT_ATTRS_H -#define _FLEXFLOW_FLAT_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_FLAT_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_FLAT_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/flat_attrs.dtg.h" #include "op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" @@ -9,14 +8,12 @@ namespace FlexFlow { -CHECK_VALID_OP_ATTR(FlatAttrs); - TensorShape get_output_shape(FlatAttrs const &, TensorShape const &); -tl::expected +ParallelTensorDimDegrees get_output_parallel_dim_degrees(FlatAttrs const &, ParallelTensorDimDegrees const &); -tl::expected - get_output_shape(FlatAttrs const &, ParallelTensorShape const &); +ParallelTensorShape get_output_shape(FlatAttrs const &, + ParallelTensorShape const &); } // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/ops/gather.h b/lib/op-attrs/include/op-attrs/ops/gather.h index 42efd13b60..3b67b9130b 100644 --- a/lib/op-attrs/include/op-attrs/ops/gather.h +++ b/lib/op-attrs/include/op-attrs/ops/gather.h @@ -1,14 +1,11 @@ -#ifndef _FLEXFLOW_GATHER_ATTRS_H -#define _FLEXFLOW_GATHER_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_GATHER_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_GATHER_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/gather_attrs.dtg.h" #include 
"op-attrs/parallel_tensor_shape.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(GatherAttrs); - TensorShape get_output_shape(GatherAttrs const &, TensorShape const &input, TensorShape const &index); diff --git a/lib/op-attrs/include/op-attrs/ops/input.h b/lib/op-attrs/include/op-attrs/ops/input.h index fe92c77a52..f12f2ebec1 100644 --- a/lib/op-attrs/include/op-attrs/ops/input.h +++ b/lib/op-attrs/include/op-attrs/ops/input.h @@ -1,18 +1,22 @@ #ifndef _FLEXFLOW_OP_ATTRS_OPS_OP_ATTRS_INPUT_H #define _FLEXFLOW_OP_ATTRS_OPS_OP_ATTRS_INPUT_H -#include "op-attrs/ops/core.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" #include "op-attrs/ops/input_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(InputAttrs); - TensorShape get_output_shape(InputAttrs const &); ParallelTensorShape get_output_parallel_tensor_shape(InputAttrs const &); +OperatorTaskSpace get_operator_task_space(InputAttrs const &); + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_output_mapping(InputAttrs const &); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/layer_norm.h b/lib/op-attrs/include/op-attrs/ops/layer_norm.h index 4dcbeb665e..1d2cb14e99 100644 --- a/lib/op-attrs/include/op-attrs/ops/layer_norm.h +++ b/lib/op-attrs/include/op-attrs/ops/layer_norm.h @@ -1,12 +1,12 @@ -#ifndef _FLEXFLOW_OP_META_OPS_LAYER_NORM_ATTRS_H -#define _FLEXFLOW_OP_META_OPS_LAYER_NORM_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_LAYER_NORM_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_LAYER_NORM_H #include "op-attrs/incoming_tensor_role.dtg.h" #include "op-attrs/initializer_attrs.dtg.h" -#include "op-attrs/ops/core.h" #include "op-attrs/ops/layer_norm_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" +#include namespace FlexFlow { @@ -44,8 +44,6 @@ tl::expected, std::string> */ std::vector get_initializers(LayerNormAttrs const &attrs); -CHECK_VALID_OP_ATTR(LayerNormAttrs); - } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/linear.h b/lib/op-attrs/include/op-attrs/ops/linear.h index 107f772e03..d9b7d96a13 100644 --- a/lib/op-attrs/include/op-attrs/ops/linear.h +++ b/lib/op-attrs/include/op-attrs/ops/linear.h @@ -1,11 +1,15 @@ -#ifndef _FLEXFLOW_LINEAR_ATTRS_H -#define _FLEXFLOW_LINEAR_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_LINEAR_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_LINEAR_H #include "op-attrs/incoming_tensor_role.dtg.h" #include "op-attrs/initializer_attrs.dtg.h" -#include "op-attrs/ops/core.h" +#include "op-attrs/num_ptensor_parallel_dims_t.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" #include "op-attrs/ops/linear_attrs.dtg.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" +#include "op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.dtg.h" #include "op-attrs/tensor_shape.dtg.h" #include "utils/record_formatter.h" #include @@ -15,8 +19,6 @@ namespace FlexFlow { std::vector get_linear_incoming_tensor_roles(LinearAttrs const &); -CHECK_VALID_OP_ATTR(LinearAttrs); - RecordFormatter as_dot(LinearAttrs const &); tl::expected @@ -29,6 +31,16 @@ tl::expected tl::expected, std::string> get_weight_shapes(LinearAttrs const &attrs, TensorShape const 
&input_shape); +ParallelTensorDimDegrees + get_projection_parallel_dim_degrees(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input); +ParallelTensorDimDegrees + get_bias_parallel_dim_degrees(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input); +ParallelTensorDimDegrees + get_output_parallel_dim_degrees(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input); + tl::expected get_projection_shape(LinearAttrs const &attrs, ParallelTensorShape const &input); @@ -49,6 +61,24 @@ tl::expected, std::string> get_initializers( std::nullopt, std::optional const &kernel_initializer = std::nullopt); +OperatorTaskSpace get_operator_task_space( + LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_input_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_projection_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_bias_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_output_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/loss_functions.h b/lib/op-attrs/include/op-attrs/ops/loss_functions.h index 657f8d91dc..c19d7f9e87 100644 --- a/lib/op-attrs/include/op-attrs/ops/loss_functions.h +++ b/lib/op-attrs/include/op-attrs/ops/loss_functions.h @@ -1,7 +1,6 @@ #ifndef _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_LOSS_FUNCTIONS_H #define _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_LOSS_FUNCTIONS_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" #include "op-attrs/ops/loss_functions/loss_function.dtg.h" #include "op-attrs/ops/loss_functions/nonconfigurable_loss_attrs.dtg.h" diff --git a/lib/op-attrs/include/op-attrs/ops/noop.h b/lib/op-attrs/include/op-attrs/ops/noop.h index 2c61dff886..8c8e191132 100644 --- a/lib/op-attrs/include/op-attrs/ops/noop.h +++ b/lib/op-attrs/include/op-attrs/ops/noop.h @@ -1,15 +1,12 @@ -#ifndef _FLEXFLOW_OP_ATTRS_OPS_NOOP_H -#define _FLEXFLOW_OP_ATTRS_OPS_NOOP_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_NOOP_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_NOOP_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/noop_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(NoopAttrs); - TensorShape get_output_shape(NoopAttrs const &, TensorShape const &); ParallelTensorShape get_output_shape(NoopAttrs const &, ParallelTensorShape const &); diff --git a/lib/op-attrs/include/op-attrs/ops/pool_2d.h b/lib/op-attrs/include/op-attrs/ops/pool_2d.h index 368250c957..016e632b33 100644 --- a/lib/op-attrs/include/op-attrs/ops/pool_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/pool_2d.h @@ -1,16 +1,14 @@ -#ifndef _FLEXFLOW_POOL_2D_ATTRS_H -#define _FLEXFLOW_POOL_2D_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_POOL_2D_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_POOL_2D_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/pool_2d_attrs.dtg.h" #include "op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" +#include namespace FlexFlow { 
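The linear.h hunk above gives LinearAttrs a full mapping family: one operator task space derived from the input's parallel dim degrees, plus operator-space-to-parallel-tensor-space mappings for the input, projection, bias, and output tensors. The sketch below shows how those declarations are presumably meant to compose; it is illustrative usage written against the signatures added in this patch, not code taken from it.

    // Usage sketch only: exercises the declarations added to op-attrs/ops/linear.h.
    #include "op-attrs/ops/linear.h"

    namespace FlexFlow {

    void sketch_linear_mappings(LinearAttrs const &attrs,
                                ParallelTensorDimDegrees const &input_degrees) {
      // A single task space is induced by the input's parallel degrees.
      OperatorTaskSpace task_space = get_operator_task_space(attrs, input_degrees);

      // Each tensor touched by the operator gets a mapping out of that task space.
      OperatorSpaceToParallelTensorSpaceMapping to_input =
          get_operator_to_input_mapping(attrs, input_degrees);
      OperatorSpaceToParallelTensorSpaceMapping to_projection =
          get_operator_to_projection_mapping(attrs, input_degrees);
      OperatorSpaceToParallelTensorSpaceMapping to_output =
          get_operator_to_output_mapping(attrs, input_degrees);

      // The bias mapping only exists when the attrs request a bias, mirroring
      // the use_bias check in get_operator_to_incoming_mappings later in this patch.
      if (attrs.use_bias) {
        OperatorSpaceToParallelTensorSpaceMapping to_bias =
            get_operator_to_bias_mapping(attrs, input_degrees);
        (void)to_bias;
      }

      (void)task_space;
      (void)to_input;
      (void)to_projection;
      (void)to_output;
    }

    } // namespace FlexFlow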
-CHECK_VALID_OP_ATTR(Pool2DAttrs); - tl::expected make_adaptive_pool2d_attrs(TensorDims const &input_dims, positive_int output_h, diff --git a/lib/op-attrs/include/op-attrs/ops/reduce.h b/lib/op-attrs/include/op-attrs/ops/reduce.h index 04e44b4161..5595ab9df5 100644 --- a/lib/op-attrs/include/op-attrs/ops/reduce.h +++ b/lib/op-attrs/include/op-attrs/ops/reduce.h @@ -1,14 +1,11 @@ -#ifndef _FLEXFLOW_OP_META_OPS_REDUCE_ATTRS_H -#define _FLEXFLOW_OP_META_OPS_REDUCE_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REDUCE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REDUCE_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/reduce_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(ReduceAttrs); - ParallelTensorShape get_output_shape(ReduceAttrs const &, ParallelTensorShape const &input_shape); diff --git a/lib/op-attrs/include/op-attrs/ops/reduction.h b/lib/op-attrs/include/op-attrs/ops/reduction.h index e8b2483cd5..b107178744 100644 --- a/lib/op-attrs/include/op-attrs/ops/reduction.h +++ b/lib/op-attrs/include/op-attrs/ops/reduction.h @@ -1,7 +1,6 @@ -#ifndef _FLEXFLOW_REDUCTION_ATTRS_H -#define _FLEXFLOW_REDUCTION_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REDUCTION_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REDUCTION_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/reduction_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "utils/record_formatter.h" @@ -9,8 +8,6 @@ namespace FlexFlow { -CHECK_VALID_OP_ATTR(ReductionAttrs); - RecordFormatter as_dot(ReductionAttrs const &); tl::expected diff --git a/lib/op-attrs/include/op-attrs/ops/repartition.h b/lib/op-attrs/include/op-attrs/ops/repartition.h index b67486ed35..7733bc6989 100644 --- a/lib/op-attrs/include/op-attrs/ops/repartition.h +++ b/lib/op-attrs/include/op-attrs/ops/repartition.h @@ -1,7 +1,6 @@ -#ifndef _FLEXFLOW_PARTITION_ATTRS_H -#define _FLEXFLOW_PARTITION_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REPARTITION_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REPARTITION_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/repartition_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "utils/record_formatter.h" @@ -9,8 +8,6 @@ namespace FlexFlow { -CHECK_VALID_OP_ATTR(RepartitionAttrs); - RecordFormatter as_dot(RepartitionAttrs const &); tl::expected diff --git a/lib/op-attrs/include/op-attrs/ops/replicate.h b/lib/op-attrs/include/op-attrs/ops/replicate.h index 10a4636d27..6a6ecd3d1e 100644 --- a/lib/op-attrs/include/op-attrs/ops/replicate.h +++ b/lib/op-attrs/include/op-attrs/ops/replicate.h @@ -1,15 +1,12 @@ -#ifndef _FLEXFLOW_REPLICATE_ATTRS_H -#define _FLEXFLOW_REPLICATE_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REPLICATE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REPLICATE_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/replicate_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "utils/record_formatter.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(ReplicateAttrs); - RecordFormatter as_dot(ReplicateAttrs const &); ParallelTensorShape get_output_shape(ReplicateAttrs const &attrs, diff --git a/lib/op-attrs/include/op-attrs/ops/reshape.h b/lib/op-attrs/include/op-attrs/ops/reshape.h index e87ca5c750..c7b8863ed6 100644 --- a/lib/op-attrs/include/op-attrs/ops/reshape.h +++ b/lib/op-attrs/include/op-attrs/ops/reshape.h @@ -1,14 +1,11 @@ -#ifndef _FLEXFLOW_RESHAPE_ATTRS_H -#define 
_FLEXFLOW_RESHAPE_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_RESHAPE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_RESHAPE_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/reshape_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(ReshapeAttrs); - TensorShape get_output_shape(ReshapeAttrs const &attrs, TensorShape const &input_shape); ParallelTensorShape get_output_shape(ReshapeAttrs const &attrs, diff --git a/lib/op-attrs/include/op-attrs/ops/reverse.h b/lib/op-attrs/include/op-attrs/ops/reverse.h index 023e714c20..7b8ea7cbe5 100644 --- a/lib/op-attrs/include/op-attrs/ops/reverse.h +++ b/lib/op-attrs/include/op-attrs/ops/reverse.h @@ -1,15 +1,12 @@ #ifndef _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REVERSE_H #define _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_REVERSE_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/reverse_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(ReverseAttrs); - TensorShape get_output_shape(ReverseAttrs const &, TensorShape const &); ParallelTensorShape get_output_shape(ReverseAttrs const &attrs, ParallelTensorShape const &input_shape); diff --git a/lib/op-attrs/include/op-attrs/ops/softmax.h b/lib/op-attrs/include/op-attrs/ops/softmax.h index 6eacc66b78..63bd7f1736 100644 --- a/lib/op-attrs/include/op-attrs/ops/softmax.h +++ b/lib/op-attrs/include/op-attrs/ops/softmax.h @@ -1,15 +1,13 @@ -#ifndef _FLEXFLOW_SOFTMAX_ATTRS_H -#define _FLEXFLOW_SOFTMAX_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_SOFTMAX_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_SOFTMAX_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/softmax_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" +#include namespace FlexFlow { -CHECK_VALID_OP_ATTR(SoftmaxAttrs); - tl::expected get_output_shape(SoftmaxAttrs const &attrs, TensorShape const &input_shape); tl::expected diff --git a/lib/op-attrs/include/op-attrs/ops/split.h b/lib/op-attrs/include/op-attrs/ops/split.h index e6a08d6e77..b29a591b1b 100644 --- a/lib/op-attrs/include/op-attrs/ops/split.h +++ b/lib/op-attrs/include/op-attrs/ops/split.h @@ -1,7 +1,6 @@ -#ifndef _FLEXFLOW_SPLIT_ATTRS_H -#define _FLEXFLOW_SPLIT_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_SPLIT_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_SPLIT_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/split_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" @@ -9,8 +8,6 @@ namespace FlexFlow { -CHECK_VALID_OP_ATTR(SplitAttrs); - std::vector get_output_shapes(SplitAttrs const &, TensorShape const &); std::vector diff --git a/lib/op-attrs/include/op-attrs/ops/topk.h b/lib/op-attrs/include/op-attrs/ops/topk.h index d6de90903a..cf28d0f8e9 100644 --- a/lib/op-attrs/include/op-attrs/ops/topk.h +++ b/lib/op-attrs/include/op-attrs/ops/topk.h @@ -1,15 +1,12 @@ -#ifndef _FLEXFLOW_TOPK_ATTRS_H -#define _FLEXFLOW_TOPK_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_TOPK_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_TOPK_H -#include "op-attrs/ops/core.h" #include "op-attrs/ops/topk_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(TopKAttrs); - TensorShape get_output_shape(TopKAttrs const &, TensorShape const &); ParallelTensorShape 
get_output_shape(TopKAttrs const &attrs, ParallelTensorShape const &input_shape); diff --git a/lib/op-attrs/include/op-attrs/ops/transpose.h b/lib/op-attrs/include/op-attrs/ops/transpose.h index 6de83ee414..6b10e9b0bb 100644 --- a/lib/op-attrs/include/op-attrs/ops/transpose.h +++ b/lib/op-attrs/include/op-attrs/ops/transpose.h @@ -1,19 +1,34 @@ -#ifndef _FLEXFLOW_OP_META_OPS_TRANSPOSE_ATTRS_H -#define _FLEXFLOW_OP_META_OPS_TRANSPOSE_ATTRS_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_TRANSPOSE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_TRANSPOSE_H -#include "op-attrs/ops/core.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" #include "op-attrs/ops/transpose_attrs.dtg.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" namespace FlexFlow { -CHECK_VALID_OP_ATTR(TransposeAttrs); - TensorShape get_output_shape(TransposeAttrs const &, TensorShape const &); + +ParallelTensorDimDegrees get_output_parallel_dim_degrees(TransposeAttrs const &, + ParallelTensorDimDegrees const &); + ParallelTensorShape get_output_shape(TransposeAttrs const &, ParallelTensorShape const &); +OperatorTaskSpace get_operator_task_space(TransposeAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_input_mapping(TransposeAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_output_mapping(TransposeAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/ops/transpose_attrs.struct.toml b/lib/op-attrs/include/op-attrs/ops/transpose_attrs.struct.toml index 50756f095b..9c65ed6e75 100644 --- a/lib/op-attrs/include/op-attrs/ops/transpose_attrs.struct.toml +++ b/lib/op-attrs/include/op-attrs/ops/transpose_attrs.struct.toml @@ -10,11 +10,9 @@ features = [ ] includes = [ - "op-attrs/ff_dim_t.h", - "op-attrs/ff_dim_t.dtg.h", - "op-attrs/ff_ordered/ff_ordered.h", + "op-attrs/tensor_dim_permutation.h", ] [[fields]] -name = "perm" -type = "::FlexFlow::FFOrdered<::FlexFlow::ff_dim_t>" +name = "permutation" +type = "::FlexFlow::TensorDimPermutation" diff --git a/lib/op-attrs/include/op-attrs/ops/weight.h b/lib/op-attrs/include/op-attrs/ops/weight.h index 66eb0064ed..5c7f791a75 100644 --- a/lib/op-attrs/include/op-attrs/ops/weight.h +++ b/lib/op-attrs/include/op-attrs/ops/weight.h @@ -1,7 +1,8 @@ #ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_WEIGHT_H #define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_OPS_WEIGHT_H -#include "op-attrs/ops/core.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" #include "op-attrs/ops/weight_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/tensor_shape.dtg.h" @@ -9,13 +10,17 @@ namespace FlexFlow { -CHECK_VALID_OP_ATTR(WeightAttrs); - RecordFormatter as_dot(WeightAttrs const &); TensorShape get_output_shape(WeightAttrs const &); ParallelTensorShape get_output_parallel_tensor_shape(WeightAttrs const &); +OperatorTaskSpace get_operator_task_space(WeightAttrs const &); + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_output_mapping(WeightAttrs const &); + + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_degrees.h 
b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_degrees.h new file mode 100644 index 0000000000..9e0725b892 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_degrees.h @@ -0,0 +1,44 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_DEGREES_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_DEGREES_H + +#include "op-attrs/num_ptensor_shard_dims_t.dtg.h" +#include "op-attrs/num_tensor_dims_t.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" +#include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" +#include "op-attrs/parallel_tensor_space_coordinate.dtg.h" +#include "utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/minimal_dim_domain.dtg.h" + +namespace FlexFlow { + +num_ptensor_shard_dims_t get_ptensor_dim_degrees_num_shard_dims(ParallelTensorDimDegrees const &); +num_tensor_dims_t get_ptensor_dim_degrees_num_tensor_dims(ParallelTensorDimDegrees const &); + +std::unordered_set + get_parallel_tensor_dim_indices(ParallelTensorDimDegrees const &); + +std::set get_nontrivial_parallel_tensor_dim_indices( + ParallelTensorDimDegrees const &); + +positive_int get_degree_for_parallel_tensor_dim_idx(ParallelTensorDimDegrees const &, + parallel_tensor_dim_idx_t const &); + +std::unordered_map + get_parallel_tensor_degree_map(ParallelTensorDimDegrees const &); + +std::unordered_set + get_parallel_tensor_space_coordinates(ParallelTensorDimDegrees const &); + +DimDomain + dim_domain_from_parallel_tensor_dim_degrees(ParallelTensorDimDegrees const &); + +ParallelTensorDimDegrees + parallel_tensor_dim_degrees_from_dim_domain(DimDomain const &); + +MinimalDimDomain + minimal_dim_domain_from_parallel_tensor_dim_degrees(ParallelTensorDimDegrees const &); + + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h new file mode 100644 index 0000000000..284f8959c2 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h @@ -0,0 +1,24 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_IDX_T_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_IDX_T_H + +#include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" +#include "utils/orthotope/dim_ordering.dtg.h" + +namespace FlexFlow { + +parallel_tensor_dim_idx_t sum_dim_idx(); +parallel_tensor_dim_idx_t discard_copy_dim_idx(); +parallel_tensor_dim_idx_t shard_dim_idx(ff_dim_t); + +bool + is_dim_idx_for_reduction_dimension(parallel_tensor_dim_idx_t); + +std::set + dim_idxs_for_num_shard_dims(nonnegative_int num_shard_dims); + +DimOrdering + get_parallel_tensor_dim_ordering(); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.variant.toml b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.variant.toml index 7e7356a5e7..9b3e15dfd5 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.variant.toml +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.variant.toml @@ -6,15 +6,31 @@ features = [ "hash", "json", "fmt", + "rapidcheck", ] +docstring = """\ +@brief Index type for the dimensions of @ref ParallelTensorDimDegrees. + +@ref parallel_tensor_dim_idx_t is to @ref ParallelTensorDimDegrees as +@ref operator_task_space_dim_idx_t is to @ref OperatorTaskSpace as +@ref MachineSpecificationDimension is to @ref MachineComputeSpecification. 
+""" + includes = [ "op-attrs/ff_dim_t.dtg.h", "op-attrs/replica_type.dtg.h", ] +src_includes = [ + "op-attrs/ff_dim_t.h", +] + [[values]] type = "::FlexFlow::ff_dim_t" +key = "shard_dim" + [[values]] type = "::FlexFlow::ReplicaType" +key = "replica_dim" diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h index 435a962963..9e71785013 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIMS_H #define _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIMS_H +#include "op-attrs/num_ptensor_shard_dims_t.dtg.h" #include "op-attrs/parallel_dim.h" #include "op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "op-attrs/parallel_tensor_dims.dtg.h" @@ -13,7 +14,7 @@ FFOrdered ff_ordered_shard_degrees(ParallelTensorDims const &); std::unordered_set replica_dims(ParallelTensorDims const &); /* size_t get_volume(ParallelTensorDims const &); */ -nonnegative_int num_shard_dims(ParallelTensorDims const &); +num_ptensor_shard_dims_t num_shard_dims(ParallelTensorDims const &); ParallelTensorDimDegrees get_parallel_degrees(ParallelTensorDims const &); diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h index e366f99b8e..e23ae33cbf 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h @@ -2,6 +2,7 @@ #define _OP_META_PARALLEL_TENSOR_SHAPE_H #include "op-attrs/ff_dim_t.h" +#include "op-attrs/num_ptensor_shard_dims_t.dtg.h" #include "op-attrs/parallel_dim.h" #include "op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" @@ -12,7 +13,7 @@ namespace FlexFlow { -nonnegative_int num_shard_dims(ParallelTensorShape const &); +num_ptensor_shard_dims_t num_shard_dims(ParallelTensorShape const &); ShardParallelDim shard_dim_at_idx(ParallelTensorShape const &, relative_ff_dim_t); ShardParallelDim &shard_dim_at_idx(ParallelTensorShape &, relative_ff_dim_t); @@ -34,9 +35,12 @@ ParallelTensorShape lift_to_parallel_with_degrees(TensorShape const &, ParallelTensorDimDegrees const &); +TensorShape get_piece_shape(ParallelTensorShape const &); +num_bytes_t get_piece_size_in_bytes(ParallelTensorShape const &); + std::unordered_set replica_dims(ParallelTensorShape const &); -TensorShape get_piece_shape(ParallelTensorShape const &); + positive_int get_num_replica_dims(ParallelTensorShape const &); positive_int get_num_replicas(ParallelTensorShape const &); @@ -48,7 +52,6 @@ positive_int get_total_parallel_degree(ParallelTensorShape const &); bool is_valid(ParallelTensorShape const &); TensorShape require_not_parallel(ParallelTensorShape const &); -TensorShape get_tensor_shape_unsafe(ParallelTensorShape const &); std::vector get_tensor_shapes_unsafe(std::vector const &); diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_space_coordinate.h b/lib/op-attrs/include/op-attrs/parallel_tensor_space_coordinate.h new file mode 100644 index 0000000000..9d282a3e85 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_space_coordinate.h @@ -0,0 +1,32 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_SPACE_COORDINATE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_SPACE_COORDINATE_H + +#include "op-attrs/num_ptensor_parallel_dims_t.h" +#include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" 
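Regarding the get_piece_shape / get_piece_size_in_bytes pair added to parallel_tensor_shape.h above: a parallel tensor's piece is the per-device shard, so each shard dimension is divided by its shard degree and the replica dimensions contribute no additional elements. The stand-alone arithmetic below illustrates that reading with plain integers; the even-divisibility assumption and the helper name are for illustration only.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Illustration: per-device ("piece") byte size of a sharded tensor, assuming
    // every shard dimension divides evenly by its shard degree.
    std::size_t piece_size_in_bytes(std::vector<std::size_t> const &global_dims,
                                    std::vector<std::size_t> const &shard_degrees,
                                    std::size_t bytes_per_element) {
      assert(global_dims.size() == shard_degrees.size());
      std::size_t piece_volume = 1;
      for (std::size_t i = 0; i < global_dims.size(); i++) {
        assert(global_dims[i] % shard_degrees[i] == 0);
        piece_volume *= global_dims[i] / shard_degrees[i];
      }
      return piece_volume * bytes_per_element;
    }

    // Example: a 64x128 float32 tensor sharded 2-ways on dim 0 and 4-ways on dim 1
    // has a 32x32 piece, i.e. 32 * 32 * 4 = 4096 bytes per device.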
+#include "op-attrs/parallel_tensor_space_coordinate.dtg.h" +#include "utils/orthotope/dim_coord.dtg.h" + +namespace FlexFlow { + +num_ptensor_parallel_dims_t ptensor_coord_num_dims(ParallelTensorSpaceCoordinate const &); + +std::unordered_set + get_dim_idxs_in_ptensor_space_coord(ParallelTensorSpaceCoordinate const &); + +nonnegative_int ptensor_coord_component_for_ptensor_dim_idx( + ParallelTensorSpaceCoordinate const &, + parallel_tensor_dim_idx_t); + +ParallelTensorSpaceCoordinate parallel_tensor_space_coord_from_map( + std::unordered_map const &); + +ParallelTensorSpaceCoordinate parallel_tensor_space_coord_from_dim_coord( + DimCoord const &); + +DimCoord + dim_coord_from_parallel_tensor_space_coord( + ParallelTensorSpaceCoordinate const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_space_coordinate.struct.toml b/lib/op-attrs/include/op-attrs/parallel_tensor_space_coordinate.struct.toml new file mode 100644 index 0000000000..4acf0414cf --- /dev/null +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_space_coordinate.struct.toml @@ -0,0 +1,27 @@ +namespace = "FlexFlow" +name = "ParallelTensorSpaceCoordinate" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", + "rapidcheck", +] + +includes = [ + "op-attrs/ff_ordered/ff_ordered.h", + "utils/nonnegative_int/nonnegative_int.h", +] + +[[fields]] +name = "sum_component" +type = "::FlexFlow::nonnegative_int" + +[[fields]] +name = "discard_copy_component" +type = "::FlexFlow::nonnegative_int" + +[[fields]] +name = "shard_components" +type = "::FlexFlow::FFOrdered<::FlexFlow::nonnegative_int>" diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.h b/lib/op-attrs/include/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.h new file mode 100644 index 0000000000..e93a4ad94c --- /dev/null +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.h @@ -0,0 +1,24 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_SPACE_MAPPING_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_SPACE_MAPPING_H + +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" +#include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" +#include "op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.dtg.h" +#include "utils/orthotope/dim_projection.dtg.h" + +namespace FlexFlow { + +ParallelTensorSpaceToParallelTensorSpaceMapping + parallel_tensor_space_mapping_from_projection( + DimProjection const &projection, + ParallelTensorDimDegrees const &l_degrees, + ParallelTensorDimDegrees const &r_degrees); + +ParallelTensorSpaceToParallelTensorSpaceMapping + invert_parallel_tensor_space_mapping( + ParallelTensorSpaceToParallelTensorSpaceMapping const &); + + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.struct.toml b/lib/op-attrs/include/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.struct.toml new file mode 100644 index 0000000000..e68787accd --- /dev/null +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.struct.toml @@ -0,0 +1,16 @@ +namespace = "FlexFlow" +name = "ParallelTensorSpaceToParallelTensorSpaceMapping" +features = [ + "eq", + "hash", + "fmt", +] + +includes = [ + "utils/orthotope/dim_domain_mapping.h", + "op-attrs/parallel_tensor_dim_idx_t.dtg.h", +] + +[[fields]] +name = "raw_mapping" +type = 
"::FlexFlow::DimDomainMapping<::FlexFlow::parallel_tensor_dim_idx_t, ::FlexFlow::parallel_tensor_dim_idx_t>" diff --git a/lib/op-attrs/include/op-attrs/relative_ff_dim_t.h b/lib/op-attrs/include/op-attrs/relative_ff_dim_t.h index 5205b1ead8..9ca3ce0afb 100644 --- a/lib/op-attrs/include/op-attrs/relative_ff_dim_t.h +++ b/lib/op-attrs/include/op-attrs/relative_ff_dim_t.h @@ -2,12 +2,13 @@ #define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_RELATIVE_FF_DIM_T_H #include "op-attrs/ff_dim_t.dtg.h" +#include "op-attrs/num_tensor_dims_t.h" #include "op-attrs/relative_ff_dim_t.dtg.h" -#include "rapidcheck.h" +#include namespace FlexFlow { ff_dim_t ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t ff_dim, - nonnegative_int input_dim); + num_tensor_dims_t input_dim); } // namespace FlexFlow namespace rc { diff --git a/lib/op-attrs/include/op-attrs/replica_type.enum.toml b/lib/op-attrs/include/op-attrs/replica_type.enum.toml index 0c0eb5e3ab..ccf4b5ffdd 100644 --- a/lib/op-attrs/include/op-attrs/replica_type.enum.toml +++ b/lib/op-attrs/include/op-attrs/replica_type.enum.toml @@ -8,7 +8,7 @@ features = [ ] [[values]] -name = "SUM" +name = "DISCARD_COPY" [[values]] -name = "DISCARD_COPY" +name = "SUM" diff --git a/lib/op-attrs/include/op-attrs/shape_inference.h b/lib/op-attrs/include/op-attrs/shape_inference.h index 8c679f442a..732b9320ad 100644 --- a/lib/op-attrs/include/op-attrs/shape_inference.h +++ b/lib/op-attrs/include/op-attrs/shape_inference.h @@ -1,5 +1,5 @@ -#ifndef _FLEXFLOW_INCLUDE_OP_ATTRS_SHAPE_INFERENCE_H -#define _FLEXFLOW_INCLUDE_OP_ATTRS_SHAPE_INFERENCE_H +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_SHAPE_INFERENCE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_SHAPE_INFERENCE_H #include "op-attrs/computation_graph_op_attrs.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" diff --git a/lib/op-attrs/include/op-attrs/task_space_coordinate.h b/lib/op-attrs/include/op-attrs/task_space_coordinate.h new file mode 100644 index 0000000000..1f73a47993 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/task_space_coordinate.h @@ -0,0 +1,24 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_TASK_SPACE_COORDINATE_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_TASK_SPACE_COORDINATE_H + +#include "op-attrs/operator_task_space_dim_idx_t.dtg.h" +#include "op-attrs/task_space_coordinate.dtg.h" +#include "utils/orthotope/dim_coord.dtg.h" + +namespace FlexFlow { + +nonnegative_int task_space_coord_num_dims(TaskSpaceCoordinate const &); + +TaskSpaceCoordinate + make_task_space_coordinate(std::vector const &); + +TaskSpaceCoordinate + task_space_coordinate_from_dim_coord( + DimCoord const &); + +DimCoord + dim_coord_from_task_space_coordinate(TaskSpaceCoordinate const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/task_space_coordinate.struct.toml b/lib/op-attrs/include/op-attrs/task_space_coordinate.struct.toml new file mode 100644 index 0000000000..4f8a281212 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/task_space_coordinate.struct.toml @@ -0,0 +1,18 @@ +namespace = "FlexFlow" +name = "TaskSpaceCoordinate" +features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "utils/orthotope/orthotope_coord.dtg.h", +] + +[[fields]] +name = "orthotope_coord" +type = "::FlexFlow::OrthotopeCoord" diff --git a/lib/op-attrs/include/op-attrs/tensor_dim_permutation.h b/lib/op-attrs/include/op-attrs/tensor_dim_permutation.h new file mode 100644 index 0000000000..f555d6082a --- /dev/null +++ 
b/lib/op-attrs/include/op-attrs/tensor_dim_permutation.h @@ -0,0 +1,105 @@ +#ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_TENSOR_DIM_PERMUTATION_H +#define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_TENSOR_DIM_PERMUTATION_H + +#include "op-attrs/ff_dim_t.dtg.h" +#include "op-attrs/ff_ordered/ff_ordered.h" +#include "op-attrs/num_tensor_dims_t.h" +#include "op-attrs/tensor_dims.dtg.h" +#include "op-attrs/tensor_shape.dtg.h" +#include "utils/bidict/bidict.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" + +namespace FlexFlow { + +struct TensorDimPermutation { + TensorDimPermutation() = delete; + + TensorDimPermutation(bidict const &); + + bool operator==(TensorDimPermutation const &) const; + bool operator!=(TensorDimPermutation const &) const; + + bool operator<(TensorDimPermutation const &) const; + bool operator>(TensorDimPermutation const &) const; + bool operator<=(TensorDimPermutation const &) const; + bool operator>=(TensorDimPermutation const &) const; + + ff_dim_t at_l(ff_dim_t) const; + ff_dim_t at_r(ff_dim_t) const; + + num_tensor_dims_t num_tensor_dims() const; + + bidict const &as_bidict() const; +private: + bidict raw; +private: + std::tuple< + decltype(raw) const & + > tie() const; + + friend struct std::hash; +}; + +bidict format_as(TensorDimPermutation const &); +std::ostream &operator<<(std::ostream &, TensorDimPermutation const &); + +TensorDimPermutation compose_tensor_dim_permutations( + TensorDimPermutation const &, + TensorDimPermutation const &); + +TensorDimPermutation invert_tensor_dim_permutation( + TensorDimPermutation const &); + +TensorDims + permute_tensor_dims(TensorDimPermutation const &, + TensorDims const &); + +TensorShape + permute_tensor_shape(TensorDimPermutation const &, + TensorShape const &); + +ParallelTensorDimDegrees + permute_parallel_tensor_dim_degrees(TensorDimPermutation const &, + ParallelTensorDimDegrees const &); + +ParallelTensorDims + permute_parallel_tensor_dims(TensorDimPermutation const &, + ParallelTensorDims const &); + +ParallelTensorShape + permute_parallel_tensor_shape(TensorDimPermutation const &, + ParallelTensorShape const &); + +} // namespace FlexFlow + +namespace nlohmann { + +template <> +struct adl_serializer<::FlexFlow::TensorDimPermutation> { + static ::FlexFlow::TensorDimPermutation from_json(json const &); + static void to_json(json &, ::FlexFlow::TensorDimPermutation const &); +}; + +} // namespace nlohmann + +namespace rc { + +template <> +struct Arbitrary<::FlexFlow::TensorDimPermutation> { + static Gen<::FlexFlow::TensorDimPermutation> arbitrary(); +}; + +} // namespace rc + + +namespace std { + +template <> +struct hash<::FlexFlow::TensorDimPermutation> { + size_t operator()(::FlexFlow::TensorDimPermutation const &) const; +}; + +} + +#endif diff --git a/lib/op-attrs/include/op-attrs/tensor_dims.h b/lib/op-attrs/include/op-attrs/tensor_dims.h index 0f5b987944..e0c8aa2dc6 100644 --- a/lib/op-attrs/include/op-attrs/tensor_dims.h +++ b/lib/op-attrs/include/op-attrs/tensor_dims.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_TENSOR_DIMS_H #define _FLEXFLOW_LIB_OP_ATTRS_INCLUDE_OP_ATTRS_TENSOR_DIMS_H +#include "op-attrs/num_tensor_dims_t.h" #include "op-attrs/parallel_tensor_dims.dtg.h" #include "op-attrs/tensor_dims.dtg.h" #include "op-attrs/tensor_dims_coord.dtg.h" @@ -12,7 +13,7 @@ FFOrdered const &ff_ordered(TensorDims const &); bool tensor_dims_has_dim(TensorDims const &, ff_dim_t); -nonnegative_int get_num_dims(TensorDims const &); 
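TensorDimPermutation, introduced above and now stored by TransposeAttrs in place of the old perm field, is a bijection between dimension indices, so inverting it just swaps the two sides of the bidict and composing two permutations is ordinary function composition. The stand-alone sketch below illustrates the idea with plain ints and std::map in place of ff_dim_t and bidict; it is not the library implementation, and the composition argument order is an assumption.

    #include <cassert>
    #include <map>

    // Permutation of dimension indices, illustrated with a plain map (l_dim -> r_dim).
    using DimPerm = std::map<int, int>;

    DimPerm invert(DimPerm const &p) {
      DimPerm result;
      for (auto const &[l, r] : p) {
        result[r] = l; // swap the two sides of the bijection
      }
      return result;
    }

    DimPerm compose(DimPerm const &second, DimPerm const &first) {
      DimPerm result;
      for (auto const &[l, mid] : first) {
        result[l] = second.at(mid); // apply `first`, then `second`
      }
      return result;
    }

    int main() {
      // Swap the last two dims of a 3-d tensor; this permutation is its own inverse.
      DimPerm swap_last_two = {{0, 0}, {1, 2}, {2, 1}};
      assert(invert(swap_last_two) == swap_last_two);
      assert(compose(swap_last_two, swap_last_two) == (DimPerm{{0, 0}, {1, 1}, {2, 2}}));
    }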
+num_tensor_dims_t get_num_dims(TensorDims const &); positive_int dim_at_idx(TensorDims const &, relative_ff_dim_t); positive_int &dim_at_idx(TensorDims &, relative_ff_dim_t); diff --git a/lib/pcg/include/pcg/tensor_role.enum.toml b/lib/op-attrs/include/op-attrs/tensor_role.enum.toml similarity index 100% rename from lib/pcg/include/pcg/tensor_role.enum.toml rename to lib/op-attrs/include/op-attrs/tensor_role.enum.toml diff --git a/lib/op-attrs/src/op-attrs/computation_graph_op_attrs.cc b/lib/op-attrs/src/op-attrs/computation_graph_op_attrs.cc index f65a0f5f08..9d1a9f68d4 100644 --- a/lib/op-attrs/src/op-attrs/computation_graph_op_attrs.cc +++ b/lib/op-attrs/src/op-attrs/computation_graph_op_attrs.cc @@ -31,20 +31,14 @@ RecordFormatter as_dot(ComputationGraphOpAttrs const &attrs) { return result; } -ComputationGraphOpAttrs +std::optional compgraph_op_attrs_from_pcg_op_attrs(PCGOperatorAttrs const &op) { - auto fail_on_parallel_op = [](auto const &attrs) -> ComputationGraphOpAttrs { - throw mk_runtime_error( - fmt::format("Encountered parallel operator in " - "compgraph_op_attrs_from_pcg_op_attrs: {}", - attrs)); - }; - - return op.visit(overload{ - [&](CombineAttrs const &attrs) { return fail_on_parallel_op(attrs); }, - [&](ReductionAttrs const &attrs) { return fail_on_parallel_op(attrs); }, - [&](RepartitionAttrs const &attrs) { return fail_on_parallel_op(attrs); }, - [&](ReplicateAttrs const &attrs) { return fail_on_parallel_op(attrs); }, + + return op.visit>(overload{ + [&](CombineAttrs const &attrs) { return std::nullopt; }, + [&](ReductionAttrs const &attrs) { return std::nullopt; }, + [&](RepartitionAttrs const &attrs) { return std::nullopt; }, + [&](ReplicateAttrs const &attrs) { return std::nullopt; }, [](auto const &attrs) { return ComputationGraphOpAttrs{attrs}; }, }); } diff --git a/lib/op-attrs/src/op-attrs/dim_ordered/slice.cc b/lib/op-attrs/src/op-attrs/dim_ordered/slice.cc deleted file mode 100644 index 8c3dbd7bbc..0000000000 --- a/lib/op-attrs/src/op-attrs/dim_ordered/slice.cc +++ /dev/null @@ -1 +0,0 @@ -#include "op-attrs/dim_ordered/slice.h" diff --git a/lib/op-attrs/src/op-attrs/dim_ordered/transform.cc b/lib/op-attrs/src/op-attrs/dim_ordered/transform.cc deleted file mode 100644 index 73683eba94..0000000000 --- a/lib/op-attrs/src/op-attrs/dim_ordered/transform.cc +++ /dev/null @@ -1 +0,0 @@ -#include "op-attrs/dim_ordered/transform.h" diff --git a/lib/op-attrs/src/op-attrs/dim_ordered/zip.cc b/lib/op-attrs/src/op-attrs/dim_ordered/zip.cc deleted file mode 100644 index 208fc4a719..0000000000 --- a/lib/op-attrs/src/op-attrs/dim_ordered/zip.cc +++ /dev/null @@ -1 +0,0 @@ -#include "op-attrs/dim_ordered/zip.h" diff --git a/lib/op-attrs/src/op-attrs/ff_dim_t.cc b/lib/op-attrs/src/op-attrs/ff_dim_t.cc index 63c783d909..0d3de74735 100644 --- a/lib/op-attrs/src/op-attrs/ff_dim_t.cc +++ b/lib/op-attrs/src/op-attrs/ff_dim_t.cc @@ -1,4 +1,6 @@ #include "op-attrs/ff_dim_t.h" +#include "utils/containers/transform.h" +#include "utils/nonnegative_int/nonnegative_range.h" namespace FlexFlow { @@ -11,6 +13,11 @@ ff_dim_t add_to_ff_dim(ff_dim_t ff_dim, int value) { return ff_dim_t{nonnegative_int{ff_dim.value.unwrap_nonnegative() + value}}; } +std::vector ff_dim_range(nonnegative_int num_elements) { + return transform(nonnegative_range(num_elements), + [](nonnegative_int idx) { return ff_dim_t{idx}; }); +} + } // namespace FlexFlow namespace rc { diff --git a/lib/op-attrs/src/op-attrs/ff_ordered/get_idxs.cc b/lib/op-attrs/src/op-attrs/ff_ordered/get_idxs.cc index 3da15bebba..7b93643735 
100644 --- a/lib/op-attrs/src/op-attrs/ff_ordered/get_idxs.cc +++ b/lib/op-attrs/src/op-attrs/ff_ordered/get_idxs.cc @@ -5,6 +5,6 @@ namespace FlexFlow { using T = value_type<0>; -template std::vector get_idxs(FFOrdered const &); +template std::set get_idxs(FFOrdered const &); } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ff_ordered/map_from_ff_ordered.cc b/lib/op-attrs/src/op-attrs/ff_ordered/map_from_ff_ordered.cc new file mode 100644 index 0000000000..eba1ce19c8 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/ff_ordered/map_from_ff_ordered.cc @@ -0,0 +1,11 @@ +#include "op-attrs/ff_ordered/map_from_ff_ordered.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using T = value_type<0>; + +template + std::unordered_map map_from_ff_ordered(FFOrdered const &); + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/get_incoming_tensor_roles.cc b/lib/op-attrs/src/op-attrs/get_incoming_tensor_roles.cc index 21efc26466..310b7b549a 100644 --- a/lib/op-attrs/src/op-attrs/get_incoming_tensor_roles.cc +++ b/lib/op-attrs/src/op-attrs/get_incoming_tensor_roles.cc @@ -10,14 +10,14 @@ namespace FlexFlow { std::vector get_incoming_tensor_roles( - ComputationGraphOpAttrs const &comp_graph_op_attrs, int num_incoming) { + ComputationGraphOpAttrs const &comp_graph_op_attrs, nonnegative_int num_incoming) { return get_incoming_tensor_roles( pcg_op_attrs_from_compgraph_op_attrs(comp_graph_op_attrs), num_incoming); } std::vector get_incoming_tensor_roles(PCGOperatorAttrs const &pcg_op_attrs, - int num_incoming) { + nonnegative_int num_incoming) { return pcg_op_attrs.visit>(overload{ [](BatchMatmulAttrs const &) { return std::vector{IncomingTensorRole::INPUT, @@ -34,7 +34,7 @@ std::vector return std::vector{IncomingTensorRole::INPUT}; }, [&](ConcatAttrs const &) { - return std::vector(num_incoming, IncomingTensorRole::INPUT); + return std::vector(num_incoming.unwrap_nonnegative(), IncomingTensorRole::INPUT); }, [](Conv2DAttrs const &attrs) { return get_conv2d_incoming_tensor_roles(attrs); diff --git a/lib/op-attrs/src/op-attrs/get_operator_space_to_parallel_tensor_space_mappings.cc b/lib/op-attrs/src/op-attrs/get_operator_space_to_parallel_tensor_space_mappings.cc new file mode 100644 index 0000000000..792afa5015 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/get_operator_space_to_parallel_tensor_space_mappings.cc @@ -0,0 +1,188 @@ +#include "op-attrs/get_operator_space_to_parallel_tensor_space_mappings.h" +#include "op-attrs/get_incoming_tensor_roles.h" +#include "op-attrs/ops/element_binary.h" +#include "utils/containers/filtrans.h" +#include "utils/containers/get_only.h" +#include "utils/overload.h" +#include "op-attrs/ops/linear.h" +#include "op-attrs/ops/element_unary.h" +#include "op-attrs/ops/input.h" +#include "op-attrs/ops/transpose.h" +#include "op-attrs/ops/weight.h" + +namespace FlexFlow { + +std::vector + get_operator_to_incoming_mappings( + ComputationGraphOpAttrs const &comp_graph_op_attrs, + std::vector const &inputs_degrees) { + return comp_graph_op_attrs.visit< + std::vector + >(overload { + [&](ElementBinaryAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 2); + + ParallelTensorDimDegrees lhs_degrees = inputs_degrees.at(0); + ParallelTensorDimDegrees rhs_degrees = inputs_degrees.at(1); + + return std::vector{ + get_operator_to_lhs_input_mapping(attrs, lhs_degrees, rhs_degrees), + get_operator_to_rhs_input_mapping(attrs, lhs_degrees, rhs_degrees), + }; + }, + [&](ElementUnaryAttrs const &attrs) { + return std::vector{ + 
get_operator_to_input_mapping(attrs, get_only(inputs_degrees)), + }; + }, + [&](InputAttrs const &) { + ASSERT(inputs_degrees.size() == 0); + + return std::vector{}; + }, + [&](LinearAttrs const &attrs) { + ParallelTensorDimDegrees input_degrees = get_only(inputs_degrees); + + std::vector result = { + get_operator_to_input_mapping(attrs, input_degrees), + get_operator_to_projection_mapping(attrs, input_degrees), + }; + + if (attrs.use_bias) { + result.push_back(get_operator_to_bias_mapping(attrs, input_degrees)); + }; + + return result; + }, + [&](TransposeAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 1); + + return std::vector{ + get_operator_to_input_mapping(attrs, get_only(inputs_degrees)), + }; + }, + [&](WeightAttrs const &) { + ASSERT(inputs_degrees.size() == 0); + + return std::vector{}; + }, + [](auto const &attrs) -> std::vector { + PANIC("Missing implmentation of get_operator_to_input_mappings", attrs); + }, + }); +} + +std::vector + get_operator_to_incoming_mappings_for_role(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees, + IncomingTensorRole incoming_tensor_role) { + + std::vector + incoming_mappings = get_operator_to_incoming_mappings(attrs, inputs_degrees); + + + std::vector + incoming_tensor_roles = get_incoming_tensor_roles(attrs, num_elements(inputs_degrees)); + + return filtrans(zip(incoming_mappings, incoming_tensor_roles), + [&](std::pair const &p) + -> std::optional + { + auto const &[mapping, role] = p; + + if (role == incoming_tensor_role) { + return mapping; + } else { + return std::nullopt; + } + }); +} + +std::vector + get_operator_to_input_mappings(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees) { + return get_operator_to_incoming_mappings_for_role(attrs, inputs_degrees, IncomingTensorRole::INPUT); +} + +std::vector + get_operator_to_weight_mappings(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees) { + + return get_operator_to_incoming_mappings_for_role(attrs, inputs_degrees, IncomingTensorRole::WEIGHT); +} + + +std::vector + get_operator_to_output_mappings( + ComputationGraphOpAttrs const &comp_graph_op_attrs, + std::vector const &inputs_degrees) { + + return comp_graph_op_attrs.visit< + std::vector + >(overload { + [&](ElementBinaryAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 2); + + ParallelTensorDimDegrees lhs_degrees = inputs_degrees.at(0); + ParallelTensorDimDegrees rhs_degrees = inputs_degrees.at(1); + + return std::vector{ + get_operator_to_output_mapping(attrs, lhs_degrees, rhs_degrees), + }; + }, + [&](ElementUnaryAttrs const &attrs) { + return std::vector{ + get_operator_to_output_mapping(attrs, get_only(inputs_degrees)), + }; + }, + [&](LinearAttrs const &attrs) { + return std::vector{ + get_operator_to_output_mapping(attrs, get_only(inputs_degrees)), + }; + }, + [&](InputAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 0); + + return std::vector{ + get_operator_to_output_mapping(attrs), + }; + }, + [&](TransposeAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 1); + + return std::vector{ + get_operator_to_output_mapping(attrs, get_only(inputs_degrees)), + }; + }, + [&](WeightAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 0); + + return std::vector{ + get_operator_to_output_mapping(attrs), + }; + }, + [](auto const &attrs) -> std::vector { + PANIC("Missing implmentation of get_operator_to_input_mappings", attrs); + }, + }); +} + + +std::vector + get_operator_to_ptensor_mappings_for_role(ComputationGraphOpAttrs const &attrs, + 
std::vector const &inputs_degrees, + TensorRole role) { + switch (role) { + case TensorRole::INPUT: + return get_operator_to_input_mappings(attrs, inputs_degrees); + case TensorRole::WEIGHT: + return get_operator_to_weight_mappings(attrs, inputs_degrees); + case TensorRole::OUTPUT: + return get_operator_to_output_mappings(attrs, inputs_degrees); + default: + PANIC("Unhandled TensorRole", role); + } +} + + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/get_operator_task_space.cc b/lib/op-attrs/src/op-attrs/get_operator_task_space.cc new file mode 100644 index 0000000000..002e1998f7 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/get_operator_task_space.cc @@ -0,0 +1,60 @@ +#include "op-attrs/get_operator_task_space.h" +#include "utils/containers/get_only.h" +#include "utils/overload.h" +#include +#include "op-attrs/ops/element_unary.h" +#include "op-attrs/ops/element_binary.h" +#include "op-attrs/ops/linear.h" +#include "op-attrs/ops/input.h" +#include "op-attrs/ops/transpose.h" +#include "op-attrs/ops/weight.h" + +namespace FlexFlow { + +OperatorTaskSpace + get_operator_task_space(ComputationGraphOpAttrs const &attrs, + std::vector const &inputs_degrees) { + return attrs.visit< + OperatorTaskSpace + >(overload { + [&](ElementUnaryAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 1); + + return get_operator_task_space(attrs, get_only(inputs_degrees)); + }, + [&](ElementBinaryAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 2); + + return get_operator_task_space( + /*attrs=*/attrs, + /*lhs_input_degrees=*/inputs_degrees.at(0), + /*rhs_input_degrees=*/inputs_degrees.at(1)); + }, + [&](LinearAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 1); + + return get_operator_task_space(attrs, get_only(inputs_degrees)); + }, + [&](InputAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 0); + + return get_operator_task_space(attrs); + }, + [&](TransposeAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 1); + + return get_operator_task_space(attrs, get_only(inputs_degrees)); + }, + [&](WeightAttrs const &attrs) { + ASSERT(inputs_degrees.size() == 0); + + return get_operator_task_space(attrs); + }, + [](auto const &attrs) -> OperatorTaskSpace { + PANIC("Missing implementation of get_operator_task_space", attrs); + }, + }); +} + + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/num_ptensor_parallel_dims_t.cc b/lib/op-attrs/src/op-attrs/num_ptensor_parallel_dims_t.cc new file mode 100644 index 0000000000..0de6813034 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/num_ptensor_parallel_dims_t.cc @@ -0,0 +1,98 @@ +#include "op-attrs/num_ptensor_parallel_dims_t.h" +#include "utils/hash-utils.h" +#include +#include +#include + +namespace FlexFlow { + +num_ptensor_parallel_dims_t::num_ptensor_parallel_dims_t(int value) : value(value) { + this->check_invariant(); +} + +num_ptensor_parallel_dims_t::num_ptensor_parallel_dims_t(nonnegative_int value) + : value(value.unwrap_nonnegative()) { } + +num_ptensor_parallel_dims_t::num_ptensor_parallel_dims_t(positive_int value) + : value(value.int_from_positive_int()) { } + +bool num_ptensor_parallel_dims_t::operator<(num_ptensor_parallel_dims_t const &other) const { + return this->value < other.value; +} + +bool num_ptensor_parallel_dims_t::operator==(num_ptensor_parallel_dims_t const &other) const { + return this->value == other.value; +} + +bool num_ptensor_parallel_dims_t::operator>(num_ptensor_parallel_dims_t const &other) const { + return this->value > other.value; +} + +bool
num_ptensor_parallel_dims_t::operator<=(num_ptensor_parallel_dims_t const &other) const { + return this->value <= other.value; +} + +bool num_ptensor_parallel_dims_t::operator!=(num_ptensor_parallel_dims_t const &other) const { + return this->value != other.value; +} + +bool num_ptensor_parallel_dims_t::operator>=(num_ptensor_parallel_dims_t const &other) const { + return this->value >= other.value; +} + +int num_ptensor_parallel_dims_t::int_from_num_ptensor_parallel_dims() const { + return this->value; +} + +nonnegative_int num_ptensor_parallel_dims_t::nonnegative_int_from_num_ptensor_parallel_dims() const { + return nonnegative_int{this->value}; +} + +positive_int num_ptensor_parallel_dims_t::positive_int_from_num_ptensor_parallel_dims() const { + return positive_int{this->value}; +} + +void num_ptensor_parallel_dims_t::check_invariant() const { + ASSERT(this->value >= 2); + ASSERT(this->value <= MAX_TENSOR_DIM + 2); +} + +std::ostream &operator<<(std::ostream &s, num_ptensor_parallel_dims_t const &m) { + return (s << fmt::to_string(m)); +} + +std::string format_as(num_ptensor_parallel_dims_t const &m) { + return fmt::format("{} parallel dims", m.int_from_num_ptensor_parallel_dims()); +} + +} // namespace FlexFlow + +namespace nlohmann { +::FlexFlow::num_ptensor_parallel_dims_t + adl_serializer<::FlexFlow::num_ptensor_parallel_dims_t>::from_json(json const &j) { + return ::FlexFlow::num_ptensor_parallel_dims_t{j.template get()}; +} + +void adl_serializer<::FlexFlow::num_ptensor_parallel_dims_t>::to_json( + json &j, ::FlexFlow::num_ptensor_parallel_dims_t t) { + j = t.int_from_num_ptensor_parallel_dims(); +} +} // namespace nlohmann + +namespace rc { + +Gen<::FlexFlow::num_ptensor_parallel_dims_t> + Arbitrary<::FlexFlow::num_ptensor_parallel_dims_t>::arbitrary() { + return gen::construct<::FlexFlow::num_ptensor_parallel_dims_t>(gen::arbitrary()); +} + +} // namespace rc + +namespace std { + +size_t hash<::FlexFlow::num_ptensor_parallel_dims_t>::operator()( + ::FlexFlow::num_ptensor_parallel_dims_t const &m) const noexcept { + return ::FlexFlow::get_std_hash(m.int_from_num_ptensor_parallel_dims()); +} + +} // namespace std diff --git a/lib/op-attrs/src/op-attrs/num_ptensor_shard_dims_t.cc b/lib/op-attrs/src/op-attrs/num_ptensor_shard_dims_t.cc new file mode 100644 index 0000000000..f0f0927ad0 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/num_ptensor_shard_dims_t.cc @@ -0,0 +1,19 @@ +#include "op-attrs/num_ptensor_shard_dims_t.h" + +namespace FlexFlow { + +num_ptensor_parallel_dims_t num_ptensor_parallel_dims_from_shard_dims(num_ptensor_shard_dims_t num_shard_dims) { + return num_ptensor_parallel_dims_t{ + num_shard_dims.value + 2_p + }; +} + +num_ptensor_shard_dims_t num_ptensor_shard_dims_from_parallel_dims(num_ptensor_parallel_dims_t num_parallel_dims) { + return num_ptensor_shard_dims_t{ + nonnegative_int{ + num_parallel_dims.int_from_num_ptensor_parallel_dims() - 2 + }, + }; +} + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/num_tensor_dims_t.cc b/lib/op-attrs/src/op-attrs/num_tensor_dims_t.cc new file mode 100644 index 0000000000..e61eaf0192 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/num_tensor_dims_t.cc @@ -0,0 +1,191 @@ +#include "op-attrs/num_tensor_dims_t.h" +#include "op-attrs/num_ptensor_shard_dims_t.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/containers/transform.h" +#include + +namespace FlexFlow { + +num_tensor_dims_t::num_tensor_dims_t(nonnegative_int value_) + : value(value_) +{ + ASSERT(this->value <= MAX_TENSOR_DIM); +} + 
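The +2 in num_ptensor_parallel_dims_from_shard_dims above encodes that a parallel tensor always carries exactly two replica dimensions (SUM and DISCARD_COPY) in addition to its shard dimensions, which is also why num_ptensor_parallel_dims_t asserts value >= 2. A stand-alone restatement of that arithmetic, using plain ints rather than the wrapper types:

    // Illustration of the dimension-count invariant: parallel dims = shard dims + 2,
    // the 2 being the SUM and DISCARD_COPY replica dimensions.
    constexpr int NUM_REPLICA_DIMS = 2;

    constexpr int parallel_dims_from_shard_dims(int num_shard_dims) {
      return num_shard_dims + NUM_REPLICA_DIMS;
    }

    constexpr int shard_dims_from_parallel_dims(int num_parallel_dims) {
      return num_parallel_dims - NUM_REPLICA_DIMS;
    }

    // A 4-d tensor (4 shard dims) has 6 parallel dims, and the conversion round-trips.
    static_assert(parallel_dims_from_shard_dims(4) == 6);
    static_assert(shard_dims_from_parallel_dims(parallel_dims_from_shard_dims(4)) == 4);
    // The smallest legal count, 2 parallel dims, corresponds to zero shard dims.
    static_assert(shard_dims_from_parallel_dims(2) == 0);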
+bool num_tensor_dims_t::operator<(num_tensor_dims_t other) const { + return this->value < other.value; +} + +bool num_tensor_dims_t::operator==(num_tensor_dims_t other) const { + return this->value == other.value; +} + +bool num_tensor_dims_t::operator>(num_tensor_dims_t other) const { + return this->value > other.value; +} + +bool num_tensor_dims_t::operator<=(num_tensor_dims_t other) const { + return this->value <= other.value; +} + +bool num_tensor_dims_t::operator!=(num_tensor_dims_t other) const { + return this->value != other.value; +} + +bool num_tensor_dims_t::operator>=(num_tensor_dims_t other) const { + return this->value >= other.value; +} + +bool num_tensor_dims_t::operator<(nonnegative_int other) const { + return this->value < other; +} + +bool num_tensor_dims_t::operator==(nonnegative_int other) const { + return this->value == other; +} + +bool num_tensor_dims_t::operator>(nonnegative_int other) const { + return this->value > other; +} + +bool num_tensor_dims_t::operator<=(nonnegative_int other) const { + return this->value <= other; +} + +bool num_tensor_dims_t::operator!=(nonnegative_int other) const { + return this->value != other; +} + +bool num_tensor_dims_t::operator>=(nonnegative_int other) const { + return this->value >= other; +} + +bool operator<(nonnegative_int lhs, num_tensor_dims_t rhs) { + return lhs < rhs.value; +} + +bool operator==(nonnegative_int lhs, num_tensor_dims_t rhs) { + return lhs == rhs.value; +} + +bool operator>(nonnegative_int lhs, num_tensor_dims_t rhs) { + return lhs > rhs.value; +} + +bool operator<=(nonnegative_int lhs, num_tensor_dims_t rhs) { + return lhs <= rhs.value; +} + +bool operator!=(nonnegative_int lhs, num_tensor_dims_t rhs) { + return lhs != rhs.value; +} + +bool operator>=(nonnegative_int lhs, num_tensor_dims_t rhs) { + return lhs >= rhs.value; +} + +bool num_tensor_dims_t::operator<(int other) const { + return this->value < other; +} + +bool num_tensor_dims_t::operator==(int other) const { + return this->value == other; +} + +bool num_tensor_dims_t::operator>(int other) const { + return this->value > other; +} + +bool num_tensor_dims_t::operator<=(int other) const { + return this->value <= other; +} + +bool num_tensor_dims_t::operator!=(int other) const { + return this->value != other; +} + +bool num_tensor_dims_t::operator>=(int other) const { + return this->value >= other; +} + +bool operator<(int lhs, num_tensor_dims_t rhs) { + return lhs < rhs.value; +} + +bool operator==(int lhs, num_tensor_dims_t rhs) { + return lhs == rhs.value; +} + +bool operator>(int lhs, num_tensor_dims_t rhs) { + return lhs > rhs.value; +} + +bool operator<=(int lhs, num_tensor_dims_t rhs) { + return lhs <= rhs.value; +} + +bool operator!=(int lhs, num_tensor_dims_t rhs) { + return lhs != rhs.value; +} + +bool operator>=(int lhs, num_tensor_dims_t rhs) { + return lhs >= rhs.value; +} + +nonnegative_int num_tensor_dims_t::nonnegative_int_from_num_tensor_dims() const { + return this->value; +} + +int num_tensor_dims_t::int_from_num_tensor_dims() const { + return this->value.unwrap_nonnegative(); +} + +void num_tensor_dims_t::check_invariant() const { + ASSERT(this->value <= MAX_TENSOR_DIM); +} + +nonnegative_int format_as(num_tensor_dims_t num_tensor_dims) { + return num_tensor_dims.nonnegative_int_from_num_tensor_dims(); +} + +std::ostream &operator<<(std::ostream &s, num_tensor_dims_t num_tensor_dims) { + return (s << fmt::to_string(num_tensor_dims)); +} + + +num_tensor_dims_t num_tensor_dims_from_num_ptensor_shard_dims(num_ptensor_shard_dims_t 
num_ptensor_shard_dims) { + return num_tensor_dims_t{num_ptensor_shard_dims.value}; +} + +num_tensor_dims_t num_tensor_dims_from_num_ptensor_parallel_dims(num_ptensor_parallel_dims_t num_ptensor_parallel_dims) { + return num_tensor_dims_from_num_ptensor_shard_dims( + num_ptensor_shard_dims_from_parallel_dims( + num_ptensor_parallel_dims)); +} + +num_ptensor_shard_dims_t num_ptensor_shard_dims_from_num_tensor_dims(num_tensor_dims_t num_tensor_dims) { + return num_ptensor_shard_dims_t{num_tensor_dims.nonnegative_int_from_num_tensor_dims()}; +} + +num_ptensor_parallel_dims_t num_ptensor_parallel_dims_from_num_tensor_dims(num_tensor_dims_t num_tensor_dims) { + return num_ptensor_parallel_dims_from_shard_dims( + num_ptensor_shard_dims_from_num_tensor_dims( + num_tensor_dims)); +} + +std::vector tensor_dims_range(num_tensor_dims_t num_tensor_dims) { + return transform(nonnegative_range(num_tensor_dims.nonnegative_int_from_num_tensor_dims()), + [](nonnegative_int idx) { + return ff_dim_t{idx}; + }); +} + +std::vector relative_tensor_dims_range(num_tensor_dims_t num_tensor_dims) { + return transform(nonnegative_range(num_tensor_dims.nonnegative_int_from_num_tensor_dims()), + [](nonnegative_int idx) { + return relative_ff_dim_t{idx.unwrap_nonnegative()}; + }); +} + + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/operator_space_to_parallel_tensor_space_mapping.cc b/lib/op-attrs/src/op-attrs/operator_space_to_parallel_tensor_space_mapping.cc new file mode 100644 index 0000000000..9019023a76 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/operator_space_to_parallel_tensor_space_mapping.cc @@ -0,0 +1,123 @@ +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/num_ptensor_shard_dims_t.h" +#include "op-attrs/operator_task_space.h" +#include "op-attrs/parallel_tensor_dim_degrees.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "op-attrs/parallel_tensor_space_coordinate.h" +#include "op-attrs/task_space_coordinate.h" +#include "utils/bidict/algorithms/bidict_from_keys_and_values.h" +#include "utils/containers/set_of.h" +#include "utils/containers/transform.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/nonnegative_int/range.h" +#include "utils/orthotope/dim_projection.h" +#include "utils/orthotope/minimal_dim_domain.h" +#include "utils/orthotope/minimal_dim_domain_mapping.h" + +namespace FlexFlow { + +OperatorSpaceToParallelTensorSpaceMapping + empty_operator_space_to_ptensor_space_map() { + + return OperatorSpaceToParallelTensorSpaceMapping{ + empty_dim_domain_mapping(), + }; +} + +OperatorTaskSpace + get_operator_task_space_for_mapping(OperatorSpaceToParallelTensorSpaceMapping const &mapping) { + + return operator_task_space_from_minimal_dim_domain( + require_dim_domain_is_minimal(mapping.raw_mapping.l_domain)); +} + +ParallelTensorDimDegrees + get_parallel_tensor_space_for_mapping(OperatorSpaceToParallelTensorSpaceMapping const &mapping) { + + return parallel_tensor_dim_degrees_from_dim_domain(mapping.raw_mapping.r_domain); +} + + +OperatorSpaceToParallelTensorSpaceMapping + get_identity_mapping( + OperatorTaskSpace const &operator_task_space, + ParallelTensorDimDegrees const ¶llel_tensor_dim_degrees) { + + MinimalDimDomain pt_minimal_dim_domain + = minimal_dim_domain_from_parallel_tensor_dim_degrees(parallel_tensor_dim_degrees); + + ASSERT(op_task_space_num_dims(operator_task_space) == minimal_dim_domain_num_dims(pt_minimal_dim_domain)); + + std::vector 
op_minimal_domain_dims + = sorted_by(operator_task_space_get_dim_idxs(operator_task_space), + get_operator_task_space_dim_ordering().lt); + + std::vector pt_minimal_domain_dims + = sorted_by(get_minimal_domain_dims(pt_minimal_dim_domain), + get_parallel_tensor_dim_ordering().lt); + + bidict + projection = bidict_from_keys_and_values(op_minimal_domain_dims, pt_minimal_domain_dims); + + return operator_ptensor_space_mapping_from_projection( + DimProjection{EqProjection{projection}}, + operator_task_space, + parallel_tensor_dim_degrees); +} + +OperatorSpaceToParallelTensorSpaceMapping + operator_ptensor_space_mapping_from_projection( + DimProjection const &projection, + OperatorTaskSpace const &operator_task_space, + ParallelTensorDimDegrees const ¶llel_tensor_dim_degrees) { + + return OperatorSpaceToParallelTensorSpaceMapping{ + dim_domain_mapping_from_projection( + /*projection=*/projection, + /*l_domain=*/lift_minimal_dim_domain(minimal_dim_domain_from_operator_task_space(operator_task_space)), + /*r_domain=*/lift_minimal_dim_domain(minimal_dim_domain_from_parallel_tensor_dim_degrees(parallel_tensor_dim_degrees)), + /*l_dim_ordering=*/get_operator_task_space_dim_ordering(), + /*r_dim_ordering=*/get_parallel_tensor_dim_ordering()), + }; +} + +OperatorSpaceToParallelTensorSpaceMapping + operator_ptensor_space_mapping_from_composition( + OperatorSpaceToParallelTensorSpaceMapping const &op_to_pt1_mapping, + ParallelTensorSpaceToParallelTensorSpaceMapping const &pt1_to_pt2_mapping) { + + return OperatorSpaceToParallelTensorSpaceMapping{ + compose_dim_domain_mappings_through_minimal( + op_to_pt1_mapping.raw_mapping, + pt1_to_pt2_mapping.raw_mapping), + }; +} + + +ParallelTensorSpaceCoordinate + ptensor_coord_for_task_space_coord( + OperatorSpaceToParallelTensorSpaceMapping const &mapping, + TaskSpaceCoordinate const &task_space_coordinate) { + + DimCoord dim_coord = + mapping.raw_mapping.at_l( + dim_coord_from_task_space_coordinate(task_space_coordinate)); + + return parallel_tensor_space_coord_from_dim_coord(dim_coord); +} + +TaskSpaceCoordinate + task_space_coord_for_ptensor_coord( + OperatorSpaceToParallelTensorSpaceMapping const &mapping, + ParallelTensorSpaceCoordinate const &ptensor_space_coord) { + + DimCoord dim_coord = + mapping.raw_mapping.at_r( + dim_coord_from_parallel_tensor_space_coord(ptensor_space_coord)); + + return task_space_coordinate_from_dim_coord(dim_coord); +} + + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/operator_task_space.cc b/lib/op-attrs/src/op-attrs/operator_task_space.cc new file mode 100644 index 0000000000..9d0efc5905 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/operator_task_space.cc @@ -0,0 +1,112 @@ +#include "op-attrs/operator_task_space.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/operator_task_space_dim_idx_t.h" +#include "op-attrs/parallel_tensor_dim_degrees.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "utils/containers/cartesian_product.h" +#include "utils/containers/extend.h" +#include "utils/containers/maximum.h" +#include "utils/containers/product.h" +#include "utils/containers/range.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" +#include "utils/containers/vector_of.h" +#include "utils/fmt/unordered_set.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/orthotope/dim_domain.h" 
+#include "utils/orthotope/dim_ordering.h" +#include "utils/orthotope/minimal_dim_domain.h" +#include "utils/orthotope/minimal_orthotope.h" +#include "utils/orthotope/orthotope.dtg.h" +#include "utils/orthotope/orthotope.h" + +namespace FlexFlow { + +OperatorTaskSpace trivial_op_task_space() { + return OperatorTaskSpace{MinimalOrthotope{{}}}; +} + +std::unordered_set + operator_task_space_get_dim_idxs(OperatorTaskSpace const &op_task_space) { + return get_minimal_domain_dims(minimal_dim_domain_from_operator_task_space(op_task_space)); +} + +std::unordered_set + get_task_space_coordinates(OperatorTaskSpace const &task) { + + std::vector> coordinate_ranges = + transform(task.degrees.dims, + [&](int_ge_two num_points) { + return nonnegative_range( + num_points.nonnegative_int_from_int_ge_two()); + }); + + std::unordered_set> raw_coordinates = + unordered_set_of(cartesian_product(coordinate_ranges)); + std::unordered_set task_space_coordinates = + transform(raw_coordinates, [](std::vector const &point) { + return TaskSpaceCoordinate{OrthotopeCoord{point}}; + }); + return task_space_coordinates; +} + +bool operator_task_space_contains_coord(OperatorTaskSpace const &task_space, + TaskSpaceCoordinate const &coord) { + return contains(get_task_space_coordinates(task_space), coord); +} + +TaskSpaceCoordinate + get_task_space_maximum_coordinate(OperatorTaskSpace const &task) { + return maximum(get_task_space_coordinates(task)); +} + +nonnegative_int op_task_space_num_dims(OperatorTaskSpace const &op_task_space) { + return minimal_orthotope_get_num_dims(op_task_space.degrees); +} + +positive_int num_tasks(OperatorTaskSpace const &op_task_space) { + return minimal_orthotope_get_volume(op_task_space.degrees); +} + +MinimalDimDomain + minimal_dim_domain_from_operator_task_space(OperatorTaskSpace const &operator_task_space) { + + MinimalOrthotope minimal_orthotope = operator_task_space.degrees; + + return minimal_dim_domain_from_minimal_orthotope( + minimal_orthotope, + unordered_set_of(operator_task_space_dim_idx_range(minimal_orthotope_get_num_dims(minimal_orthotope))), + get_operator_task_space_dim_ordering()); +} + +OperatorTaskSpace + operator_task_space_from_minimal_dim_domain(MinimalDimDomain const &minimal_dim_domain) { + + return OperatorTaskSpace{ + minimal_orthotope_from_minimal_dim_domain( + minimal_dim_domain, + get_operator_task_space_dim_ordering()), + }; +} + + +DimOrdering + get_operator_task_space_dim_ordering() { + return make_default_dim_ordering(); +} + +OperatorTaskSpace + get_operator_task_space_matching_parallel_tensor_dim_degrees( + ParallelTensorDimDegrees const &dim_degrees) { + return OperatorTaskSpace{ + minimal_orthotope_from_minimal_dim_domain( + minimal_dim_domain_from_parallel_tensor_dim_degrees(dim_degrees), + get_parallel_tensor_dim_ordering()), + }; +} + + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/operator_task_space_dim_idx_t.cc b/lib/op-attrs/src/op-attrs/operator_task_space_dim_idx_t.cc new file mode 100644 index 0000000000..c78afc502f --- /dev/null +++ b/lib/op-attrs/src/op-attrs/operator_task_space_dim_idx_t.cc @@ -0,0 +1,15 @@ +#include "op-attrs/operator_task_space_dim_idx_t.h" +#include "utils/containers/set_of.h" +#include "utils/containers/transform.h" +#include "utils/nonnegative_int/range.h" + +namespace FlexFlow { + +std::set + operator_task_space_dim_idx_range(nonnegative_int end) { + return transform(set_of(range(end)), [](nonnegative_int raw_idx) { + return operator_task_space_dim_idx_t{raw_idx}; + }); +} + +} // namespace FlexFlow 
diff --git a/lib/op-attrs/src/op-attrs/operator_task_space_to_operator_task_space_mapping.cc b/lib/op-attrs/src/op-attrs/operator_task_space_to_operator_task_space_mapping.cc new file mode 100644 index 0000000000..580f7157d7 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/operator_task_space_to_operator_task_space_mapping.cc @@ -0,0 +1,59 @@ +#include "op-attrs/operator_task_space_to_operator_task_space_mapping.h" +#include "op-attrs/operator_task_space.h" +#include "utils/bidict/algorithms/transform_keys.h" +#include "utils/bidict/algorithms/transform_values.h" +#include "utils/orthotope/minimal_dim_domain.h" +#include "op-attrs/task_space_coordinate.h" +#include "utils/orthotope/minimal_dim_domain_mapping.h" + +namespace FlexFlow { + +OperatorTaskSpaceToOperatorTaskSpaceMapping op_to_op_identity_mapping( + OperatorTaskSpace const &src_space, + OperatorTaskSpace const &dst_space) { + + return OperatorTaskSpaceToOperatorTaskSpaceMapping{ + dim_domain_mapping_identity_map( + /*l_domain=*/lift_minimal_dim_domain(minimal_dim_domain_from_operator_task_space(src_space)), + /*r_domain=*/lift_minimal_dim_domain(minimal_dim_domain_from_operator_task_space(dst_space)), + /*l_dim_ordering=*/get_operator_task_space_dim_ordering(), + /*r_dim_ordering=*/get_operator_task_space_dim_ordering()), + }; +} + +OperatorTaskSpace + op_mapping_get_src_space(OperatorTaskSpaceToOperatorTaskSpaceMapping const &mapping) { + + return operator_task_space_from_minimal_dim_domain( + require_dim_domain_is_minimal(mapping.raw_mapping.l_domain)); +} + +OperatorTaskSpace + op_mapping_get_dst_space(OperatorTaskSpaceToOperatorTaskSpaceMapping const &mapping) { + + return operator_task_space_from_minimal_dim_domain( + require_dim_domain_is_minimal(mapping.raw_mapping.r_domain)); +} + +bidict + op_to_op_get_coord_mapping(OperatorTaskSpaceToOperatorTaskSpaceMapping const &mapping) { + return transform_values( + transform_keys(mapping.raw_mapping.coord_mapping, + task_space_coordinate_from_dim_coord), + task_space_coordinate_from_dim_coord); +} + +OperatorTaskSpaceToOperatorTaskSpaceMapping + op_to_op_mapping_from_composition_through_tensor( + OperatorSpaceToParallelTensorSpaceMapping const &src_to_tensor_mapping, + OperatorSpaceToParallelTensorSpaceMapping const &dst_to_tensor_mapping) { + + return OperatorTaskSpaceToOperatorTaskSpaceMapping{ + compose_dim_domain_mappings_through_minimal( + src_to_tensor_mapping.raw_mapping, + invert_dim_domain_mapping(dst_to_tensor_mapping.raw_mapping)), + }; +} + + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ops/attention.cc b/lib/op-attrs/src/op-attrs/ops/attention.cc index cc6ef8cfac..dfd6f77a93 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention.cc @@ -5,8 +5,10 @@ #include "op-attrs/tensor_dims.h" #include "op-attrs/tensor_shape.h" #include "utils/containers/extend.h" +#include "utils/exception.h" #include "utils/expected.h" #include "utils/integer_conversions.h" +#include namespace FlexFlow { @@ -95,10 +97,9 @@ positive_int get_num_samples(MultiHeadAttentionInputs const &inputs) { } static void check_attrs(MultiHeadAttentionAttrs const &attrs) { - if (attrs.add_bias_kv) { - throw mk_runtime_error("add_bias_kv is not yet supported. If you need this " - "functionality, please create an issue."); - } + ASSERT(!attrs.add_bias_kv, + "add_bias_kv is not yet supported. 
If you need this " + "functionality, please create an issue."); } std::vector diff --git a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc index 3225f1aef2..f9d00dc523 100644 --- a/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc +++ b/lib/op-attrs/src/op-attrs/ops/attention/multihead_attention_parallel_inputs.cc @@ -23,19 +23,19 @@ tl::expected unpar_parse_result.error())); } - if (num_shard_dims(input_q) != 3) { + if (num_shard_dims(input_q).value != 3) { return tl::unexpected( fmt::format("Query input has incorrect number of dims: {} != {}", num_shard_dims(input_q), 3)); } - if (num_shard_dims(input_k) != 3) { + if (num_shard_dims(input_k).value != 3) { return tl::unexpected( fmt::format("Key input has incorrect number of dims: {} != {}", num_shard_dims(input_k), 3)); } - if (num_shard_dims(input_v) != 3) { + if (num_shard_dims(input_v).value != 3) { return tl::unexpected( fmt::format("Value input has incorrect number of dims: {} != {}", num_shard_dims(input_v), diff --git a/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc b/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc index 3c76561d17..268b83833b 100644 --- a/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc +++ b/lib/op-attrs/src/op-attrs/ops/batch_matmul.cc @@ -1,6 +1,7 @@ #include "op-attrs/ops/batch_matmul.h" #include "op-attrs/parallel_tensor_shape.h" #include "op-attrs/tensor_dims.h" +#include "utils/exception.h" namespace FlexFlow { @@ -91,13 +92,13 @@ tl::expected get_output_shape(BatchMatmulAttrs const &attrs, ParallelTensorShape const &input_lhs, ParallelTensorShape const &input_rhs) { - if (num_shard_dims(input_lhs) != 3) { + if (num_shard_dims(input_lhs).value != 3) { return tl::unexpected( fmt::format("LHS input has incorrect number of shard dims: {} != {}", num_shard_dims(input_lhs), 3)); } - if (num_shard_dims(input_rhs) != 3) { + if (num_shard_dims(input_rhs).value != 3) { return tl::unexpected( fmt::format("RHS input has incorrect number of shard dims: {} != {}", num_shard_dims(input_rhs), diff --git a/lib/op-attrs/src/op-attrs/ops/broadcast.cc b/lib/op-attrs/src/op-attrs/ops/broadcast.cc index d84a9ee46e..927d4fd913 100644 --- a/lib/op-attrs/src/op-attrs/ops/broadcast.cc +++ b/lib/op-attrs/src/op-attrs/ops/broadcast.cc @@ -1,5 +1,7 @@ #include "op-attrs/ops/broadcast.h" +#include "op-attrs/num_tensor_dims_t.h" #include "op-attrs/tensor_dims.h" +#include "utils/exception.h" #include "utils/record_formatter.h" namespace FlexFlow { @@ -13,9 +15,9 @@ RecordFormatter as_dot(BroadcastAttrs const &attrs) { return rr; }; - for (int i = 0; i < get_num_dims(attrs.target_dims); i++) { - r << kv(fmt::format("target_dims[{}]", i), - dim_at_idx(attrs.target_dims, relative_ff_dim_t{i})); + for (ff_dim_t dim_idx : tensor_dims_range(get_num_dims(attrs.target_dims))) { + r << kv(fmt::format("target_dims[{}]", dim_idx.value), + dim_at_idx(attrs.target_dims, dim_idx)); } return r; diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d.cc index 6ff1b8a06e..9ca8398c0a 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d.cc @@ -4,6 +4,7 @@ #include "op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.h" #include "utils/fmt/optional.h" #include "utils/integer_conversions.h" +#include namespace FlexFlow { @@ -198,12 +199,6 @@ std::vector std::optional maybe_kernel_initializer, std::optional maybe_bias_initializer) { - if 
(!attrs.use_bias && maybe_bias_initializer.has_value()) { - throw mk_runtime_error(fmt::format( - "Unexpectedly received bias initializer while use_bias=false: {}", - maybe_bias_initializer)); - } - TensorShape kernel_shape = get_kernel_shape(attrs, input_shape); InitializerAttrs kernel_default_initializer = diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.cc index 79bb14f2b2..1cac441c08 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_input_shape.cc @@ -1,6 +1,8 @@ #include "op-attrs/ops/conv_2d/conv_2d_input_shape.h" #include "op-attrs/tensor_dims.h" #include "op-attrs/tensor_shape.h" +#include +#include "op-attrs/num_tensor_dims_t.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.cc b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.cc index 8143353b2d..e08bd4bec2 100644 --- a/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.cc +++ b/lib/op-attrs/src/op-attrs/ops/conv_2d/conv_2d_parallel_input_shape.cc @@ -5,7 +5,7 @@ namespace FlexFlow { Conv2DParallelInputShape parse_parallel_input_shape(ParallelTensorShape const &input) { - assert(num_shard_dims(input) == 4); + assert(num_shard_dims(input).value == 4); ShardParallelDim sample_dim = shard_dim_at_idx(input, relative_ff_dim_t{0}); ShardParallelDim channel_dim = shard_dim_at_idx(input, relative_ff_dim_t{1}); diff --git a/lib/op-attrs/src/op-attrs/ops/element_binary.cc b/lib/op-attrs/src/op-attrs/ops/element_binary.cc index 16957a036c..825eadfca2 100644 --- a/lib/op-attrs/src/op-attrs/ops/element_binary.cc +++ b/lib/op-attrs/src/op-attrs/ops/element_binary.cc @@ -1,55 +1,63 @@ #include "op-attrs/ops/element_binary.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/operator_task_space.h" +#include "utils/containers/require_same.h" +#include "utils/exception.h" namespace FlexFlow { -tl::expected - get_output_shape(ElementBinaryAttrs const &attrs, - TensorShape const &input_lhs, - TensorShape const &input_rhs) { - assert(!(attrs.should_broadcast_lhs && attrs.should_broadcast_rhs)); +TensorShape get_output_shape(ElementBinaryAttrs const &attrs, + TensorShape const &input_lhs, + TensorShape const &input_rhs) { + ASSERT(!attrs.should_broadcast_lhs && !attrs.should_broadcast_rhs, + "ElementBinary broadcasting is currently not supported. 
" + "Contact @lockshaw if you want this feature implemented."); if (attrs.should_broadcast_lhs) { NOT_IMPLEMENTED(); } else if (attrs.should_broadcast_rhs) { NOT_IMPLEMENTED(); } else { - if (input_lhs != input_rhs) { - return tl::unexpected(fmt::format( - "Expected input shapes to match, but receieved LHS ({}) != RHS ({})", - input_lhs, - input_rhs)); - } + ASSERT(input_lhs == input_rhs, "Expected input shapes to match"); return input_lhs; } } -tl::expected - get_output_shape(ElementBinaryAttrs const &attrs, - ParallelTensorShape const &input_lhs, - ParallelTensorShape const &input_rhs) { - assert(!(attrs.should_broadcast_lhs && attrs.should_broadcast_rhs)); +ParallelTensorShape get_output_shape(ElementBinaryAttrs const &attrs, + ParallelTensorShape const &input_lhs, + ParallelTensorShape const &input_rhs) { + TensorShape output_shape = get_output_shape(attrs, get_reduced_shape(input_lhs), get_reduced_shape(input_rhs)); + + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, + get_parallel_degrees(input_lhs), + get_parallel_degrees(input_rhs)); + + return lift_to_parallel_with_degrees(output_shape, output_degrees); +} + +ParallelTensorDimDegrees + get_output_parallel_dim_degrees(ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees) { + ASSERT(!attrs.should_broadcast_lhs && !attrs.should_broadcast_rhs, + "ElementBinary broadcasting is currently not supported. " + "Contact @lockshaw if you want this feature implemented."); + + ASSERT(lhs_input_degrees == rhs_input_degrees); if (attrs.should_broadcast_lhs) { NOT_IMPLEMENTED(); } else if (attrs.should_broadcast_rhs) { NOT_IMPLEMENTED(); } else { - if (input_lhs != input_rhs) { - return tl::unexpected(fmt::format( - "Expected input shapes to match, but receieved LHS ({}) != RHS ({})", - input_lhs, - input_rhs)); - } + ASSERT(lhs_input_degrees == rhs_input_degrees, "Expected input degrees to match"); switch (attrs.type) { case OperatorType::EW_ADD: { - if (get_discard_copy_degree(input_lhs) != 1) { - return tl::unexpected( - fmt::format("Elementwise Add expected discard copy degree of " - "inputs to be 1, but receieved {}", - get_discard_copy_degree(input_lhs))); - } + ASSERT( + lhs_input_degrees.discard_copy_degree.value == 1, + "Elementwise Add expected discard copy degree of inputs to be 1"); break; } @@ -64,12 +72,55 @@ tl::expected case OperatorType::EW_MIN: NOT_IMPLEMENTED(); default: - return tl::unexpected(fmt::format( - "Unexpected element-wise binary operator {}", attrs.type)); + PANIC("Unexpected element-wise binary operator", attrs.type); } - return input_lhs; + return lhs_input_degrees; } } +OperatorTaskSpace + get_operator_task_space(ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees) { + + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, lhs_input_degrees, rhs_input_degrees); + + return get_operator_task_space_matching_parallel_tensor_dim_degrees(output_degrees); +} + + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_lhs_input_mapping( + ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees) { + + return get_identity_mapping( + get_operator_task_space(attrs, lhs_input_degrees, rhs_input_degrees), + lhs_input_degrees); +} + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_rhs_input_mapping( + 
ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees) { + + return get_identity_mapping( + get_operator_task_space(attrs, lhs_input_degrees, rhs_input_degrees), + rhs_input_degrees); +} + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_output_mapping( + ElementBinaryAttrs const &attrs, + ParallelTensorDimDegrees const &lhs_input_degrees, + ParallelTensorDimDegrees const &rhs_input_degrees) { + + ParallelTensorDimDegrees output_dim_degrees + = get_output_parallel_dim_degrees(attrs, lhs_input_degrees, rhs_input_degrees); + + return get_identity_mapping( + get_operator_task_space(attrs, lhs_input_degrees, rhs_input_degrees), + output_dim_degrees); +} + } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ops/element_unary.cc b/lib/op-attrs/src/op-attrs/ops/element_unary.cc index fd65e1f5c9..b4b79bfc89 100644 --- a/lib/op-attrs/src/op-attrs/ops/element_unary.cc +++ b/lib/op-attrs/src/op-attrs/ops/element_unary.cc @@ -1,5 +1,10 @@ #include "op-attrs/ops/element_unary.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/operator_task_space.h" +#include "op-attrs/parallel_tensor_dim_degrees.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" #include "op-attrs/parallel_tensor_shape.h" +#include "utils/orthotope/minimal_dim_domain.h" namespace FlexFlow { @@ -10,28 +15,56 @@ ElementUnaryAttrs make_relu_attrs() { }; } -tl::expected +TensorShape get_output_shape(ElementUnaryAttrs const &attrs, TensorShape const &input_shape) { return input_shape; } -tl::expected +ParallelTensorShape get_output_shape(ElementUnaryAttrs const &attrs, ParallelTensorShape const &input_shape) { - if (get_sum_degree(input_shape) != 1) { - return tl::unexpected( - fmt::format("Expected sum degree 1, but receieved sum degree {}", - get_sum_degree(input_shape))); - } - - if (get_discard_copy_degree(input_shape) != 1) { - return tl::unexpected(fmt::format( - "Expected discard copy degree 1, but received discartd copy degree {}", - get_discard_copy_degree(input_shape))); - } + TensorShape output_shape = get_output_shape(attrs, get_reduced_shape(input_shape)); - return input_shape; + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, get_parallel_degrees(input_shape)); + + return lift_to_parallel_with_degrees(output_shape, output_degrees); +} + +ParallelTensorDimDegrees get_output_parallel_dim_degrees( + ElementUnaryAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + ASSERT(input_degrees.sum_degree.value == 1); + ASSERT(input_degrees.discard_copy_degree.value == 1); + + return input_degrees; +} + +OperatorTaskSpace get_operator_task_space(ElementUnaryAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, input_degrees); + + return get_operator_task_space_matching_parallel_tensor_dim_degrees(output_degrees); +} + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_input_mapping( + ElementUnaryAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + return get_identity_mapping( + get_operator_task_space(attrs, input_degrees), + input_degrees); +} + +OperatorSpaceToParallelTensorSpaceMapping get_operator_to_output_mapping( + ElementUnaryAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, input_degrees); + + return 
get_identity_mapping( + get_operator_task_space(attrs, input_degrees), + output_degrees); } } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ops/flat.cc b/lib/op-attrs/src/op-attrs/ops/flat.cc index 14180cecf8..5469380c05 100644 --- a/lib/op-attrs/src/op-attrs/ops/flat.cc +++ b/lib/op-attrs/src/op-attrs/ops/flat.cc @@ -34,9 +34,8 @@ TensorShape get_output_shape(FlatAttrs const &attrs, }; } -tl::expected - get_output_parallel_dim_degrees( - FlatAttrs const &attrs, ParallelTensorDimDegrees const &input_degrees) { +ParallelTensorDimDegrees get_output_parallel_dim_degrees( + FlatAttrs const &attrs, ParallelTensorDimDegrees const &input_degrees) { FFOrdered flattened_dim_degrees = slice(input_degrees.shard_degrees, attrs.start_dim, attrs.end_dim); @@ -44,14 +43,10 @@ tl::expected return input_degrees; } - if (any_of(flattened_dim_degrees, - [](positive_int degree) { return degree != 1; })) { - return tl::unexpected( - fmt::format("get_output_parallel_dim_degrees for {} expected all shard " - "degrees of flattened dimensions to be 1, but received {}", - attrs, - input_degrees)); - } + ASSERT(any_of(flattened_dim_degrees, + [](positive_int degree) { return degree != 1; }), + "get_output_parallel_dim_degrees for {} expected all shard degrees of " + "flattened dimensions to be 1"); return ParallelTensorDimDegrees{ /*sum_degree=*/input_degrees.sum_degree, @@ -65,20 +60,12 @@ tl::expected }; } -tl::expected - get_output_shape(FlatAttrs const &attrs, - ParallelTensorShape const &input_shape) { +ParallelTensorShape get_output_shape(FlatAttrs const &attrs, + ParallelTensorShape const &input_shape) { TensorShape unpar = get_output_shape(attrs, get_reduced_shape(input_shape)); - ParallelTensorDimDegrees degrees = ({ - tl::expected returned = - get_output_parallel_dim_degrees(attrs, - get_parallel_degrees(input_shape)); - if (!returned.has_value()) { - return tl::unexpected(returned.error()); - } - returned.value(); - }); + ParallelTensorDimDegrees degrees = + get_output_parallel_dim_degrees(attrs, get_parallel_degrees(input_shape)); return lift_to_parallel_with_degrees(unpar, degrees); } diff --git a/lib/op-attrs/src/op-attrs/ops/gather.cc b/lib/op-attrs/src/op-attrs/ops/gather.cc index 4b1053aee1..2c5a4bbdc0 100644 --- a/lib/op-attrs/src/op-attrs/ops/gather.cc +++ b/lib/op-attrs/src/op-attrs/ops/gather.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/gather.h" +#include "utils/exception.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/op-attrs/ops/input.cc b/lib/op-attrs/src/op-attrs/ops/input.cc index d1f68584b9..90bb2adb03 100644 --- a/lib/op-attrs/src/op-attrs/ops/input.cc +++ b/lib/op-attrs/src/op-attrs/ops/input.cc @@ -1,4 +1,6 @@ #include "op-attrs/ops/input.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/operator_task_space.h" #include "op-attrs/parallel_tensor_shape.h" namespace FlexFlow { @@ -11,4 +13,14 @@ ParallelTensorShape get_output_parallel_tensor_shape(InputAttrs const &attrs) { return lift_to_parallel(attrs.tensor_shape); } +OperatorTaskSpace get_operator_task_space(InputAttrs const &) { + return trivial_op_task_space(); +} + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_output_mapping(InputAttrs const &attrs) { + + return empty_operator_space_to_ptensor_space_map(); +} + } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ops/layer_norm.cc b/lib/op-attrs/src/op-attrs/ops/layer_norm.cc index e0db1cdfe7..c58a2bba62 100644 --- a/lib/op-attrs/src/op-attrs/ops/layer_norm.cc +++ 
b/lib/op-attrs/src/op-attrs/ops/layer_norm.cc @@ -9,6 +9,7 @@ #include "utils/containers/contains.h" #include "utils/containers/extend.h" #include "utils/containers/filter.h" +#include "utils/containers/vector_of.h" #include "utils/expected.h" #include "utils/fmt/set.h" @@ -72,7 +73,7 @@ tl::expected } std::vector non_layer_norm_dim_idxs = filter( - get_idxs(input_shape.dims.ff_ordered), + vector_of(get_idxs(input_shape.dims.ff_ordered)), [&](ff_dim_t const &dim_idx) { return !contains(attrs.axes, dim_idx); }); std::vector raw_weight_dims = transform(non_layer_norm_dim_idxs, [&](ff_dim_t const &dim_idx) { @@ -180,7 +181,7 @@ tl::expected } std::vector non_layer_norm_dim_idxs = filter( - get_idxs(input_shape.dims.shard_dims), + vector_of(get_idxs(input_shape.dims.shard_dims)), [&](ff_dim_t const &dim_idx) { return !contains(attrs.axes, dim_idx); }); std::vector raw_weight_shard_dims = transform(non_layer_norm_dim_idxs, [&](ff_dim_t const &dim_idx) { diff --git a/lib/op-attrs/src/op-attrs/ops/linear.cc b/lib/op-attrs/src/op-attrs/ops/linear.cc index 37f504f873..7611ce66d7 100644 --- a/lib/op-attrs/src/op-attrs/ops/linear.cc +++ b/lib/op-attrs/src/op-attrs/ops/linear.cc @@ -2,13 +2,27 @@ #include "op-attrs/ff_ordered/slice.h" #include "op-attrs/ff_ordered/transform.h" #include "op-attrs/initializers/kaiming_initializer_mode.h" +#include "op-attrs/num_ptensor_shard_dims_t.h" +#include "op-attrs/num_tensor_dims_t.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/operator_task_space.h" +#include "op-attrs/parallel_tensor_dim_degrees.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" #include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/relative_ff_dim_t.h" #include "op-attrs/tensor_dims.h" #include "op-attrs/tensor_shape.h" #include "utils/containers/product.h" +#include "utils/containers/unordered_set_of.h" #include "utils/expected.h" #include "utils/fmt/optional.h" #include "utils/integer_conversions.h" +#include "utils/orthotope/dim_projection.h" +#include "utils/orthotope/down_projection.h" +#include "utils/orthotope/eq_projection.h" +#include "utils/orthotope/minimal_dim_domain_mapping.h" +#include "utils/orthotope/up_projection.h" namespace FlexFlow { @@ -101,18 +115,10 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = SumDegree{1_p}; - DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{ - get_sum_degree(input) * product(slice(ff_ordered_shard_degrees(input), - relative_ff_dim_t{0}, - relative_ff_dim_t{-1}))}; - FFOrdered shard_degrees = FFOrdered{ - get_discard_copy_degree(input), - shard_dim_at_idx(input, relative_ff_dim_t{-1}).degree, - }; + ParallelTensorDimDegrees projection_degrees = get_projection_parallel_dim_degrees(attrs, get_parallel_degrees(input)); return lift_to_parallel_with_degrees( - unpar, sum_degree, discard_copy_degree, shard_degrees); + unpar, projection_degrees); } tl::expected @@ -126,18 +132,10 @@ tl::expected result_unpar.value(); }); - SumDegree sum_degree = - SumDegree{get_sum_degree(input) * - shard_dim_at_idx(input, relative_ff_dim_t{-1}).degree}; - DiscardCopyDegree discard_copy_degree = - DiscardCopyDegree{product(slice(ff_ordered_shard_degrees(input), - relative_ff_dim_t{0}, - relative_ff_dim_t{-1}))}; - FFOrdered shard_degrees = - FFOrdered{get_discard_copy_degree(input)}; + ParallelTensorDimDegrees bias_degrees = get_bias_parallel_dim_degrees(attrs, get_parallel_degrees(input)); return 
lift_to_parallel_with_degrees( - unpar, sum_degree, discard_copy_degree, shard_degrees); + unpar, bias_degrees); } tl::expected @@ -152,15 +150,73 @@ tl::expected result_unpar.value(); }); + ParallelTensorDimDegrees output_degrees = + get_output_parallel_dim_degrees(attrs, get_parallel_degrees(input)); + + return lift_to_parallel_with_degrees(unpar, output_degrees); +} + +ParallelTensorDimDegrees + get_projection_parallel_dim_degrees(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input) { + SumDegree sum_degree = SumDegree{1_p}; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{ + input.sum_degree.value * product(slice(input.shard_degrees, + relative_ff_dim_t{0}, + relative_ff_dim_t{-1}))}; + FFOrdered shard_degrees = FFOrdered{ + input.discard_copy_degree.value, + input.shard_degrees.at(relative_ff_dim_t{-1}), + }; + + return ParallelTensorDimDegrees{ + /*sum_degree=*/sum_degree, + /*discard_copy_degree=*/discard_copy_degree, + /*shard_degrees=*/shard_degrees, + }; +} + +ParallelTensorDimDegrees + get_bias_parallel_dim_degrees(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input) { + + SumDegree sum_degree = + SumDegree{ + input.sum_degree.value * + input.shard_degrees.at(relative_ff_dim_t{-1}), + }; + DiscardCopyDegree discard_copy_degree = + DiscardCopyDegree{product(slice(input.shard_degrees, + relative_ff_dim_t{0}, + relative_ff_dim_t{-1}))}; + FFOrdered shard_degrees = + FFOrdered{input.discard_copy_degree.value}; + + return ParallelTensorDimDegrees{ + /*sum_degree=*/sum_degree, + /*discard_copy_degree=*/discard_copy_degree, + /*shard_degrees=*/shard_degrees, + }; +} + +ParallelTensorDimDegrees + get_output_parallel_dim_degrees(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input) { SumDegree sum_degree = - SumDegree{get_sum_degree(input) * - shard_dim_at_idx(input, relative_ff_dim_t{-1}).degree}; + SumDegree{ + input.sum_degree.value * + input.shard_degrees.at(relative_ff_dim_t{-1}), + }; + DiscardCopyDegree discard_copy_degree = DiscardCopyDegree{1_p}; - FFOrdered shard_degrees = ff_ordered_shard_degrees(input); - shard_degrees.at(relative_ff_dim_t{-1}) = get_discard_copy_degree(input); + FFOrdered shard_degrees = input.shard_degrees; + shard_degrees.at(relative_ff_dim_t{-1}) = input.discard_copy_degree.value; - return lift_to_parallel_with_degrees( - unpar, sum_degree, discard_copy_degree, shard_degrees); + return ParallelTensorDimDegrees{ + /*sum_degree=*/sum_degree, + /*discard_copy_degree=*/discard_copy_degree, + /*shard_degrees=*/shard_degrees, + }; } tl::expected, std::string> @@ -233,4 +289,223 @@ tl::expected, std::string> get_initializers( } } +OperatorTaskSpace get_operator_task_space( + LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees( + attrs, input_degrees); + + return get_operator_task_space_matching_parallel_tensor_dim_degrees(output_degrees); +} + + +static ParallelTensorSpaceToParallelTensorSpaceMapping + get_input_to_output_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + num_tensor_dims_t input_num_dims = get_ptensor_dim_degrees_num_tensor_dims(input_degrees); + + DownProjection + inp_to_out = make_empty_down_projection(); + + ff_dim_t input_channel_dim = + ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t{-1}, input_num_dims); + + num_tensor_dims_t output_num_dims = input_num_dims; + ff_dim_t output_channel_dim = + ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t{-1}, 
output_num_dims); + + project_dims(inp_to_out, + /*from=*/{sum_dim_idx(), shard_dim_idx(input_channel_dim)}, + /*onto=*/sum_dim_idx()); + project_dims(inp_to_out, + /*from=*/{discard_copy_dim_idx()}, + /*onto=*/shard_dim_idx(output_channel_dim)); + + for (ff_dim_t const &idx : + slice(tensor_dims_range(input_num_dims), 0, -1)) { + project_dims(inp_to_out, + /*from=*/{shard_dim_idx(idx)}, + /*onto=*/shard_dim_idx(idx)); + } + + ParallelTensorDimDegrees output_degrees = + get_output_parallel_dim_degrees(attrs, input_degrees); + + return parallel_tensor_space_mapping_from_projection( + DimProjection{inp_to_out}, input_degrees, output_degrees); +} + +static ParallelTensorSpaceToParallelTensorSpaceMapping + get_input_to_projection_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + num_ptensor_shard_dims_t input_num_shard_dims = + get_ptensor_dim_degrees_num_shard_dims(input_degrees); + + DownProjection + inp_to_proj = make_empty_down_projection(); + + parallel_tensor_dim_idx_t input_channel_dim = parallel_tensor_dim_idx_t{ + ff_dim_t{ + nonnegative_int{ + input_num_shard_dims.value.unwrap_nonnegative() - 1, + }, + }, + }; + + { + std::unordered_set dims_from = + unordered_set_of(dim_idxs_for_num_shard_dims( + input_num_shard_dims.value + )); + dims_from.insert(sum_dim_idx()); + dims_from.erase(input_channel_dim); + + project_dims(inp_to_proj, + /*from=*/dims_from, + /*onto=*/discard_copy_dim_idx()); + } + + parallel_tensor_dim_idx_t projection_in_channel_dim + = parallel_tensor_dim_idx_t{ff_dim_t{0_n}}; + + parallel_tensor_dim_idx_t projection_out_channel_dim + = parallel_tensor_dim_idx_t{ff_dim_t{1_n}}; + + + project_dims(inp_to_proj, + /*from=*/{discard_copy_dim_idx()}, + /*onto=*/projection_out_channel_dim); + + project_dims(inp_to_proj, + /*from=*/{input_channel_dim}, + /*onto=*/projection_in_channel_dim); + + project_dims(inp_to_proj, + /*from=*/{}, + /*onto=*/discard_copy_dim_idx()); + + ParallelTensorDimDegrees projection_degrees = + get_projection_parallel_dim_degrees(attrs, input_degrees); + + return parallel_tensor_space_mapping_from_projection( + DimProjection{inp_to_proj}, input_degrees, projection_degrees); +} + +static ParallelTensorSpaceToParallelTensorSpaceMapping + get_input_to_bias_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + ASSERT(attrs.use_bias); + + num_ptensor_shard_dims_t input_num_shard_dims = get_ptensor_dim_degrees_num_shard_dims(input_degrees); + + ParallelTensorDimDegrees bias_degrees = get_bias_parallel_dim_degrees(attrs, input_degrees); + + DownProjection + inp_to_bias = make_empty_down_projection(); + + parallel_tensor_dim_idx_t input_channel_dim = parallel_tensor_dim_idx_t{ + ff_dim_t{ + nonnegative_int{ + input_num_shard_dims.value.unwrap_nonnegative() - 1, + }, + }, + }; + + { + std::unordered_set dims_from = + unordered_set_of(dim_idxs_for_num_shard_dims( + input_num_shard_dims.value + )); + dims_from.erase(input_channel_dim); + + project_dims(inp_to_bias, + /*from=*/dims_from, + /*onto=*/discard_copy_dim_idx()); + } + + parallel_tensor_dim_idx_t bias_out_channel_dim + = parallel_tensor_dim_idx_t{ff_dim_t{0_n}}; + + project_dims(inp_to_bias, + /*from=*/{ + sum_dim_idx(), + input_channel_dim, + }, + /*onto=*/sum_dim_idx()); + + project_dims(inp_to_bias, + /*from=*/{}, + /*onto=*/discard_copy_dim_idx()); + + DimDomain l_domain = + dim_domain_from_parallel_tensor_dim_degrees(input_degrees); + DimDomain r_domain = + dim_domain_from_parallel_tensor_dim_degrees(bias_degrees); + + return 
parallel_tensor_space_mapping_from_projection( + DimProjection{inp_to_bias}, + input_degrees, + bias_degrees); +} + + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_projection_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + return operator_ptensor_space_mapping_from_composition( + get_operator_to_input_mapping(attrs, input_degrees), + get_input_to_projection_mapping(attrs, input_degrees)); +} + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_input_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + DimDomainMapping< + parallel_tensor_dim_idx_t, + parallel_tensor_dim_idx_t + > inp_to_out = get_input_to_output_mapping(attrs, input_degrees).raw_mapping; + + DimDomainMapping< + operator_task_space_dim_idx_t, + parallel_tensor_dim_idx_t + > op_to_out = get_operator_to_output_mapping(attrs, input_degrees).raw_mapping; + + DimDomainMapping< + operator_task_space_dim_idx_t, + parallel_tensor_dim_idx_t + > op_to_inp = compose_dim_domain_mappings_through_minimal(op_to_out, invert_dim_domain_mapping(inp_to_out)); + + return OperatorSpaceToParallelTensorSpaceMapping{ + op_to_inp, + }; +} + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_bias_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + return operator_ptensor_space_mapping_from_composition( + get_operator_to_input_mapping(attrs, input_degrees), + get_input_to_bias_mapping(attrs, input_degrees)); +} + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_output_mapping(LinearAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, input_degrees); + + return get_identity_mapping( + get_operator_task_space(attrs, input_degrees), + output_degrees); +} + } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ops/reduce.cc b/lib/op-attrs/src/op-attrs/ops/reduce.cc index 2a8bf06ecf..e5474ae124 100644 --- a/lib/op-attrs/src/op-attrs/ops/reduce.cc +++ b/lib/op-attrs/src/op-attrs/ops/reduce.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/reduce.h" +#include "utils/exception.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/op-attrs/ops/reshape.cc b/lib/op-attrs/src/op-attrs/ops/reshape.cc index 6216ad8c6c..d8ea92d540 100644 --- a/lib/op-attrs/src/op-attrs/ops/reshape.cc +++ b/lib/op-attrs/src/op-attrs/ops/reshape.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/reshape.h" +#include "utils/exception.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/op-attrs/ops/reverse.cc b/lib/op-attrs/src/op-attrs/ops/reverse.cc index c38d7e4782..3a063d1af9 100644 --- a/lib/op-attrs/src/op-attrs/ops/reverse.cc +++ b/lib/op-attrs/src/op-attrs/ops/reverse.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/reverse.h" +#include "utils/exception.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/op-attrs/ops/split.cc b/lib/op-attrs/src/op-attrs/ops/split.cc index a9fe691584..ed737b18d9 100644 --- a/lib/op-attrs/src/op-attrs/ops/split.cc +++ b/lib/op-attrs/src/op-attrs/ops/split.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/split.h" +#include "utils/exception.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/op-attrs/ops/topk.cc b/lib/op-attrs/src/op-attrs/ops/topk.cc index 7a6868340b..179d6cfdd3 100644 --- a/lib/op-attrs/src/op-attrs/ops/topk.cc +++ b/lib/op-attrs/src/op-attrs/ops/topk.cc @@ -1,4 +1,5 @@ #include "op-attrs/ops/topk.h" +#include "utils/exception.h" namespace FlexFlow { diff --git 
a/lib/op-attrs/src/op-attrs/ops/transpose.cc b/lib/op-attrs/src/op-attrs/ops/transpose.cc index 50e6fb35f5..eed3420bda 100644 --- a/lib/op-attrs/src/op-attrs/ops/transpose.cc +++ b/lib/op-attrs/src/op-attrs/ops/transpose.cc @@ -1,14 +1,95 @@ #include "op-attrs/ops/transpose.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/operator_task_space.h" +#include "op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.h" +#include "utils/bidict/algorithms/transform_keys.h" +#include "utils/bidict/algorithms/transform_values.h" namespace FlexFlow { -TensorShape get_output_shape(TransposeAttrs const &, TensorShape const &) { - NOT_IMPLEMENTED(); +TensorShape get_output_shape(TransposeAttrs const &attrs, TensorShape const &input_shape) { + return permute_tensor_shape(attrs.permutation, input_shape); } -ParallelTensorShape get_output_shape(TransposeAttrs const &op_attrs, +ParallelTensorDimDegrees get_output_parallel_dim_degrees(TransposeAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + return permute_parallel_tensor_dim_degrees(attrs.permutation, input_degrees); +} + + +ParallelTensorShape get_output_shape(TransposeAttrs const &attrs, ParallelTensorShape const &input_shape) { - NOT_IMPLEMENTED(); + TensorShape output_shape = get_output_shape(attrs, get_reduced_shape(input_shape)); + + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, get_parallel_degrees(input_shape)); + + return lift_to_parallel_with_degrees(output_shape, output_degrees); +} + +OperatorTaskSpace get_operator_task_space(TransposeAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, input_degrees); + + return get_operator_task_space_matching_parallel_tensor_dim_degrees(output_degrees); +} + +static ParallelTensorSpaceToParallelTensorSpaceMapping + get_input_to_output_mapping(TransposeAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + auto ff_dim_to_pt_dim = [](ff_dim_t d) -> parallel_tensor_dim_idx_t { + return parallel_tensor_dim_idx_t{d}; + }; + + EqProjection + inp_to_out = EqProjection{ + transform_keys( + transform_values( + attrs.permutation.as_bidict(), + ff_dim_to_pt_dim), + ff_dim_to_pt_dim), + }; + + project_dims(inp_to_out, sum_dim_idx(), sum_dim_idx()); + project_dims(inp_to_out, discard_copy_dim_idx(), discard_copy_dim_idx()); + + ParallelTensorDimDegrees output_degrees = + get_output_parallel_dim_degrees(attrs, input_degrees); + + return parallel_tensor_space_mapping_from_projection( + DimProjection{inp_to_out}, input_degrees, output_degrees); +} + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_input_mapping(TransposeAttrs const &attrs, + ParallelTensorDimDegrees const &input_degrees) { + ParallelTensorSpaceToParallelTensorSpaceMapping + inp_to_out = get_input_to_output_mapping(attrs, input_degrees); + + ParallelTensorSpaceToParallelTensorSpaceMapping + out_to_inp = invert_parallel_tensor_space_mapping(inp_to_out); + + OperatorSpaceToParallelTensorSpaceMapping + op_to_out = get_operator_to_output_mapping(attrs, input_degrees); + + return operator_ptensor_space_mapping_from_composition( + op_to_out, + out_to_inp); } +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_output_mapping(TransposeAttrs const 
&attrs, + ParallelTensorDimDegrees const &input_degrees) { + ParallelTensorDimDegrees output_degrees = get_output_parallel_dim_degrees(attrs, input_degrees); + + return get_identity_mapping( + get_operator_task_space(attrs, input_degrees), + output_degrees); +} + + + + } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/ops/weight.cc b/lib/op-attrs/src/op-attrs/ops/weight.cc index 710529af0a..b6d19a66da 100644 --- a/lib/op-attrs/src/op-attrs/ops/weight.cc +++ b/lib/op-attrs/src/op-attrs/ops/weight.cc @@ -1,4 +1,6 @@ #include "op-attrs/ops/weight.h" +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/operator_task_space.h" #include "op-attrs/parallel_tensor_shape.h" namespace FlexFlow { @@ -23,4 +25,15 @@ ParallelTensorShape get_output_parallel_tensor_shape(WeightAttrs const &attrs) { return lift_to_parallel(attrs.tensor_shape); } +OperatorTaskSpace get_operator_task_space(WeightAttrs const &) { + return trivial_op_task_space(); +} + +OperatorSpaceToParallelTensorSpaceMapping + get_operator_to_output_mapping(WeightAttrs const &attrs) { + + return empty_operator_space_to_ptensor_space_map(); +} + + } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/parallel_op_attrs.cc b/lib/op-attrs/src/op-attrs/parallel_op_attrs.cc index c458d4149d..ccacd9bc3e 100644 --- a/lib/op-attrs/src/op-attrs/parallel_op_attrs.cc +++ b/lib/op-attrs/src/op-attrs/parallel_op_attrs.cc @@ -3,6 +3,7 @@ #include "op-attrs/ops/reduction.h" #include "op-attrs/ops/repartition.h" #include "op-attrs/ops/replicate.h" +#include "utils/exception.h" #include "utils/overload.h" namespace FlexFlow { diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_degrees.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_degrees.cc new file mode 100644 index 0000000000..ee2f5af8bd --- /dev/null +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_degrees.cc @@ -0,0 +1,168 @@ +#include "op-attrs/parallel_tensor_dim_degrees.h" +#include "op-attrs/ff_ordered/ff_ordered_from_map.h" +#include "op-attrs/ff_ordered/get_idxs.h" +#include "op-attrs/num_tensor_dims_t.h" +#include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "op-attrs/parallel_tensor_space_coordinate.h" +#include "utils/containers/filtermap_keys.h" +#include "utils/containers/filtrans.h" +#include "utils/containers/generate_map.h" +#include "utils/containers/get_all_assignments.h" +#include "utils/containers/map_keys.h" +#include "utils/containers/map_values.h" +#include "utils/containers/binary_merge_disjoint_maps.h" +#include "utils/containers/range.h" +#include "utils/containers/set_union.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/orthotope/minimal_dim_domain.h" + +namespace FlexFlow { + +num_ptensor_shard_dims_t get_ptensor_dim_degrees_num_shard_dims(ParallelTensorDimDegrees const °rees) { + return num_ptensor_shard_dims_t{ + num_elements(degrees.shard_degrees), + }; +} + +num_tensor_dims_t get_ptensor_dim_degrees_num_tensor_dims(ParallelTensorDimDegrees const °rees) { + return num_tensor_dims_from_num_ptensor_shard_dims( + get_ptensor_dim_degrees_num_shard_dims(degrees)); +} + +std::unordered_set + get_parallel_tensor_dim_indices(ParallelTensorDimDegrees const °rees) { + + std::unordered_set result = + unordered_set_of(dim_idxs_for_num_shard_dims(num_elements(degrees.shard_degrees))); + 
result.insert(sum_dim_idx()); + result.insert(discard_copy_dim_idx()); + return result; +} + + +std::set get_nontrivial_parallel_tensor_dim_indices( + ParallelTensorDimDegrees const °rees) { + std::set nontrivial_replica_dims; + + if (degrees.sum_degree.value > 1) { + nontrivial_replica_dims.insert(parallel_tensor_dim_idx_t{ReplicaType::SUM}); + } + + if (degrees.discard_copy_degree.value > 1) { + nontrivial_replica_dims.insert( + parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}); + } + + std::set nontrivial_shard_dims = filtrans( + get_idxs(degrees.shard_degrees), + [&](ff_dim_t const &dim) -> std::optional { + if (degrees.shard_degrees.at(dim) > 1) { + return parallel_tensor_dim_idx_t{dim}; + } else { + return std::nullopt; + } + }); + + return set_union(nontrivial_replica_dims, nontrivial_shard_dims); +} + +positive_int get_degree_for_parallel_tensor_dim_idx(ParallelTensorDimDegrees const &dim_degrees, + parallel_tensor_dim_idx_t const &idx) { + if (idx == sum_dim_idx()) { + return dim_degrees.sum_degree.value; + } else if (idx == discard_copy_dim_idx()) { + return dim_degrees.discard_copy_degree.value; + } else { + return dim_degrees.shard_degrees.at(idx.require_shard_dim()); + } +} + +std::unordered_map + get_parallel_tensor_degree_map(ParallelTensorDimDegrees const °rees) { + + std::unordered_map + replica_dim_degrees = { + {parallel_tensor_dim_idx_t{ReplicaType::SUM}, + degrees.sum_degree.value}, + {parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}, + degrees.discard_copy_degree.value}, + }; + + std::unordered_map shard_dim_degrees = + generate_map(get_idxs(degrees.shard_degrees), [&](ff_dim_t const &dim) { + return degrees.shard_degrees.at(dim); + }); + + return binary_merge_disjoint_maps( + /*lhs=*/replica_dim_degrees, + /*rhs=*/map_keys(shard_dim_degrees, [](ff_dim_t const &dim) { + return parallel_tensor_dim_idx_t{dim}; + })); +} + +std::unordered_set + get_parallel_tensor_space_coordinates( + ParallelTensorDimDegrees const °rees) { + + std::unordered_map degree_map = + get_parallel_tensor_degree_map(degrees); + + std::unordered_map> + possible_per_dim_coords = map_values(degree_map, [](positive_int degree) { + return unordered_set_of(nonnegative_range(degree)); + }); + + return transform( + get_all_assignments(possible_per_dim_coords), + [](std::unordered_map const + &m) { return parallel_tensor_space_coord_from_map(m); }); +} + +DimDomain + dim_domain_from_parallel_tensor_dim_degrees(ParallelTensorDimDegrees const &dim_degrees) { + + return DimDomain{ + generate_map( + get_parallel_tensor_dim_indices(dim_degrees), + [&](parallel_tensor_dim_idx_t idx) { + return get_degree_for_parallel_tensor_dim_idx(dim_degrees, idx); + }), + }; +} + +ParallelTensorDimDegrees + parallel_tensor_dim_degrees_from_dim_domain(DimDomain const &dim_domain) { + + std::unordered_map + shard_dims = + filtermap_keys(dim_domain.dims, + [](parallel_tensor_dim_idx_t dim_idx) { + return dim_idx.try_require_shard_dim(); + }); + + return ParallelTensorDimDegrees{ + /*sum_degree=*/SumDegree{ + dim_domain.dims.at(sum_dim_idx()), + }, + /*discard_copy_degree=*/DiscardCopyDegree{ + dim_domain.dims.at(discard_copy_dim_idx()), + }, + /*shard_degres=*/ff_ordered_from_map(shard_dims), + }; +} + + +MinimalDimDomain + minimal_dim_domain_from_parallel_tensor_dim_degrees(ParallelTensorDimDegrees const &dim_degrees) { + + return minimal_dim_domain_from_dim_domain( + dim_domain_from_parallel_tensor_dim_degrees( + dim_degrees)); +} + +} // namespace FlexFlow diff --git 
a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc new file mode 100644 index 0000000000..c2726bcb7f --- /dev/null +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc @@ -0,0 +1,52 @@ +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "op-attrs/ff_dim_t.h" +#include "utils/containers/set_of.h" +#include "utils/containers/transform.h" + +namespace FlexFlow { + +parallel_tensor_dim_idx_t sum_dim_idx() { + return parallel_tensor_dim_idx_t{ReplicaType::SUM}; +} + +parallel_tensor_dim_idx_t discard_copy_dim_idx() { + return parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}; +} + +parallel_tensor_dim_idx_t shard_dim_idx(ff_dim_t idx) { + return parallel_tensor_dim_idx_t{idx}; +} + +bool is_dim_idx_for_reduction_dimension(parallel_tensor_dim_idx_t dim_idx) { + return (dim_idx == sum_dim_idx()) || (dim_idx == discard_copy_dim_idx()); +} + +std::set + dim_idxs_for_num_shard_dims(nonnegative_int num_shard_dims) { + std::set result = + transform(set_of(ff_dim_range(num_shard_dims)), shard_dim_idx); + result.insert(sum_dim_idx()); + result.insert(discard_copy_dim_idx()); + + return result; +} + +DimOrdering + get_parallel_tensor_dim_ordering() { + + return DimOrdering{ + /*lt=*/[](parallel_tensor_dim_idx_t lhs, parallel_tensor_dim_idx_t rhs) -> bool { + if (lhs.is_shard_dim() && rhs.is_shard_dim()) { + return lhs.require_shard_dim() < rhs.require_shard_dim(); + } else if (lhs.is_shard_dim() && !rhs.is_shard_dim()) { + return false; + } else if (!lhs.is_shard_dim() && rhs.is_shard_dim()) { + return true; + } else { + return lhs.require_replica_dim() > rhs.require_replica_dim(); + } + }, + }; +} + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_dims.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dims.cc index 1c77bc6ca8..99bedd443b 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_dims.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_dims.cc @@ -29,8 +29,10 @@ std::unordered_set return get_replica_dims(d.replica_dims); } -nonnegative_int num_shard_dims(ParallelTensorDims const &dims) { - return num_elements(dims.shard_dims); +num_ptensor_shard_dims_t num_shard_dims(ParallelTensorDims const &dims) { + return num_ptensor_shard_dims_t{ + num_elements(dims.shard_dims), + }; } ParallelTensorDimDegrees get_parallel_degrees(ParallelTensorDims const &d) { @@ -43,7 +45,8 @@ ParallelTensorDimDegrees get_parallel_degrees(ParallelTensorDims const &d) { ParallelTensorDims lift_to_parallel(TensorDims const &dims) { std::vector shard_degrees = - repeat_element(/*num_times=*/get_num_dims(dims), /*element=*/1_p); + repeat_element(/*num_times=*/get_num_dims(dims).nonnegative_int_from_num_tensor_dims(), + /*element=*/1_p); return lift_to_parallel_with_degrees(dims, SumDegree{1_p}, DiscardCopyDegree{1_p}, diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc index 1b8f6f1dfa..91d3d0b1aa 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc @@ -5,13 +5,15 @@ #include "utils/containers/product.h" #include "utils/containers/range.h" #include "utils/containers/transform.h" +#include "utils/exception.h" #include "utils/hash-utils.h" #include "utils/nonnegative_int/nonnegative_range.h" #include "utils/overload.h" +#include namespace FlexFlow { -nonnegative_int num_shard_dims(ParallelTensorShape const &s) { +num_ptensor_shard_dims_t num_shard_dims(ParallelTensorShape const &s) { 
return num_shard_dims(s.dims); } @@ -97,25 +99,22 @@ ParallelTensorShape TensorShape require_not_parallel(ParallelTensorShape const &s) { positive_int total_degree = get_total_parallel_degree(s); - if (total_degree != 1_p) { - throw mk_runtime_error( - fmt::format("Error: require_not_parallel received a parallel tensor " - "shape with parallel degree {}: {}", - total_degree, - s)); - } + ASSERT(total_degree == 1_p, + "Error: require_not_parallel received a parallel tensor shape with " + "parallel degree != 1", + s); return get_reduced_shape(s); } -TensorShape get_tensor_shape_unsafe(ParallelTensorShape const &) { - NOT_IMPLEMENTED(); -} - TensorShape get_piece_shape(ParallelTensorShape const &s) { return get_reduced_shape(s); } +num_bytes_t get_piece_size_in_bytes(ParallelTensorShape const &s) { + return get_size_in_bytes(get_piece_shape(s)); +} + TensorShape get_reduced_shape(ParallelTensorShape const &s) { return TensorShape{ get_reduced_dims(s.dims), @@ -142,7 +141,7 @@ std::unordered_set get_parallel_tensor_dim_indices(ParallelTensorShape const &shape) { std::unordered_set indices; extend(indices, - transform(nonnegative_range(num_shard_dims(shape.dims)), + transform(nonnegative_range(num_shard_dims(shape.dims).value), [](nonnegative_int idx) { return parallel_tensor_dim_idx_t{ff_dim_t{idx}}; })); diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_space_coordinate.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_space_coordinate.cc new file mode 100644 index 0000000000..a061b7b675 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_space_coordinate.cc @@ -0,0 +1,73 @@ +#include "op-attrs/parallel_tensor_space_coordinate.h" +#include "op-attrs/ff_ordered/ff_ordered_from_map.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "utils/containers/filtermap_keys.h" +#include "utils/containers/generate_map.h" +#include "utils/containers/unordered_set_of.h" +#include "utils/nonnegative_int/num_elements.h" + +namespace FlexFlow { + +num_ptensor_parallel_dims_t ptensor_coord_num_dims(ParallelTensorSpaceCoordinate const &c) { + return num_ptensor_parallel_dims_t{ + 2_n + num_elements(c.shard_components), + }; +} + +std::unordered_set + get_dim_idxs_in_ptensor_space_coord(ParallelTensorSpaceCoordinate const &coord) { + + std::unordered_set result = + unordered_set_of(dim_idxs_for_num_shard_dims(num_elements(coord.shard_components))); + result.insert(sum_dim_idx()); + result.insert(discard_copy_dim_idx()); + return result; +} + + +nonnegative_int ptensor_coord_component_for_ptensor_dim_idx( + ParallelTensorSpaceCoordinate const &coord, + parallel_tensor_dim_idx_t dim_idx) { + if (dim_idx == sum_dim_idx()) { + return coord.sum_component; + } else if (dim_idx == discard_copy_dim_idx()) { + return coord.discard_copy_component; + } else { + return coord.shard_components.at(dim_idx.require_shard_dim()); + } +} + +ParallelTensorSpaceCoordinate parallel_tensor_space_coord_from_map( + std::unordered_map const &m) { + + std::unordered_map shard_map = + filtermap_keys(m, [](parallel_tensor_dim_idx_t const &d) { + return d.try_require_shard_dim(); + }); + + return ParallelTensorSpaceCoordinate{ + /*sum_idx=*/m.at(parallel_tensor_dim_idx_t{ReplicaType::SUM}), + /*discard_copy_idx=*/ + m.at(parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}), + /*shard_idxs=*/ff_ordered_from_map(shard_map), + }; +} + +ParallelTensorSpaceCoordinate parallel_tensor_space_coord_from_dim_coord( + DimCoord const &dim_coord) { + return parallel_tensor_space_coord_from_map(dim_coord.raw); +} + +DimCoord + 
dim_coord_from_parallel_tensor_space_coord( + ParallelTensorSpaceCoordinate const &coord) { + + return DimCoord{ + generate_map(get_dim_idxs_in_ptensor_space_coord(coord), + [&](parallel_tensor_dim_idx_t idx) { + return ptensor_coord_component_for_ptensor_dim_idx(coord, idx); + }), + }; +} + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.cc new file mode 100644 index 0000000000..eedaa07897 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.cc @@ -0,0 +1,33 @@ +#include "op-attrs/parallel_tensor_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/parallel_tensor_dim_degrees.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" + +namespace FlexFlow { + +ParallelTensorSpaceToParallelTensorSpaceMapping + parallel_tensor_space_mapping_from_projection( + DimProjection const &projection, + ParallelTensorDimDegrees const &l_degrees, + ParallelTensorDimDegrees const &r_degrees) { + + return ParallelTensorSpaceToParallelTensorSpaceMapping{ + dim_domain_mapping_from_projection( + /*projection=*/projection, + /*l_domain=*/dim_domain_from_parallel_tensor_dim_degrees(l_degrees), + /*r_domain=*/dim_domain_from_parallel_tensor_dim_degrees(r_degrees), + /*l_dim_ordering=*/get_parallel_tensor_dim_ordering(), + /*r_dim_ordering=*/get_parallel_tensor_dim_ordering()), + }; +} + +ParallelTensorSpaceToParallelTensorSpaceMapping + invert_parallel_tensor_space_mapping( + ParallelTensorSpaceToParallelTensorSpaceMapping const &m) { + return ParallelTensorSpaceToParallelTensorSpaceMapping{ + invert_dim_domain_mapping(m.raw_mapping), + }; +} + + + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/relative_ff_dim_t.cc b/lib/op-attrs/src/op-attrs/relative_ff_dim_t.cc index a987841b18..91caa03f36 100644 --- a/lib/op-attrs/src/op-attrs/relative_ff_dim_t.cc +++ b/lib/op-attrs/src/op-attrs/relative_ff_dim_t.cc @@ -3,10 +3,10 @@ namespace FlexFlow { ff_dim_t ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t ff_dim, - nonnegative_int input_dim) { + num_tensor_dims_t input_dim) { int raw = ff_dim.value; if (raw < 0) { - raw = input_dim.unwrap_nonnegative() + raw; + raw = input_dim.int_from_num_tensor_dims() + raw; } return ff_dim_t{nonnegative_int{raw}}; } diff --git a/lib/op-attrs/src/op-attrs/shape_inference.cc b/lib/op-attrs/src/op-attrs/shape_inference.cc index 4a0ff72fb4..8022584934 100644 --- a/lib/op-attrs/src/op-attrs/shape_inference.cc +++ b/lib/op-attrs/src/op-attrs/shape_inference.cc @@ -20,6 +20,7 @@ #include "op-attrs/ops/repartition.h" #include "op-attrs/ops/replicate.h" #include "op-attrs/ops/softmax.h" +#include "op-attrs/ops/transpose.h" #include "op-attrs/ops/weight.h" #include "utils/containers/get_only.h" #include "utils/overload.h" @@ -69,11 +70,10 @@ std::vector [&](ElementBinaryAttrs const &attrs) -> std::vector { auto [i1, i2] = require_2(input_shapes); - return {throw_if_unexpected(get_output_shape(attrs, i1, i2))}; + return {get_output_shape(attrs, i1, i2)}; }, [&](ElementUnaryAttrs const &attrs) -> std::vector { - return {throw_if_unexpected( - get_output_shape(attrs, get_only(input_shapes)))}; + return {get_output_shape(attrs, get_only(input_shapes))}; }, [&](EmbeddingAttrs const &attrs) -> std::vector { return {throw_if_unexpected( @@ -110,6 +110,12 @@ std::vector return {throw_if_unexpected( get_output_shape(attrs, get_only(input_shapes)))}; }, + [&](TransposeAttrs const 
&attrs) -> std::vector { + + return { + get_output_shape(attrs, get_only(input_shapes)), + }; + }, [&](WeightAttrs const &attrs) -> std::vector { return {get_output_shape(attrs)}; }, @@ -203,19 +209,17 @@ std::vector [&](ElementBinaryAttrs const &attrs) -> std::vector { auto [i1, i2] = require_2(input_shapes); - return {throw_if_unexpected(get_output_shape(attrs, i1, i2))}; + return {get_output_shape(attrs, i1, i2)}; }, [&](ElementUnaryAttrs const &attrs) -> std::vector { - return {throw_if_unexpected( - get_output_shape(attrs, get_only(input_shapes)))}; + return {get_output_shape(attrs, get_only(input_shapes))}; }, [&](EmbeddingAttrs const &attrs) -> std::vector { return {throw_if_unexpected( get_output_shape(attrs, get_only(input_shapes)))}; }, [&](FlatAttrs const &attrs) -> std::vector { - return {throw_if_unexpected( - get_output_shape(attrs, get_only(input_shapes)))}; + return {get_output_shape(attrs, get_only(input_shapes))}; }, [&](GatherAttrs const &attrs) -> std::vector { return { @@ -257,6 +261,11 @@ std::vector return {throw_if_unexpected( get_output_shape(attrs, get_only(input_shapes)))}; }, + [&](TransposeAttrs const &attrs) -> std::vector { + return { + get_output_shape(attrs, get_only(input_shapes)), + }; + }, [&](WeightAttrs const &attrs) -> std::vector { return {get_output_parallel_tensor_shape(attrs)}; }, @@ -341,6 +350,9 @@ std::vector [&](SoftmaxAttrs const &attrs) -> std::vector { return {}; }, + [&](TransposeAttrs const &attrs) -> std::vector { + return {}; + }, [&](WeightAttrs const &attrs) -> std::vector { return {}; }, diff --git a/lib/op-attrs/src/op-attrs/task_space_coordinate.cc b/lib/op-attrs/src/op-attrs/task_space_coordinate.cc new file mode 100644 index 0000000000..180cea12e5 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/task_space_coordinate.cc @@ -0,0 +1,54 @@ +#include "op-attrs/task_space_coordinate.h" +#include "op-attrs/operator_task_space.h" +#include "op-attrs/operator_task_space_dim_idx_t.h" +#include "utils/containers/map_keys.h" +#include "utils/containers/vector_from_idx_map.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/orthotope/dim_coord.h" +#include "utils/orthotope/orthotope_coord.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" + +namespace FlexFlow { + +nonnegative_int task_space_coord_num_dims(TaskSpaceCoordinate const &coord) { + return orthotope_coord_num_dims(coord.orthotope_coord); +} + +TaskSpaceCoordinate + make_task_space_coordinate(std::vector const &elems) { + return TaskSpaceCoordinate{OrthotopeCoord{elems}}; +} + +TaskSpaceCoordinate + task_space_coordinate_from_dim_coord( + DimCoord const &dim_coord) { + std::unordered_set coord_dims = get_coord_dims(dim_coord); + + std::set dims = operator_task_space_dim_idx_range(num_elements(coord_dims)); + + ASSERT(coord_dims == unordered_set_of(dims)); + + std::unordered_map idx_map = map_keys( + dim_coord.raw, [](operator_task_space_dim_idx_t idx) { + return idx.raw_idx; + }); + + return TaskSpaceCoordinate{ + OrthotopeCoord{ + vector_from_idx_map(idx_map).value(), + }, + }; +} + +DimCoord + dim_coord_from_task_space_coordinate(TaskSpaceCoordinate const &coord) { + + return dim_coord_from_orthotope_coord( + coord.orthotope_coord, + unordered_set_of(operator_task_space_dim_idx_range(orthotope_coord_num_dims(coord.orthotope_coord))), + get_operator_task_space_dim_ordering()); +} + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/tensor_dim_permutation.cc 
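The TaskSpaceCoordinate / DimCoord conversions above are the glue between plain task-space points and the keyed coordinates used by the DimDomainMapping machinery. A minimal usage sketch follows; the round-trip behavior shown is an assumed property, not something this patch asserts:

#include "op-attrs/task_space_coordinate.h"
#include <cassert>

using namespace ::FlexFlow;

int main() {
  // A task-space coordinate is an ordered tuple of indices (one per task-space
  // dimension), here the point (1, 0, 2) in a 3-dimensional task space.
  TaskSpaceCoordinate coord = make_task_space_coordinate({1_n, 0_n, 2_n});
  assert(task_space_coord_num_dims(coord) == 3_n);

  // The DimCoord form keys each entry by its operator_task_space_dim_idx_t;
  // converting back should recover the original coordinate.
  auto keyed = dim_coord_from_task_space_coordinate(coord);
  TaskSpaceCoordinate round_tripped = task_space_coordinate_from_dim_coord(keyed);
  assert(round_tripped == coord);

  return 0;
}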
b/lib/op-attrs/src/op-attrs/tensor_dim_permutation.cc new file mode 100644 index 0000000000..4985ed8d11 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/tensor_dim_permutation.cc @@ -0,0 +1,219 @@ +#include "op-attrs/tensor_dim_permutation.h" +#include "op-attrs/ff_ordered/ff_ordered_from_map.h" +#include "op-attrs/ff_ordered/map_from_ff_ordered.h" +#include "utils/bidict/algorithms/exhaustive_relational_join.h" +#include "utils/bidict/algorithms/left_entries.h" +#include "utils/bidict/algorithms/right_entries.h" +#include "utils/bidict/bidict.h" +#include "utils/containers/map_keys.h" +#include "utils/containers/minimum.h" +#include "utils/containers/maximum.h" +#include "utils/containers/permute_with_key.h" +#include "utils/containers/require_same.h" +#include "utils/bidict/algorithms/bidict_from_keys_and_values.h" +#include "utils/hash/tuple.h" +#include "utils/fmt/unordered_set.h" + +namespace FlexFlow { + +static void check_are_contiguous_from_one(std::unordered_set const &idxs) { + if (idxs.empty()) { + return; + } + + ASSERT(minimum(idxs) == ff_dim_t{0_n}); + ASSERT(maximum(idxs) == ff_dim_t{nonnegative_int{idxs.size() - 1}}); +} + +TensorDimPermutation::TensorDimPermutation( + bidict const &raw) + : raw(raw) +{ + check_are_contiguous_from_one(right_entries(raw)); + check_are_contiguous_from_one(left_entries(raw)); +} + +bool TensorDimPermutation::operator==(TensorDimPermutation const &other) const { + return this->tie() == other.tie(); +} + +bool TensorDimPermutation::operator!=(TensorDimPermutation const &other) const { + return this->tie() != other.tie(); +} + +bool TensorDimPermutation::operator<(TensorDimPermutation const &other) const { + return this->tie() < other.tie(); +} + +bool TensorDimPermutation::operator>(TensorDimPermutation const &other) const { + return this->tie() > other.tie(); +} + +bool TensorDimPermutation::operator<=(TensorDimPermutation const &other) const { + return this->tie() <= other.tie(); +} + +bool TensorDimPermutation::operator>=(TensorDimPermutation const &other) const { + return this->tie() >= other.tie(); +} + +ff_dim_t TensorDimPermutation::at_l(ff_dim_t l) const { + return this->raw.at_l(l); +} + +ff_dim_t TensorDimPermutation::at_r(ff_dim_t r) const { + return this->raw.at_r(r); +} + +num_tensor_dims_t TensorDimPermutation::num_tensor_dims() const { + return num_tensor_dims_t{ + num_elements(this->raw), + }; +} + +bidict const &TensorDimPermutation::as_bidict() const { + return this->raw; +} + +std::tuple const &> TensorDimPermutation::tie() const { + return std::tie(this->raw); +} + +bidict format_as(TensorDimPermutation const &p) { + return p.as_bidict(); +} + +std::ostream &operator<<(std::ostream &s, TensorDimPermutation const &p) { + return (s << fmt::to_string(p)); +} + +TensorDimPermutation compose_tensor_dim_permutations( + TensorDimPermutation const &lhs, + TensorDimPermutation const &rhs) { + + ASSERT(lhs.num_tensor_dims() == rhs.num_tensor_dims()); + + return TensorDimPermutation{ + exhaustive_relational_join(lhs.as_bidict(), rhs.as_bidict()), + }; + } + +TensorDimPermutation invert_tensor_dim_permutation( + TensorDimPermutation const &p) { + + return TensorDimPermutation{ + p.as_bidict().reversed(), + }; +} + +template +static FFOrdered permute_ff_ordered(TensorDimPermutation const &permutation, + FFOrdered const &ff_ordered) { + return ff_ordered_from_map( + map_keys( + map_from_ff_ordered(ff_ordered), + [&](ff_dim_t k) { + return permutation.at_l(k); + })); +} + + +TensorDims + permute_tensor_dims(TensorDimPermutation const 
&permutation, + TensorDims const &dims) { + + return TensorDims{ + permute_ff_ordered(permutation, dims.ff_ordered), + }; +} + +TensorShape + permute_tensor_shape(TensorDimPermutation const &permutation, + TensorShape const &shape) { + return TensorShape{ + /*dims=*/permute_tensor_dims(permutation, shape.dims), + /*data_type=*/shape.data_type, + }; +} + +ParallelTensorDimDegrees + permute_parallel_tensor_dim_degrees(TensorDimPermutation const &permutation, + ParallelTensorDimDegrees const ¶llel_tensor_dim_degrees) { + return ParallelTensorDimDegrees{ + /*sum_degree=*/parallel_tensor_dim_degrees.sum_degree, + /*discard_copy_degree=*/parallel_tensor_dim_degrees.discard_copy_degree, + /*shard_degrees=*/permute_ff_ordered(permutation, parallel_tensor_dim_degrees.shard_degrees), + }; +} + +ParallelTensorDims + permute_parallel_tensor_dims(TensorDimPermutation const &permutation, + ParallelTensorDims const ¶llel_tensor_dims) { + return ParallelTensorDims{ + /*shard_dims=*/permute_ff_ordered(permutation, parallel_tensor_dims.shard_dims), + /*replica_dims=*/parallel_tensor_dims.replica_dims, + }; +} + +ParallelTensorShape + permute_parallel_tensor_shape(TensorDimPermutation const &permutation, + ParallelTensorShape const ¶llel_tensor_shape) { + return ParallelTensorShape{ + /*dims=*/permute_parallel_tensor_dims(permutation, parallel_tensor_shape.dims), + /*data_type=*/parallel_tensor_shape.data_type, + }; +} + +} // namespace FlexFlow + +namespace nlohmann { + +::FlexFlow::TensorDimPermutation adl_serializer<::FlexFlow::TensorDimPermutation>::from_json(json const &j) { + ::FlexFlow::bidict<::FlexFlow::ff_dim_t, ::FlexFlow::ff_dim_t> b = j; + + return ::FlexFlow::TensorDimPermutation{b}; +} + +void adl_serializer<::FlexFlow::TensorDimPermutation>::to_json( + json &j, ::FlexFlow::TensorDimPermutation const &p) { + j = p.as_bidict(); +} + +} // namespace nlohmann + +namespace rc { + +Gen<::FlexFlow::TensorDimPermutation> Arbitrary<::FlexFlow::TensorDimPermutation>::arbitrary() { + using namespace ::FlexFlow; + + Gen> key_permutation_gen = + gen::withSize([=](int size) { + nonnegative_int reduced_size = std::min(nonnegative_int{size}, 5_n); + std::vector sized_keys = ff_dim_range(reduced_size); + return gen::map( + gen::arbitrary(), + [=](int key) -> std::vector { + return permute_with_key(key, sized_keys); + }); + }); + + return gen::construct( + gen::apply( + [](std::vector const &ks, std::vector const &vs) { + return bidict_from_keys_and_values(ks, vs); + }, + key_permutation_gen, + key_permutation_gen + ) + ); +} + +} // namespace rc + +namespace std { + +size_t hash<::FlexFlow::TensorDimPermutation>::operator()(::FlexFlow::TensorDimPermutation const &p) const { + return get_std_hash(p.tie()); +} + +} // namespace std diff --git a/lib/op-attrs/src/op-attrs/tensor_dims.cc b/lib/op-attrs/src/op-attrs/tensor_dims.cc index 435f211a01..dcf36b5d7b 100644 --- a/lib/op-attrs/src/op-attrs/tensor_dims.cc +++ b/lib/op-attrs/src/op-attrs/tensor_dims.cc @@ -31,8 +31,10 @@ bool tensor_dims_has_dim(TensorDims const &tensor_dims, ff_dim_t dim) { return contains(get_idxs(tensor_dims.ff_ordered), dim); } -nonnegative_int get_num_dims(TensorDims const &dims) { - return num_elements(dims.ff_ordered); +num_tensor_dims_t get_num_dims(TensorDims const &dims) { + return num_tensor_dims_t{ + num_elements(dims.ff_ordered), + }; } positive_int dim_at_idx(TensorDims const &dims, relative_ff_dim_t idx) { @@ -115,7 +117,7 @@ TensorDimsCoord get_broadcast_src_coord(TensorDims const &input_dims, output_dims); relative_ff_dim_t 
trailing_start_idx = - relative_ff_dim_t{-1 * get_num_dims(input_dims).unwrap_nonnegative()}; + relative_ff_dim_t{-1 * get_num_dims(input_dims).int_from_num_tensor_dims()}; FFOrdered trailing_entries = slice(dst_coord.ff_ordered, trailing_start_idx); diff --git a/lib/op-attrs/src/parallel_dim_mapping_record.cc b/lib/op-attrs/src/parallel_dim_mapping_record.cc deleted file mode 100644 index 5e734e88cd..0000000000 --- a/lib/op-attrs/src/parallel_dim_mapping_record.cc +++ /dev/null @@ -1,60 +0,0 @@ -#include "parallel_dim_mapping_record.h" -#include - -namespace FlexFlow { - -ParallelDimMappingRecord::ParallelDimMappingRecord(MappingRecordType type) - : type(type), output_dim(-1), input_dim(-1), weight_dim(-1), output_idx(-1), - input_idx(-1), weight_idx(-1) {} - -/*static*/ -ParallelDimMappingRecord ParallelDimMappingRecord::input_output_record( - int input_idx, - int input_dim, - int output_idx, - int output_dim, - std::optional operation) { - ParallelDimMappingRecord r(MappingRecordType::INPUT_OUTPUT); - r.operation = operation; - - assert(output_idx >= 0); - assert(output_dim >= 0); - assert(input_idx >= 0); - assert(input_dim >= 0); - - r.output_idx = output_idx; - r.output_dim = output_dim; - r.input_idx = input_idx; - r.input_dim = input_dim; - - return r; -} - -/*static*/ -ParallelDimMappingRecord ParallelDimMappingRecord::input_weight_record( - int input_idx, - int input_dim, - int weight_idx, - int weight_dim, - std::optional operation) { - ParallelDimMappingRecord r(MappingRecordType::INPUT_WEIGHT); - r.operation = operation; - - assert(input_idx >= 0); - assert(input_dim >= 0); - assert(weight_idx >= 0); - assert(weight_dim >= 0); - - r.input_idx = input_idx; - r.input_dim = input_dim; - r.weight_idx = weight_idx; - r.weight_dim = weight_dim; - - return r; -} - -MappingRecordType ParallelDimMappingRecord::get_type() const { - return this->type; -} - -} // namespace FlexFlow diff --git a/lib/op-attrs/src/parallel_dim_mapping_record.h b/lib/op-attrs/src/parallel_dim_mapping_record.h deleted file mode 100644 index c37ac79b40..0000000000 --- a/lib/op-attrs/src/parallel_dim_mapping_record.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _FLEXFLOW_OP_META_SRC_PARELLEL_DIM_MAPPING_RECORD_H -#define _FLEXFLOW_OP_META_SRC_PARELLEL_DIM_MAPPING_RECORD_H - -#include "utils/visitable.h" -#include - -namespace FlexFlow { - -enum class MappingRecordType { INPUT_OUTPUT, INPUT_WEIGHT }; - -enum class MappingOperation { PARTITION, REPLICATE }; - -class ParallelDimMappingRecord { -private: - ParallelDimMappingRecord(MappingRecordType); - -public: - ParallelDimMappingRecord() = delete; - - static ParallelDimMappingRecord input_output_record( - int input_idx, - int input_dim, - int output_idx, - int output_dim, - std::optional operation = std::nullopt); - static ParallelDimMappingRecord input_weight_record( - int input_idx, - int input_dim, - int weight_idx, - int weight_dim, - std::optional operation = std::nullopt); - MappingRecordType get_type() const; - -public: - MappingRecordType type; - std::optional operation; - - int output_dim, input_dim, weight_dim; - int output_idx, input_idx, weight_idx; -}; - -} // namespace FlexFlow - -VISITABLE_STRUCT(::FlexFlow::ParallelDimMappingRecord, - type, - operation, - output_dim, - input_dim, - weight_dim, - output_idx, - input_idx, - weight_idx); - -#endif diff --git a/lib/op-attrs/test/src/op-attrs/dim_ordered/dim_ordered.cc b/lib/op-attrs/test/src/op-attrs/dim_ordered/dim_ordered.cc deleted file mode 100644 index a5a261da25..0000000000 --- 
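Since ff_dim_t_from_relative_ff_dim_t now takes a num_tensor_dims_t, it may help to spell out the negative-index wrapping it preserves. A minimal sketch, assuming only the header path, for a 5-dimensional tensor:

#include "op-attrs/relative_ff_dim_t.h"
#include <cassert>

using namespace ::FlexFlow;

int main() {
  num_tensor_dims_t num_dims = num_tensor_dims_t{5_n};

  // Negative relative indices count from the end, so -1 is the last of the
  // five dimensions, i.e. ff_dim_t{4}.
  assert(ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t{-1}, num_dims) ==
         ff_dim_t{4_n});

  // Non-negative relative indices are passed through unchanged.
  assert(ff_dim_t_from_relative_ff_dim_t(relative_ff_dim_t{0}, num_dims) ==
         ff_dim_t{0_n});

  return 0;
}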
a/lib/op-attrs/test/src/op-attrs/dim_ordered/dim_ordered.cc +++ /dev/null @@ -1,13 +0,0 @@ -#include "op-attrs/dim_ordered/dim_ordered.h" -#include "doctest/doctest.h" -#include "test/utils/rapidcheck.h" - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - - TEST_CASE_TEMPLATE( - "Arbitrary> with T=", T, int, double, char) { - RC_SUBCASE([](DimOrdered) {}); - } -} diff --git a/lib/op-attrs/test/src/op-attrs/dim_ordered/zip.cc b/lib/op-attrs/test/src/op-attrs/dim_ordered/zip.cc deleted file mode 100644 index b77bb8f71e..0000000000 --- a/lib/op-attrs/test/src/op-attrs/dim_ordered/zip.cc +++ /dev/null @@ -1,41 +0,0 @@ -#include "op-attrs/dim_ordered/zip.h" -#include "op-attrs/ff_dim_t.dtg.h" -#include "test/utils/doctest/fmt/pair.h" -#include - -using namespace ::FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("zip(DimOrdered, DimOrdered)") { - DimOrdered lhs_input = {9, 9, 8, 9}; - DimOrdered rhs_input = {"m", "m", "k", "l", "m"}; - - SUBCASE("lhs is longer") { - DimOrdered> result = - zip(lhs_input, rhs_input); - - DimOrdered> correct = { - {9, "m"}, - {9, "m"}, - {8, "k"}, - {9, "l"}, - }; - - CHECK(result == correct); - } - - SUBCASE("rhs is longer") { - DimOrdered> result = - zip(rhs_input, lhs_input); - - DimOrdered> correct = { - {"m", 9}, - {"m", 9}, - {"k", 8}, - {"l", 9}, - }; - - CHECK(result == correct); - } - } -} diff --git a/lib/op-attrs/test/src/op-attrs/get_incoming_tensor_roles.cc b/lib/op-attrs/test/src/op-attrs/get_incoming_tensor_roles.cc index 4688ad4008..f30e6cdc0b 100644 --- a/lib/op-attrs/test/src/op-attrs/get_incoming_tensor_roles.cc +++ b/lib/op-attrs/test/src/op-attrs/get_incoming_tensor_roles.cc @@ -7,9 +7,9 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE( "get_incoming_tensor_roles(ComputationGraphOpAttrs, int num_incoming)") { SUBCASE("Concat") { - int num_incoming = 4; + nonnegative_int num_incoming = 4_n; ComputationGraphOpAttrs attrs = - ComputationGraphOpAttrs{ConcatAttrs{ff_dim_t{nonnegative_int{0}}}}; + ComputationGraphOpAttrs{ConcatAttrs{ff_dim_t{0_n}}}; std::vector result = get_incoming_tensor_roles(attrs, num_incoming); diff --git a/lib/op-attrs/test/src/op-attrs/operator_space_to_parallel_tensor_space_mapping.cc b/lib/op-attrs/test/src/op-attrs/operator_space_to_parallel_tensor_space_mapping.cc new file mode 100644 index 0000000000..1543e14ea0 --- /dev/null +++ b/lib/op-attrs/test/src/op-attrs/operator_space_to_parallel_tensor_space_mapping.cc @@ -0,0 +1,125 @@ +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "utils/orthotope/up_projection.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_identity_mapping(OperatorTaskSpace, ParallelTensorDimDegrees)") { + ParallelTensorDimDegrees dim_degrees = ParallelTensorDimDegrees{ + /*sum_degree=*/SumDegree{2_p}, + /*discard_copy_degree=*/DiscardCopyDegree{1_p}, + /*shard_degrees=*/FFOrdered{ + 1_p, + 3_p, + 1_p, + }, + }; + + OperatorTaskSpace operator_task_space = OperatorTaskSpace{MinimalOrthotope{{ + 3_ge2, 2_ge2, + }}}; + + OperatorSpaceToParallelTensorSpaceMapping result = + get_identity_mapping(operator_task_space, dim_degrees); + + auto make_op_coord = [](nonnegative_int x, nonnegative_int y) { + return DimCoord{{ + {operator_task_space_dim_idx_t{0_n}, x}, + {operator_task_space_dim_idx_t{1_n}, y}, + }}; + }; + + auto make_pt_coord = [](nonnegative_int sum_coord_entry, nonnegative_int shard_coord_entry) { + return DimCoord{{ + {sum_dim_idx(), sum_coord_entry}, + 
{discard_copy_dim_idx(), 0_n}, + {shard_dim_idx(ff_dim_t{0_n}), 0_n}, + {shard_dim_idx(ff_dim_t{1_n}), shard_coord_entry}, + {shard_dim_idx(ff_dim_t{2_n}), 0_n}, + }}; + }; + + OperatorSpaceToParallelTensorSpaceMapping correct = + OperatorSpaceToParallelTensorSpaceMapping{ + DimDomainMapping{ + /*coord_mapping=*/bidict< + DimCoord, + DimCoord + >{ + {make_op_coord(0_n, 0_n), make_pt_coord(0_n, 0_n)}, + {make_op_coord(0_n, 1_n), make_pt_coord(1_n, 0_n)}, + {make_op_coord(1_n, 0_n), make_pt_coord(0_n, 1_n)}, + {make_op_coord(1_n, 1_n), make_pt_coord(1_n, 1_n)}, + {make_op_coord(2_n, 0_n), make_pt_coord(0_n, 2_n)}, + {make_op_coord(2_n, 1_n), make_pt_coord(1_n, 2_n)}, + }, + /*l_domain=*/DimDomain{{ + {operator_task_space_dim_idx_t{0_n}, 3_p}, + {operator_task_space_dim_idx_t{1_n}, 2_p}, + }}, + /*r_domain=*/DimDomain{{ + {sum_dim_idx(), 2_p}, + {discard_copy_dim_idx(), 1_p}, + {shard_dim_idx(ff_dim_t{0_n}), 1_p}, + {shard_dim_idx(ff_dim_t{1_n}), 3_p}, + {shard_dim_idx(ff_dim_t{2_n}), 1_p}, + }}, + }, + }; + + CHECK(result == correct); + } + + TEST_CASE("ptensor_coord_for_task_space_coord") { + SUBCASE("identity projection") { + OperatorTaskSpace op_task_space = OperatorTaskSpace{ + MinimalOrthotope{{ + 5_ge2, + 3_ge2, + 12_ge2, + 2_ge2, + }}, + }; + + ParallelTensorDimDegrees dim_degrees = ParallelTensorDimDegrees{ + /*sum_degree=*/SumDegree{5_p}, + /*discard_copy_degree=*/DiscardCopyDegree{3_p}, + /*shard_degrees=*/FFOrdered{ + 12_p, + 2_p, + }, + }; + + OperatorSpaceToParallelTensorSpaceMapping mapping = get_identity_mapping(op_task_space, dim_degrees); + + TaskSpaceCoordinate task_space_coordinate = TaskSpaceCoordinate{ + OrthotopeCoord{ + std::vector{ + 3_n, + 2_n, + 10_n, + 1_n, + }, + }, + }; + + ParallelTensorSpaceCoordinate result = + ptensor_coord_for_task_space_coord( + /*mapping=*/mapping, + /*task_space_coord=*/task_space_coordinate); + + ParallelTensorSpaceCoordinate correct = ParallelTensorSpaceCoordinate{ + /*sum_component=*/3_n, + /*discard_copy_component=*/2_n, + /*shard_components=*/FFOrdered{ + 10_n, + 1_n, + }, + }; + + CHECK(result == correct); + } + } +} diff --git a/lib/op-attrs/test/src/op-attrs/operator_task_space.cc b/lib/op-attrs/test/src/op-attrs/operator_task_space.cc new file mode 100644 index 0000000000..99b7276122 --- /dev/null +++ b/lib/op-attrs/test/src/op-attrs/operator_task_space.cc @@ -0,0 +1,80 @@ +#include "op-attrs/operator_task_space.h" +#include "utils/fmt/unordered_set.h" +#include + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_task_space_coordinates") { + + SUBCASE("OperatorTaskSpace has 0 dimensions") { + OperatorTaskSpace task = OperatorTaskSpace{MinimalOrthotope{{}}}; + + std::unordered_set correct = { + TaskSpaceCoordinate{OrthotopeCoord{{}}}}; + std::unordered_set result = + get_task_space_coordinates(task); + CHECK(correct == result); + } + + SUBCASE("OperatorTaskSpace has 2 dimensions") { + + OperatorTaskSpace task = OperatorTaskSpace{MinimalOrthotope{{2_ge2, 2_ge2}}}; + + std::unordered_set correct = {{ + TaskSpaceCoordinate{OrthotopeCoord{{0_n, 0_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{0_n, 1_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{1_n, 0_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{1_n, 1_n}}}, + }}; + std::unordered_set result = + get_task_space_coordinates(task); + CHECK(correct == result); + } + + SUBCASE("OperatorTaskSpace has 3 dimensions") { + + OperatorTaskSpace task = OperatorTaskSpace{MinimalOrthotope{{3_ge2, 2_ge2, 2_ge2}}}; + + std::unordered_set correct = {{ + 
TaskSpaceCoordinate{OrthotopeCoord{{0_n, 0_n, 0_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{0_n, 0_n, 1_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{0_n, 1_n, 0_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{0_n, 1_n, 1_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{1_n, 0_n, 0_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{1_n, 0_n, 1_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{1_n, 1_n, 0_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{1_n, 1_n, 1_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{2_n, 0_n, 0_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{2_n, 0_n, 1_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{2_n, 1_n, 0_n}}}, + TaskSpaceCoordinate{OrthotopeCoord{{2_n, 1_n, 1_n}}}, + }}; + std::unordered_set result = + get_task_space_coordinates(task); + CHECK(correct == result); + } + } + + TEST_CASE("get_task_space_maximum_coordinate") { + SUBCASE("OperatorTaskSpace has 2 dimensions") { + + OperatorTaskSpace task = OperatorTaskSpace{MinimalOrthotope{{3_ge2, 2_ge2}}}; + + TaskSpaceCoordinate correct = + TaskSpaceCoordinate{OrthotopeCoord{{2_n, 1_n}}}; + TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); + CHECK(correct == result); + } + + SUBCASE("OperatorTaskSpace has 3 dimensions") { + + OperatorTaskSpace task = OperatorTaskSpace{MinimalOrthotope{{3_ge2, 2_ge2, 4_ge2}}}; + + TaskSpaceCoordinate correct = + TaskSpaceCoordinate{OrthotopeCoord{{2_n, 1_n, 3_n}}}; + TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); + CHECK(correct == result); + } + } +} diff --git a/lib/op-attrs/test/src/op-attrs/operator_task_space_dim_idx_t.cc b/lib/op-attrs/test/src/op-attrs/operator_task_space_dim_idx_t.cc new file mode 100644 index 0000000000..5bb0102671 --- /dev/null +++ b/lib/op-attrs/test/src/op-attrs/operator_task_space_dim_idx_t.cc @@ -0,0 +1,15 @@ +#include "op-attrs/operator_task_space_dim_idx_t.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("operator_task_space_dim_idx_range(nonnegative_int)") { + SUBCASE("end is zero") { + std::set result = + operator_task_space_dim_idx_range(nonnegative_int{0}); + std::set correct = { + operator_task_space_dim_idx_t{nonnegative_int{0}}}; + } + } +} diff --git a/lib/op-attrs/test/src/op-attrs/ops/batch_matmul.cc b/lib/op-attrs/test/src/op-attrs/ops/batch_matmul.cc index d251fb731d..1044c379f0 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/batch_matmul.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/batch_matmul.cc @@ -12,9 +12,9 @@ TEST_SUITE(FF_TEST_SUITE) { positive_int p = 10_p; BatchMatmulAttrs attrs = BatchMatmulAttrs{ - /*a_seq_length_dim=*/0_n, // TODO figure out if these arguments are + /*a_seq_length_dim=*/1_p, // TODO figure out if these arguments are // still relevant - /*b_seq_length_dim=*/0_n, + /*b_seq_length_dim=*/1_p, }; TensorShape input_lhs_shape = TensorShape{ @@ -106,9 +106,9 @@ TEST_SUITE(FF_TEST_SUITE) { positive_int o_sum = 11_p; BatchMatmulAttrs attrs = BatchMatmulAttrs{ - /*a_seq_length_dim=*/0_n, // TODO figure out if these arguments are + /*a_seq_length_dim=*/0_p, // TODO figure out if these arguments are // still relevant - /*b_seq_length_dim=*/0_n, + /*b_seq_length_dim=*/0_p, }; auto make_lhs = [&](SumDegree o_sum, diff --git a/lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc b/lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc index 56407c03f1..53325286f5 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/conv_2d.cc @@ -1,6 +1,6 @@ #include "op-attrs/ops/conv_2d.h" -#include "doctest/doctest.h" #include "utils/integer_conversions.h" 
+#include using namespace ::FlexFlow; diff --git a/lib/op-attrs/test/src/op-attrs/ops/element_binary.cc b/lib/op-attrs/test/src/op-attrs/ops/element_binary.cc index 72d499d20e..877284b511 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/element_binary.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/element_binary.cc @@ -44,12 +44,7 @@ TEST_SUITE(FF_TEST_SUITE) { TensorShape incorrect_rhs = input_lhs; dim_at_idx(incorrect_rhs.dims, relative_ff_dim_t{0}) += 1_p; - tl::expected result = - get_output_shape(attrs, input_lhs, incorrect_rhs); - - CHECK_MESSAGE(!result.has_value(), - "Unexpected successful result: ", - result.error()); + CHECK_THROWS(get_output_shape(attrs, input_lhs, incorrect_rhs)); } } @@ -146,12 +141,8 @@ TEST_SUITE(FF_TEST_SUITE) { make_lhs(SumDegree{1_p}, DiscardCopyDegree{degree}, 1_p, 1_p, 1_p); ParallelTensorShape input_rhs = make_rhs(SumDegree{1_p}, DiscardCopyDegree{degree}, 1_p, 1_p, 1_p); - tl::expected result = - get_output_shape(attrs, input_lhs, input_rhs); - CHECK_MESSAGE(!result.has_value(), - "Unexpected successful result: ", - result.error()); + CHECK_THROWS(get_output_shape(attrs, input_lhs, input_rhs)); } SUBCASE("invalid mismatched parallelism degrees") { @@ -161,12 +152,8 @@ TEST_SUITE(FF_TEST_SUITE) { make_lhs(SumDegree{1_p}, DiscardCopyDegree{1_p}, 1_p, degree, 1_p); ParallelTensorShape input_rhs = make_rhs(SumDegree{1_p}, DiscardCopyDegree{1_p}, 1_p, 1_p, degree); - tl::expected result = - get_output_shape(attrs, input_lhs, input_rhs); - CHECK_MESSAGE(!result.has_value(), - "Unexpected successful result: ", - result.error()); + CHECK_THROWS(get_output_shape(attrs, input_lhs, input_rhs)); } } } diff --git a/lib/op-attrs/test/src/op-attrs/ops/element_unary.cc b/lib/op-attrs/test/src/op-attrs/ops/element_unary.cc index 355feb4c5f..eccddd35d8 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/element_unary.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/element_unary.cc @@ -56,25 +56,17 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("sum degree > 1") { positive_int degree = 2_p; - tl::expected result = get_output_shape( + CHECK_THROWS(get_output_shape( attrs, - make_input(SumDegree{degree}, DiscardCopyDegree{1_p}, 1_p, 1_p, 1_p)); - - CHECK_MESSAGE(!result.has_value(), - "Unexpected successful result: ", - result.error()); + make_input(SumDegree{degree}, DiscardCopyDegree{1_p}, 1_p, 1_p, 1_p))); } SUBCASE("discard copy degree > 1") { positive_int degree = 2_p; - tl::expected result = get_output_shape( + CHECK_THROWS(get_output_shape( attrs, - make_input(SumDegree{1_p}, DiscardCopyDegree{degree}, 1_p, 1_p, 1_p)); - - CHECK_MESSAGE(!result.has_value(), - "Unexpected successful result: ", - result.error()); + make_input(SumDegree{1_p}, DiscardCopyDegree{degree}, 1_p, 1_p, 1_p))); } } } diff --git a/lib/op-attrs/test/src/op-attrs/ops/flat.cc b/lib/op-attrs/test/src/op-attrs/ops/flat.cc index c4fe8a5250..8a2c609cd4 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/flat.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/flat.cc @@ -148,11 +148,7 @@ TEST_SUITE(FF_TEST_SUITE) { FFOrdered{1_p, 1_p, 2_p, 1_p}, }; - std::optional result = - optional_from_expected(get_output_parallel_dim_degrees(attrs, input)); - std::optional correct = std::nullopt; - - CHECK(result == correct); + CHECK_THROWS(get_output_parallel_dim_degrees(attrs, input)); } SUBCASE("allows sum parallelism") { @@ -162,14 +158,13 @@ TEST_SUITE(FF_TEST_SUITE) { FFOrdered{1_p, 1_p, 1_p, 1_p}, }; - std::optional result = - optional_from_expected(get_output_parallel_dim_degrees(attrs, input)); - std::optional correct = - 
ParallelTensorDimDegrees{ - SumDegree{2_p}, - DiscardCopyDegree{1_p}, - FFOrdered{1_p, 1_p, 1_p}, - }; + ParallelTensorDimDegrees result = + get_output_parallel_dim_degrees(attrs, input); + ParallelTensorDimDegrees correct = ParallelTensorDimDegrees{ + SumDegree{2_p}, + DiscardCopyDegree{1_p}, + FFOrdered{1_p, 1_p, 1_p}, + }; CHECK(result == correct); } @@ -181,14 +176,13 @@ TEST_SUITE(FF_TEST_SUITE) { FFOrdered{1_p, 1_p, 1_p, 1_p}, }; - std::optional result = - optional_from_expected(get_output_parallel_dim_degrees(attrs, input)); - std::optional correct = - ParallelTensorDimDegrees{ - SumDegree{1_p}, - DiscardCopyDegree{2_p}, - FFOrdered{1_p, 1_p, 1_p}, - }; + ParallelTensorDimDegrees result = + get_output_parallel_dim_degrees(attrs, input); + ParallelTensorDimDegrees correct = ParallelTensorDimDegrees{ + SumDegree{1_p}, + DiscardCopyDegree{2_p}, + FFOrdered{1_p, 1_p, 1_p}, + }; CHECK(result == correct); } diff --git a/lib/op-attrs/test/src/op-attrs/ops/linear.cc b/lib/op-attrs/test/src/op-attrs/ops/linear.cc index 4e0dd149ab..26259f3d08 100644 --- a/lib/op-attrs/test/src/op-attrs/ops/linear.cc +++ b/lib/op-attrs/test/src/op-attrs/ops/linear.cc @@ -288,4 +288,29 @@ TEST_SUITE(FF_TEST_SUITE) { } } } + + TEST_CASE("get_operator_to_input_mapping(LinearAttrs, nonnegative_int)") { + LinearAttrs attrs = LinearAttrs{ + /*out_channels=*/16_p, + /*use_bias=*/false, + /*data_type=*/DataType::FLOAT, + /*activation=*/Activation::RELU, + /*regularizer=*/std::nullopt, + }; + + ParallelTensorDimDegrees input_dims = ParallelTensorDimDegrees{ + /*sum_degree=*/SumDegree{2_p}, + /*discard_copy_dedgree=*/DiscardCopyDegree{1_p}, + /*shard_degrees=*/FFOrdered{ + 1_p, + 1_p, + }, + }; + + OperatorSpaceToParallelTensorSpaceMapping result = + get_operator_to_input_mapping(attrs, input_dims); + + // TODO(@lockshaw): implement some actual checks here + NOT_IMPLEMENTED(); + } } diff --git a/lib/op-attrs/test/src/op-attrs/parallel_tensor_dim_degrees.cc b/lib/op-attrs/test/src/op-attrs/parallel_tensor_dim_degrees.cc new file mode 100644 index 0000000000..6d0e072db5 --- /dev/null +++ b/lib/op-attrs/test/src/op-attrs/parallel_tensor_dim_degrees.cc @@ -0,0 +1,161 @@ +#include "op-attrs/parallel_tensor_dim_degrees.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "test/utils/doctest/fmt/set.h" +#include "test/utils/doctest/fmt/unordered_map.h" +#include "test/utils/doctest/fmt/unordered_set.h" +#include + +using namespace ::FlexFlow; + +static parallel_tensor_dim_idx_t shard_dim_idx_from_raw(int idx) { + return parallel_tensor_dim_idx_t{ff_dim_t{nonnegative_int{idx}}}; +} + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_parallel_tensor_degree_map") { + ParallelTensorDimDegrees degrees = ParallelTensorDimDegrees{ + SumDegree{3_p}, + DiscardCopyDegree{1_p}, + FFOrdered{ + 1_p, + 2_p, + 1_p, + }, + }; + + std::unordered_map result = + get_parallel_tensor_degree_map(degrees); + std::unordered_map correct = { + {parallel_tensor_dim_idx_t{ReplicaType::SUM}, 3_p}, + {parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}, 1_p}, + {shard_dim_idx_from_raw(0), 1_p}, + {shard_dim_idx_from_raw(1), 2_p}, + {shard_dim_idx_from_raw(2), 1_p}, + }; + + CHECK(result == correct); + } + + TEST_CASE("get_parallel_tensor_space_coordinates") { + ParallelTensorDimDegrees degrees = ParallelTensorDimDegrees{ + SumDegree{3_p}, + DiscardCopyDegree{1_p}, + FFOrdered{ + 1_p, + 2_p, + 1_p, + }, + }; + + std::unordered_set result = + get_parallel_tensor_space_coordinates(degrees); + std::unordered_set correct = { + 
ParallelTensorSpaceCoordinate{ + /*sum_idx=*/0_n, + /*discard_copy_idx=*/0_n, + /*shard_idxs=*/FFOrdered{0_n, 0_n, 0_n}, + }, + ParallelTensorSpaceCoordinate{ + /*sum_idx=*/1_n, + /*discard_copy_idx=*/0_n, + /*shard_idxs=*/FFOrdered{0_n, 0_n, 0_n}, + }, + ParallelTensorSpaceCoordinate{ + /*sum_idx=*/2_n, + /*discard_copy_idx=*/0_n, + /*shard_idxs=*/FFOrdered{0_n, 0_n, 0_n}, + }, + ParallelTensorSpaceCoordinate{ + /*sum_idx=*/0_n, + /*discard_copy_idx=*/0_n, + /*shard_idxs=*/FFOrdered{0_n, 1_n, 0_n}, + }, + ParallelTensorSpaceCoordinate{ + /*sum_idx=*/1_n, + /*discard_copy_idx=*/0_n, + /*shard_idxs=*/FFOrdered{0_n, 1_n, 0_n}, + }, + ParallelTensorSpaceCoordinate{ + /*sum_idx=*/2_n, + /*discard_copy_idx=*/0_n, + /*shard_idxs=*/FFOrdered{0_n, 1_n, 0_n}, + }, + }; + + CHECK(result == correct); + } + + TEST_CASE( + "get_nontrivial_parallel_tensor_dim_indices(ParallelTensorDimDegrees)") { + SUBCASE("a replica dim has degree 1") { + ParallelTensorDimDegrees degrees = ParallelTensorDimDegrees{ + SumDegree{3_p}, + DiscardCopyDegree{1_p}, + FFOrdered{4_p, 2_p, 4_p}, + }; + + std::set result = + get_nontrivial_parallel_tensor_dim_indices(degrees); + std::set correct = { + parallel_tensor_dim_idx_t{ReplicaType::SUM}, + shard_dim_idx_from_raw(0), + shard_dim_idx_from_raw(1), + shard_dim_idx_from_raw(2), + }; + + CHECK(result == correct); + } + + SUBCASE("a shard dim has degree 1") { + ParallelTensorDimDegrees degrees = ParallelTensorDimDegrees{ + SumDegree{3_p}, + DiscardCopyDegree{2_p}, + FFOrdered{1_p, 4_p, 1_p}, + }; + + std::set result = + get_nontrivial_parallel_tensor_dim_indices(degrees); + std::set correct = { + parallel_tensor_dim_idx_t{ReplicaType::SUM}, + parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}, + shard_dim_idx_from_raw(1), + }; + + CHECK(result == correct); + } + + SUBCASE("no dims have degree 1") { + ParallelTensorDimDegrees degrees = ParallelTensorDimDegrees{ + SumDegree{3_p}, + DiscardCopyDegree{2_p}, + FFOrdered{4_p, 2_p, 5_p}, + }; + + std::set result = + get_nontrivial_parallel_tensor_dim_indices(degrees); + std::set correct = { + parallel_tensor_dim_idx_t{ReplicaType::SUM}, + parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}, + shard_dim_idx_from_raw(0), + shard_dim_idx_from_raw(1), + shard_dim_idx_from_raw(2), + }; + + CHECK(result == correct); + } + + SUBCASE("all dims have degree 1") { + ParallelTensorDimDegrees degrees = ParallelTensorDimDegrees{ + SumDegree{1_p}, + DiscardCopyDegree{1_p}, + FFOrdered{1_p, 1_p, 1_p}, + }; + + std::set result = + get_nontrivial_parallel_tensor_dim_indices(degrees); + std::set correct = {}; + + CHECK(result == correct); + } + } +} diff --git a/lib/op-attrs/test/src/op-attrs/parallel_tensor_dim_idx_t.cc b/lib/op-attrs/test/src/op-attrs/parallel_tensor_dim_idx_t.cc new file mode 100644 index 0000000000..f826b3dbe2 --- /dev/null +++ b/lib/op-attrs/test/src/op-attrs/parallel_tensor_dim_idx_t.cc @@ -0,0 +1,83 @@ +#include +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "utils/containers/sorted_by.h" +#include "test/utils/doctest/fmt/vector.h" +#include "test/utils/rapidcheck.h" + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_parallel_tensor_dim_ordering") { + DimOrdering + ordering = get_parallel_tensor_dim_ordering(); + + RC_SUBCASE( + "is antireflexive", + [&](parallel_tensor_dim_idx_t const &idx) { + RC_ASSERT(!(idx < idx)); + }); + + RC_SUBCASE( + "is antisymmetric", + [&](parallel_tensor_dim_idx_t const &a, + parallel_tensor_dim_idx_t const &b) { + RC_PRE(a < b); + + RC_ASSERT(!(b < a)); + 
}); + + RC_SUBCASE( + "is transitive", + [&](parallel_tensor_dim_idx_t const &a, + parallel_tensor_dim_idx_t const &b, + parallel_tensor_dim_idx_t const &c) { + RC_PRE(a < b); + RC_PRE(b < c); + + RC_ASSERT(a < c); + }); + + SUBCASE("sum is less than discard") { + bool result = ordering.lt(sum_dim_idx(), discard_copy_dim_idx()); + bool correct = true; + + CHECK(result == correct); + } + + SUBCASE("discard is less than shard dim") { + bool result = ordering.lt(discard_copy_dim_idx(), shard_dim_idx(ff_dim_t{0_n})); + bool correct = true; + + CHECK(result == correct); + } + + SUBCASE("shard dim 0 is less than shard dim 1") { + bool result = ordering.lt(shard_dim_idx(ff_dim_t{0_n}), shard_dim_idx(ff_dim_t{1_n})); + bool correct = true; + + CHECK(result == correct); + } + + SUBCASE("properly sorts a set of dimensions") { + std::unordered_set + input = { + sum_dim_idx(), + shard_dim_idx(ff_dim_t{1_n}), + shard_dim_idx(ff_dim_t{0_n}), + discard_copy_dim_idx(), + }; + + std::vector result = + sorted_by(input, get_parallel_tensor_dim_ordering().lt); + + std::vector correct = { + sum_dim_idx(), + discard_copy_dim_idx(), + shard_dim_idx(ff_dim_t{0_n}), + shard_dim_idx(ff_dim_t{1_n}), + }; + + CHECK(result == correct); + } + } +} diff --git a/lib/op-attrs/test/src/op-attrs/relative_ff_dim_t.cc b/lib/op-attrs/test/src/op-attrs/relative_ff_dim_t.cc index e3f3f4534e..944bef5bf7 100644 --- a/lib/op-attrs/test/src/op-attrs/relative_ff_dim_t.cc +++ b/lib/op-attrs/test/src/op-attrs/relative_ff_dim_t.cc @@ -5,7 +5,7 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("ff_dim_t_from_relative_ff_dim_t") { - nonnegative_int input_dim = 5_n; + num_tensor_dims_t input_dim = num_tensor_dims_t{5_n}; SUBCASE("relative index is zero") { relative_ff_dim_t relative_ff_dim = relative_ff_dim_t{0}; diff --git a/lib/op-attrs/test/src/op-attrs/tensor_dim_permutation.cc b/lib/op-attrs/test/src/op-attrs/tensor_dim_permutation.cc new file mode 100644 index 0000000000..ad306e56c7 --- /dev/null +++ b/lib/op-attrs/test/src/op-attrs/tensor_dim_permutation.cc @@ -0,0 +1,87 @@ +#include +#include "op-attrs/tensor_dim_permutation.h" +#include "test/utils/rapidcheck/doctest.h" + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("TensorDimPermutation") { + SUBCASE("fails if constructed with a non-contiguous key set") { + CHECK_THROWS( + TensorDimPermutation{bidict{ + {ff_dim_t{2_n}, ff_dim_t{0_n}}, + {ff_dim_t{0_n}, ff_dim_t{1_n}}, + }} + ); + } + + SUBCASE("fails if constructed with a key set that doesn't start at 1") { + CHECK_THROWS( + TensorDimPermutation{bidict{ + {ff_dim_t{0_n}, ff_dim_t{1_n}}, + {ff_dim_t{1_n}, ff_dim_t{2_n}}, + }} + ); + } + + SUBCASE("can be constructed with empty bidict") { + TensorDimPermutation p = TensorDimPermutation{bidict{}}; + CHECK(p.num_tensor_dims() == num_tensor_dims_t{0_n}); + } + + SUBCASE("can be constructed with non-empty bidict") { + bidict b = bidict{ + {ff_dim_t{0_n}, ff_dim_t{2_n}}, + {ff_dim_t{1_n}, ff_dim_t{3_n}}, + {ff_dim_t{3_n}, ff_dim_t{0_n}}, + {ff_dim_t{2_n}, ff_dim_t{1_n}}, + }; + + TensorDimPermutation p = TensorDimPermutation{b}; + + SUBCASE("at_l") { + SUBCASE("key is present") { + ff_dim_t result = p.at_l(ff_dim_t{1_n}); + ff_dim_t correct = ff_dim_t{3_n}; + + CHECK(result == correct); + } + + SUBCASE("key is not present") { + CHECK_THROWS(p.at_l(ff_dim_t{4_n})); + } + } + + SUBCASE("at_r") { + SUBCASE("key is present") { + ff_dim_t result = p.at_r(ff_dim_t{1_n}); + ff_dim_t correct = ff_dim_t{2_n}; + + CHECK(result == correct); + } + + 
SUBCASE("key is not present") { + CHECK_THROWS(p.at_r(ff_dim_t{4_n})); + } + } + + SUBCASE("num_tensor_dims") { + num_tensor_dims_t result = p.num_tensor_dims(); + num_tensor_dims_t correct = num_tensor_dims_t{4_n}; + + CHECK(result == correct); + } + + SUBCASE("as_bidict") { + bidict result = p.as_bidict(); + bidict correct = b; + + CHECK(result == correct); + } + } + } + + TEST_CASE("Arbitrary") { + RC_SUBCASE([](TensorDimPermutation) {}); + } +} diff --git a/lib/pcg/include/pcg/cg_operator_plus_signature.struct.toml b/lib/pcg/include/pcg/cg_operator_plus_signature.struct.toml deleted file mode 100644 index f4714a87c8..0000000000 --- a/lib/pcg/include/pcg/cg_operator_plus_signature.struct.toml +++ /dev/null @@ -1,23 +0,0 @@ -namespace = "FlexFlow" -name = "CGOperatorPlusSignature" -features = [ - "eq", - "ord", - "hash", - "fmt", - "json", -] - -includes = [ - "op-attrs/computation_graph_op_attrs.dtg.h", - "pcg/cg_operator_tensor_shape_signature.dtg.h", - "", -] - -[[fields]] -name = "op_attrs" -type = "::FlexFlow::ComputationGraphOpAttrs" - -[[fields]] -name = "tensor_shape_signature" -type = "::FlexFlow::CGOperatorTensorShapeSignature" diff --git a/lib/pcg/include/pcg/cg_operator_tensor_shape_signature.h b/lib/pcg/include/pcg/cg_operator_tensor_shape_signature.h deleted file mode 100644 index 3629aaff43..0000000000 --- a/lib/pcg/include/pcg/cg_operator_tensor_shape_signature.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_CG_OPERATOR_TENSOR_SHAPE_SIGNATURE_H -#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_CG_OPERATOR_TENSOR_SHAPE_SIGNATURE_H - -#include "pcg/cg_operator_tensor_shape_signature.dtg.h" -#include "pcg/tensor_role.dtg.h" - -namespace FlexFlow { - -std::vector - tensor_shapes_for_role(CGOperatorTensorShapeSignature const &signature, - TensorRole tensor_role); - -TensorShape tensor_shape_for_role_and_index( - CGOperatorTensorShapeSignature const &signature, - TensorRole tensor_role, - nonnegative_int index); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/include/pcg/machine_compute_specification.h b/lib/pcg/include/pcg/machine_compute_specification.h new file mode 100644 index 0000000000..6b089312f9 --- /dev/null +++ b/lib/pcg/include/pcg/machine_compute_specification.h @@ -0,0 +1,26 @@ +#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_MACHINE_COMPUTE_SPECIFICATION_H +#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_MACHINE_COMPUTE_SPECIFICATION_H + +#include "pcg/machine_compute_specification.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "pcg/device_id_t.dtg.h" +#include "pcg/device_type.dtg.h" + +namespace FlexFlow { + +positive_int get_num_gpus(MachineComputeSpecification const &ms); +positive_int get_num_cpus(MachineComputeSpecification const &ms); +positive_int get_num_devices(MachineComputeSpecification const &ms, + DeviceType const &device_type); +positive_int get_num_devices_per_node(MachineComputeSpecification const &ms, + DeviceType const &device_type); + +bool is_valid_machine_space_coordinate(MachineComputeSpecification const &ms, + MachineSpaceCoordinate const &coord); + +device_id_t get_device_id(MachineComputeSpecification const &ms, + MachineSpaceCoordinate const &coord); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/machine_compute_specification.struct.toml b/lib/pcg/include/pcg/machine_compute_specification.struct.toml new file mode 100644 index 0000000000..cdbdf70f80 --- /dev/null +++ b/lib/pcg/include/pcg/machine_compute_specification.struct.toml @@ -0,0 +1,26 @@ +namespace = "FlexFlow" +name = 
"MachineComputeSpecification" +features = [ + "eq", + "ord", + "hash", + "json", + "rapidcheck", + "fmt", +] + +includes = [ + "utils/positive_int/positive_int.h", +] + +[[fields]] +name = "num_nodes" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "num_cpus_per_node" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "num_gpus_per_node" +type = "::FlexFlow::positive_int" diff --git a/lib/pcg/include/pcg/machine_interconnect_specification.struct.toml b/lib/pcg/include/pcg/machine_interconnect_specification.struct.toml new file mode 100644 index 0000000000..11752dd9d0 --- /dev/null +++ b/lib/pcg/include/pcg/machine_interconnect_specification.struct.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "MachineInterconnectSpecification" +features = [ + "eq", + "ord", + "hash", + "json", + "rapidcheck", + "fmt", +] + +includes = [ + "utils/units/bytes_per_second_t.h", +] + +[[fields]] +name = "inter_node_bandwidth" +type = "::FlexFlow::bytes_per_second_t" + +[[fields]] +name = "intra_node_bandwidth" +type = "::FlexFlow::bytes_per_second_t" diff --git a/lib/pcg/include/pcg/machine_space_coordinate.struct.toml b/lib/pcg/include/pcg/machine_space_coordinate.struct.toml index 2528eab849..930daaf4f3 100644 --- a/lib/pcg/include/pcg/machine_space_coordinate.struct.toml +++ b/lib/pcg/include/pcg/machine_space_coordinate.struct.toml @@ -5,7 +5,7 @@ features = [ "ord", "hash", "json", - # "rapidcheck", + "rapidcheck", "fmt", ] diff --git a/lib/pcg/include/pcg/machine_space_dim_subgrid.struct.toml b/lib/pcg/include/pcg/machine_space_dim_subgrid.struct.toml new file mode 100644 index 0000000000..b659486a8a --- /dev/null +++ b/lib/pcg/include/pcg/machine_space_dim_subgrid.struct.toml @@ -0,0 +1,27 @@ +namespace = "FlexFlow" +name = "MachineSpaceDimSubgrid" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", +] + +includes = [ + "utils/nonnegative_int/nonnegative_int.h", + "pcg/stride_t.dtg.h", + "utils/int_ge_two/int_ge_two.h", +] + +[[fields]] +name = "offset" +type = "::FlexFlow::nonnegative_int" + +[[fields]] +name = "stride" +type = "::FlexFlow::stride_t" + +[[fields]] +name = "num_points" +type = "::FlexFlow::int_ge_two" diff --git a/lib/pcg/include/pcg/machine_space_subgrid.struct.toml b/lib/pcg/include/pcg/machine_space_subgrid.struct.toml new file mode 100644 index 0000000000..558fc43924 --- /dev/null +++ b/lib/pcg/include/pcg/machine_space_subgrid.struct.toml @@ -0,0 +1,28 @@ +namespace = "FlexFlow" +name = "MachineSpaceSubgrid" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", +] + +includes = [ + "pcg/machine_space_dim_subgrid.dtg.h", + "", +] + +src_includes = [ + "utils/hash/vector.h", + "utils/fmt/vector.h", + "utils/ord/vector.h", +] + +[[fields]] +name = "inter_node_strides" +type = "std::vector<::FlexFlow::MachineSpaceDimSubgrid>" + +[[fields]] +name = "intra_node_strides" +type = "std::vector<::FlexFlow::MachineSpaceDimSubgrid>" diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h deleted file mode 100644 index 48c6e9a7a6..0000000000 --- a/lib/pcg/include/pcg/machine_specification.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_SPECIFICATION_H -#define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_SPECIFICATION_H - -#include "pcg/device_id_t.dtg.h" -#include "pcg/device_type.dtg.h" -#include "pcg/machine_space_coordinate.dtg.h" -#include "pcg/machine_specification.dtg.h" - -namespace FlexFlow { - -positive_int get_num_gpus(MachineSpecification const &ms); -positive_int 
get_num_cpus(MachineSpecification const &ms); -positive_int get_num_devices(MachineSpecification const &ms, - DeviceType const &device_type); -positive_int get_num_devices_per_node(MachineSpecification const &ms, - DeviceType const &device_type); - -bool is_valid_machine_space_coordinate(MachineSpecification const &ms, - MachineSpaceCoordinate const &coord); - -device_id_t get_device_id(MachineSpecification const &ms, - MachineSpaceCoordinate const &coord); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/include/pcg/machine_specification.struct.toml b/lib/pcg/include/pcg/machine_specification.struct.toml index 49e9bd9d78..7c6a949f6c 100644 --- a/lib/pcg/include/pcg/machine_specification.struct.toml +++ b/lib/pcg/include/pcg/machine_specification.struct.toml @@ -5,30 +5,19 @@ features = [ "ord", "hash", "json", - # "rapidcheck", + "rapidcheck", "fmt", ] includes = [ - "utils/positive_int/positive_int.h", + "pcg/machine_compute_specification.dtg.h", + "pcg/machine_interconnect_specification.dtg.h", ] [[fields]] -name = "num_nodes" -type = "::FlexFlow::positive_int" +name = "compute_specification" +type = "::FlexFlow::MachineComputeSpecification" [[fields]] -name = "num_cpus_per_node" -type = "::FlexFlow::positive_int" - -[[fields]] -name = "num_gpus_per_node" -type = "::FlexFlow::positive_int" - -[[fields]] -name = "inter_node_bandwidth" -type = "float" - -[[fields]] -name = "intra_node_bandwidth" -type = "float" +name = "interconnect_specification" +type = "::FlexFlow::MachineInterconnectSpecification" diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 6ed9e7dd9c..0d467e70a8 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -1,18 +1,20 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H #define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H -#include "machine_specification.dtg.h" -#include "machine_view.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/task_space_coordinate.dtg.h" #include "pcg/device_id_t.dtg.h" -#include "pcg/operator_task_space.dtg.h" -#include "pcg/task_space_coordinate.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" +#include "pcg/machine_view.dtg.h" +#include "pcg/operator_space_to_machine_space_mapping.dtg.h" +#include "utils/bidict/bidict.h" #include #include #include namespace FlexFlow { -size_t num_dims(MachineView const &mv); +nonnegative_int mv_get_expected_task_space_num_dims(MachineView const &mv); DeviceType get_device_type(MachineView const &mv); @@ -26,20 +28,28 @@ MachineView machine_view_from_strides_and_machine_spec_dimensions( std::vector const &strides, std::vector const &dims); -std::optional - get_machine_space_coordinate(OperatorTaskSpace const &task, - MachineView const &mv, - TaskSpaceCoordinate const &coordinates, - MachineSpecification const &ms); +MachineSpaceCoordinate + get_machine_space_coordinate(OperatorTaskSpace const &operator_task_space, + MachineView const &machine_view, + TaskSpaceCoordinate const &task_space_coordinate); + +TaskSpaceCoordinate + mv_task_space_coord_for_machine_space_coord( + MachineView const &, + OperatorTaskSpace const &, + MachineSpaceCoordinate const &); + +OperatorSpaceToMachineSpaceMapping + get_coordinate_mapping_for_machine_view(OperatorTaskSpace const &operator_task_space, + MachineView const &machine_view); std::unordered_set get_machine_space_coordinates(OperatorTaskSpace const &task, - MachineView const &mv, - MachineSpecification const &ms); + MachineView const &mv); std::unordered_set 
get_device_ids(OperatorTaskSpace const &task, MachineView const &mv, - MachineSpecification const &ms); + MachineComputeSpecification const &ms); MachineView make_1d_machine_view(MachineSpaceCoordinate const &start, MachineSpecificationDimension const &dim, diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index e4de69eafc..6f9a7fdecf 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -5,7 +5,7 @@ features = [ "ord", "hash", "json", - # "rapidcheck", + "rapidcheck", "fmt", ] diff --git a/lib/pcg/include/pcg/machine_view_dimension.struct.toml b/lib/pcg/include/pcg/machine_view_dimension.struct.toml index 03b0ac51e4..84dd10bdcd 100644 --- a/lib/pcg/include/pcg/machine_view_dimension.struct.toml +++ b/lib/pcg/include/pcg/machine_view_dimension.struct.toml @@ -5,7 +5,7 @@ features = [ "ord", "hash", "json", - # "rapidcheck", + "rapidcheck", "fmt", ] diff --git a/lib/pcg/include/pcg/model_compilation.h b/lib/pcg/include/pcg/model_compilation.h deleted file mode 100644 index 1ab66161ec..0000000000 --- a/lib/pcg/include/pcg/model_compilation.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MODEL_COMPILATION_H -#define _FLEXFLOW_PCG_INCLUDE_PCG_MODEL_COMPILATION_H - -#include "pcg/computation_graph.h" -#include "pcg/optimizer.h" -#include "pcg/parallel_computation_graph/parallel_computation_graph.h" -#include "pcg/tensor_mapping.h" - -namespace FlexFlow { - -struct ModelCompilationInput { - ComputationGraph computation_graph; - Optimizer optimizer; -}; -FF_VISITABLE_STRUCT(ModelCompilationInput, computation_graph, optimizer); - -struct ModelCompilationResult { - ModelCompilationInput input; - ParallelComputationGraph pcg; - req tensor_mapping; -}; -FF_VISITABLE_STRUCT(ModelCompilationResult, input, pcg, tensor_mapping); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/include/pcg/num_points_t.struct.toml b/lib/pcg/include/pcg/num_points_t.struct.toml index b389245c63..a006e551e5 100644 --- a/lib/pcg/include/pcg/num_points_t.struct.toml +++ b/lib/pcg/include/pcg/num_points_t.struct.toml @@ -9,6 +9,10 @@ features = [ "fmt", ] +includes = [ + "utils/positive_int/positive_int.h" +] + [[fields]] name = "unwrapped" -type = "int" +type = "::FlexFlow::positive_int" diff --git a/lib/pcg/include/pcg/operator_space_to_machine_space_mapping.struct.toml b/lib/pcg/include/pcg/operator_space_to_machine_space_mapping.struct.toml new file mode 100644 index 0000000000..5cbce37016 --- /dev/null +++ b/lib/pcg/include/pcg/operator_space_to_machine_space_mapping.struct.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "OperatorSpaceToMachineSpaceMapping" +features = [ + "eq", + "hash", + "fmt", +] + +includes = [ + "op-attrs/operator_task_space.dtg.h", + "op-attrs/task_space_coordinate.dtg.h", + "pcg/machine_space_coordinate.dtg.h", + "utils/bidict/bidict.h", +] + +[[fields]] +name = "raw_mapping" +type = "::FlexFlow::bidict<::FlexFlow::TaskSpaceCoordinate, ::FlexFlow::MachineSpaceCoordinate>" + +[[fields]] +name = "operator_task_space" +type = "::FlexFlow::OperatorTaskSpace" diff --git a/lib/pcg/include/pcg/operator_task_space.h b/lib/pcg/include/pcg/operator_task_space.h deleted file mode 100644 index ceb0146f15..0000000000 --- a/lib/pcg/include/pcg/operator_task_space.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_OPERATOR_TASK_SPACE_H -#define _FLEXFLOW_PCG_INCLUDE_OPERATOR_TASK_SPACE_H - -#include "pcg/operator_task_space.dtg.h" -#include 
"pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" -#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" -#include "pcg/task_space_coordinate.dtg.h" -#include -#include - -namespace FlexFlow { - -std::unordered_set - get_task_space_coordinates(OperatorTaskSpace const &task); - -TaskSpaceCoordinate - get_task_space_maximum_coordinate(OperatorTaskSpace const &task); - -nonnegative_int num_dims(OperatorTaskSpace const &task); -positive_int num_tasks(OperatorTaskSpace const &task); - -OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, - parallel_layer_guid_t const &layer); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h index 3542e73dea..5063609a19 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h @@ -1,12 +1,16 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_H #define _FLEXFLOW_PCG_INCLUDE_PCG_PARALLEL_COMPUTATION_GRAPH_H +#include "op-attrs/operator_space_to_parallel_tensor_space_mapping.dtg.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/parallel_tensor_dim_degrees.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h" #include "pcg/parallel_computation_graph/parallel_layer_added_result.dtg.h" #include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" #include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" #include +#include "op-attrs/operator_task_space_to_operator_task_space_mapping.dtg.h" namespace FlexFlow { @@ -27,10 +31,13 @@ ParallelLayerAddedResult add_parallel_layer( ParallelLayerAddedResult pcg_add_input_layer(ParallelComputationGraph &pcg, TensorShape const &tensor_shape); +OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &layer); + std::unordered_set - get_pcg_edges_from_layer_to_layer(ParallelComputationGraph const &, - parallel_layer_guid_t const &, - parallel_layer_guid_t const &); + get_pcg_edges_from_layer_to_layer(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &src, + parallel_layer_guid_t const &dst); std::unordered_set get_edges(ParallelComputationGraph const &); @@ -53,6 +60,18 @@ std::vector get_layer_outputs(ParallelComputationGraph const &, parallel_layer_guid_t const &); +std::vector + pcg_get_operator_to_incoming_mappings(ParallelComputationGraph const &, + parallel_layer_guid_t const &); + +std::vector + pcg_get_operator_to_output_mappings(ParallelComputationGraph const &, + parallel_layer_guid_t const &); + +OperatorTaskSpaceToOperatorTaskSpaceMapping + pcg_get_mapping_along_edge(ParallelComputationGraph const &, + ParallelComputationGraphEdge const &); + std::vector get_incoming_inputs(ParallelComputationGraph const &, parallel_layer_guid_t const &); @@ -60,6 +79,10 @@ std::vector get_incoming_weights(ParallelComputationGraph const &, parallel_layer_guid_t const &); +std::vector + get_incoming_input_degrees(ParallelComputationGraph const &, + parallel_layer_guid_t const &); + std::unordered_set get_successors(ParallelComputationGraph const &, parallel_layer_guid_t const &); diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_edge.h 
b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_edge.h index 5bce560020..f3196fab1a 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_edge.h +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph_edge.h @@ -11,6 +11,7 @@ parallel_tensor_guid_t get_parallel_tensor(ParallelComputationGraphEdge const &); parallel_layer_guid_t get_src_layer(ParallelComputationGraphEdge const &); parallel_layer_guid_t get_dst_layer(ParallelComputationGraphEdge const &); +nonnegative_int get_src_layer_output_idx(ParallelComputationGraphEdge const &); nonnegative_int get_dst_layer_input_idx(ParallelComputationGraphEdge const &); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h index cdf17213f9..88ac9264c8 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -1,12 +1,12 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H #define _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/task_space_coordinate.dtg.h" #include "pcg/machine_space_offset.h" -#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" #include "pcg/machine_view.dtg.h" -#include "pcg/operator_task_space.dtg.h" #include "pcg/start_invariant_machine_view.dtg.h" -#include "pcg/task_space_coordinate.dtg.h" #include namespace FlexFlow { @@ -31,16 +31,14 @@ StartInvariantMachineView std::vector const &strides, std::vector const &dims); -std::optional +MachineSpaceOffset get_machine_space_offset(OperatorTaskSpace const &task, StartInvariantMachineView const &mv, - TaskSpaceCoordinate const &coordinates, - MachineSpecification const &ms); + TaskSpaceCoordinate const &coordinates); std::unordered_set get_machine_space_offsets(OperatorTaskSpace const &task, - StartInvariantMachineView const &mv, - MachineSpecification const &ms); + StartInvariantMachineView const &mv); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/tensor_direction.enum.toml b/lib/pcg/include/pcg/tensor_direction.enum.toml new file mode 100644 index 0000000000..92c2305f54 --- /dev/null +++ b/lib/pcg/include/pcg/tensor_direction.enum.toml @@ -0,0 +1,14 @@ +namespace = "FlexFlow" +name = "TensorDirection" +features = [ + "hash", + "fmt", + "rapidcheck", + "json", +] + +[[values]] +name = "INCOMING" + +[[values]] +name = "OUTPUT" diff --git a/lib/pcg/src/pcg/cg_operator_tensor_shape_signature.cc b/lib/pcg/src/pcg/cg_operator_tensor_shape_signature.cc deleted file mode 100644 index 90ffb85c9b..0000000000 --- a/lib/pcg/src/pcg/cg_operator_tensor_shape_signature.cc +++ /dev/null @@ -1,28 +0,0 @@ -#include "pcg/cg_operator_tensor_shape_signature.h" - -namespace FlexFlow { - -std::vector - tensor_shapes_for_role(CGOperatorTensorShapeSignature const &signature, - TensorRole tensor_role) { - switch (tensor_role) { - case TensorRole::INPUT: - return signature.input_shapes; - case TensorRole::WEIGHT: - return signature.weight_shapes; - case TensorRole::OUTPUT: - return signature.output_shapes; - default: - PANIC("Unhandled tensor role", tensor_role); - }; -} - -TensorShape tensor_shape_for_role_and_index( - CGOperatorTensorShapeSignature const &signature, - TensorRole tensor_role, - nonnegative_int index) { - return tensor_shapes_for_role(signature, tensor_role) - .at(index.unwrap_nonnegative()); -} - -} // namespace 
FlexFlow diff --git a/lib/pcg/src/pcg/computation_graph.cc b/lib/pcg/src/pcg/computation_graph.cc index b8917eed35..e095649683 100644 --- a/lib/pcg/src/pcg/computation_graph.cc +++ b/lib/pcg/src/pcg/computation_graph.cc @@ -166,7 +166,7 @@ static std::vector std::vector incoming_tensors = get_incoming_tensors(cg, l); std::vector incoming_tensor_roles = - get_incoming_tensor_roles(attrs, incoming_tensors.size()); + get_incoming_tensor_roles(attrs, num_elements(incoming_tensors)); assert(incoming_tensors.size() == incoming_tensor_roles.size()); diff --git a/lib/pcg/src/pcg/computation_graph_builder.cc b/lib/pcg/src/pcg/computation_graph_builder.cc index 4feefa713e..f1668004da 100644 --- a/lib/pcg/src/pcg/computation_graph_builder.cc +++ b/lib/pcg/src/pcg/computation_graph_builder.cc @@ -35,6 +35,7 @@ #include "utils/containers/concat_vectors.h" #include "utils/containers/enumerate_vector.h" #include "utils/containers/get_only.h" +#include "utils/containers/repeat_element.h" #include "utils/containers/transform.h" #include "utils/containers/transform_until.h" #include "utils/containers/vector_of.h" @@ -92,13 +93,13 @@ tensor_guid_t ComputationGraphBuilder::create_weight( } static void check_incoming_tensor_roles(LayerAttrs const &layer, - int num_inputs, - int num_weights) { + nonnegative_int num_inputs, + nonnegative_int num_weights) { std::vector correct = get_incoming_tensor_roles(layer.op_attrs, num_inputs + num_weights); std::vector current = concat_vectors( - std::vector(num_inputs, IncomingTensorRole::INPUT), - std::vector(num_weights, IncomingTensorRole::WEIGHT)); + repeat_element(num_inputs, IncomingTensorRole::INPUT), + repeat_element(num_weights, IncomingTensorRole::WEIGHT)); if (correct != current) { throw mk_runtime_error( @@ -114,7 +115,7 @@ std::vector ComputationGraphBuilder::add_layer( std::vector const &inputs, std::vector const &weight_initializers, std::optional> const &outputs) { - check_incoming_tensor_roles(layer, inputs.size(), weight_initializers.size()); + check_incoming_tensor_roles(layer, num_elements(inputs), num_elements(weight_initializers)); std::vector input_shapes = transform( inputs, [&](tensor_guid_t const &t) { return this->get_shape(t); }); @@ -159,12 +160,10 @@ tensor_guid_t ComputationGraphBuilder::broadcast(tensor_guid_t const &input, return input; } - if (!tensor_dims_is_broadcastable_to(input_shape.dims, target_dims)) { - throw mk_runtime_error(fmt::format( - "Cannot broadcast input tensor of dims {} to target dims {}", - input_shape.dims, - target_dims)); - } + ASSERT(tensor_dims_is_broadcastable_to(input_shape.dims, target_dims), + "Cannot broadcast input tensor to target dims", + input_shape.dims, + target_dims); BroadcastAttrs attrs = BroadcastAttrs{target_dims}; @@ -720,13 +719,13 @@ tensor_guid_t ComputationGraphBuilder::flat( relative_ff_dim_t start_dim, std::optional const &end_dim, std::optional const &maybe_name) { - nonnegative_int input_num_dims = get_num_dims(this->get_shape(input).dims); + num_tensor_dims_t input_num_dims = get_num_dims(this->get_shape(input).dims); ff_dim_t abs_start_dim = ff_dim_t_from_relative_ff_dim_t(start_dim, input_num_dims); ff_dim_t abs_end_dim = ff_dim_t_from_relative_ff_dim_t( - end_dim.value_or(relative_ff_dim_t{input_num_dims.unwrap_nonnegative()}), + end_dim.value_or(relative_ff_dim_t{input_num_dims.int_from_num_tensor_dims()}), input_num_dims); FlatAttrs attrs = FlatAttrs{ @@ -792,7 +791,7 @@ tensor_guid_t ComputationGraphBuilder::softmax( TensorShape input_shape = this->get_shape(input); 
relative_ff_dim_t dim = maybe_dim.value_or(relative_ff_dim_t{ - get_num_dims(input_shape.dims).unwrap_nonnegative() - 1}); + get_num_dims(input_shape.dims).int_from_num_tensor_dims() - 1}); SoftmaxAttrs attrs = SoftmaxAttrs{ ff_dim_t_from_relative_ff_dim_t(dim, get_num_dims(input_shape.dims))}; diff --git a/lib/pcg/src/pcg/file_format/v1/v1_computation_graph.cc b/lib/pcg/src/pcg/file_format/v1/v1_computation_graph.cc index 3511ccc269..4067dc6399 100644 --- a/lib/pcg/src/pcg/file_format/v1/v1_computation_graph.cc +++ b/lib/pcg/src/pcg/file_format/v1/v1_computation_graph.cc @@ -1,5 +1,6 @@ #include "pcg/file_format/v1/v1_computation_graph.h" #include "pcg/file_format/v1/graphs/v1_labelled_dataflow_graph.h" +#include "utils/bidict/algorithms/transform_values.h" namespace FlexFlow { @@ -16,8 +17,8 @@ std::pair> raw = to_v1_including_node_numbering(cg.raw_graph); V1ComputationGraph v1_cg = V1ComputationGraph{raw.first}; - bidict v1_node_ids = - map_values(raw.second, [](Node const &n) { return layer_guid_t{n}; }); + bidict v1_node_ids = transform_values( + raw.second, [](Node const &n) { return layer_guid_t{n}; }); return {v1_cg, v1_node_ids}; } diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_compute_specification.cc similarity index 58% rename from lib/pcg/src/pcg/machine_specification.cc rename to lib/pcg/src/pcg/machine_compute_specification.cc index 3db949b99d..a8dfb27524 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_compute_specification.cc @@ -1,19 +1,19 @@ -#include "pcg/machine_specification.h" +#include "pcg/machine_compute_specification.h" #include "pcg/device_id.h" #include "utils/containers/transform.h" -#include "utils/exception.h" +#include namespace FlexFlow { -positive_int get_num_gpus(MachineSpecification const &ms) { +positive_int get_num_gpus(MachineComputeSpecification const &ms) { return ms.num_nodes * ms.num_gpus_per_node; } -positive_int get_num_cpus(MachineSpecification const &ms) { +positive_int get_num_cpus(MachineComputeSpecification const &ms) { return ms.num_nodes * ms.num_cpus_per_node; } -positive_int get_num_devices(MachineSpecification const &ms, +positive_int get_num_devices(MachineComputeSpecification const &ms, DeviceType const &device_type) { switch (device_type) { case DeviceType::GPU: @@ -21,11 +21,11 @@ positive_int get_num_devices(MachineSpecification const &ms, case DeviceType::CPU: return get_num_cpus(ms); default: - throw mk_runtime_error(fmt::format("Unknown DeviceType {}", device_type)); + PANIC("Unknown DeviceType", device_type); } } -positive_int get_num_devices_per_node(MachineSpecification const &ms, +positive_int get_num_devices_per_node(MachineComputeSpecification const &ms, DeviceType const &device_type) { switch (device_type) { case DeviceType::GPU: @@ -33,22 +33,20 @@ positive_int get_num_devices_per_node(MachineSpecification const &ms, case DeviceType::CPU: return ms.num_cpus_per_node; default: - throw mk_runtime_error(fmt::format("Unknown DeviceType {}", device_type)); + PANIC("Unknown DeviceType", device_type); } } -bool is_valid_machine_space_coordinate(MachineSpecification const &ms, +bool is_valid_machine_space_coordinate(MachineComputeSpecification const &ms, MachineSpaceCoordinate const &coord) { return (coord.node_idx < ms.num_nodes) && (coord.device_idx < get_num_devices_per_node(ms, coord.device_type)); } -device_id_t get_device_id(MachineSpecification const &ms, +device_id_t get_device_id(MachineComputeSpecification const &ms, MachineSpaceCoordinate const 
&coord) { - if (!is_valid_machine_space_coordinate(ms, coord)) { - throw mk_runtime_error(fmt::format( - "Invalid coordinate {} for machine specification {}", ms, coord)); - } + ASSERT(is_valid_machine_space_coordinate(ms, coord)); + nonnegative_int raw_idx = coord.node_idx * get_num_devices_per_node(ms, coord.device_type) + coord.device_idx; diff --git a/lib/pcg/src/pcg/machine_space_offset.cc b/lib/pcg/src/pcg/machine_space_offset.cc index 4aa79b3d1b..f2fd0a8ed3 100644 --- a/lib/pcg/src/pcg/machine_space_offset.cc +++ b/lib/pcg/src/pcg/machine_space_offset.cc @@ -2,26 +2,24 @@ #include "utils/exception.h" namespace FlexFlow { + MachineSpaceOffset get_machine_space_offset_from_coordinate( MachineSpaceCoordinate const &start, MachineSpaceCoordinate const &coord) { - if ((coord.device_idx < start.device_idx) || - (coord.node_idx < start.node_idx)) { - throw mk_runtime_error(fmt::format( - "One of the coordinates of start {} is greater than one of the " - "coordinates of coord {}, are you sure you didn't swap them?", - start, - coord)); - } - if (start.device_type != coord.device_type) { - throw mk_runtime_error( - fmt::format("{} has different DeviceType from {}", start, coord)); - } + ASSERT(start.device_idx <= coord.device_idx, + "start.device_idx is greater than coord.device_idx. " + "Are you sure you didn't swap them?"); + + ASSERT(start.node_idx <= coord.node_idx, + "start.node_idx is greater than coord.node_idx. " + "Are you sure you didn't swap them?"); + + ASSERT(start.device_type == coord.device_type); - return MachineSpaceOffset{coord.node_idx.unwrap_nonnegative() - - start.node_idx.unwrap_nonnegative(), - coord.device_idx.unwrap_nonnegative() - - start.device_idx.unwrap_nonnegative(), - coord.device_type}; + return MachineSpaceOffset{ + /*node_offset=*/coord.node_idx.unwrap_nonnegative() - start.node_idx.unwrap_nonnegative(), + /*device_offset=*/coord.device_idx.unwrap_nonnegative() - start.device_idx.unwrap_nonnegative(), + /*device_type=*/coord.device_type, + }; } } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 0fbb021a55..d227454dd4 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,12 +1,13 @@ #include "pcg/machine_view.h" +#include "op-attrs/operator_task_space.dtg.h" +#include "op-attrs/operator_task_space.h" +#include "pcg/machine_compute_specification.h" #include "pcg/machine_space_coordinate.dtg.h" #include "pcg/machine_specification.dtg.h" -#include "pcg/machine_specification.h" #include "pcg/machine_specification_dimension.dtg.h" #include "pcg/machine_view_dimension.dtg.h" -#include "pcg/operator_task_space.dtg.h" -#include "pcg/operator_task_space.h" #include "pcg/stride_t.dtg.h" +#include "utils/bidict/generate_bidict.h" #include "utils/containers/contains.h" #include "utils/containers/count.h" #include "utils/containers/filter.h" @@ -19,11 +20,12 @@ #include "utils/exception.h" #include "utils/nonnegative_int/nonnegative_range.h" #include "utils/nonnegative_int/num_elements.h" +#include "op-attrs/task_space_coordinate.h" namespace FlexFlow { -size_t num_dims(MachineView const &mv) { - return get_strides(mv).size(); +nonnegative_int mv_get_expected_task_space_num_dims(MachineView const &mv) { + return num_elements(get_strides(mv)); } DeviceType get_device_type(MachineView const &mv) { @@ -46,13 +48,7 @@ MachineView machine_view_from_strides_and_machine_spec_dimensions( MachineSpaceCoordinate const &start, std::vector const &strides,
std::vector const &dims) { - if (strides.size() != dims.size()) { - throw mk_runtime_error(fmt::format( - "Length of strides ({}) and dims ({}) must match when calling " - "machine_view_from_strides_and_machine_spec_dimensions", - start, - strides)); - } + ASSERT(strides.size() == dims.size()); std::vector dimensions = zip_with_strict( strides, dims, [](stride_t s, MachineSpecificationDimension d) { return MachineViewDimension{s, d}; @@ -60,19 +56,17 @@ MachineView machine_view_from_strides_and_machine_spec_dimensions( return MachineView{start, dimensions}; } -std::optional get_machine_space_coordinate( - OperatorTaskSpace const &task, +MachineSpaceCoordinate get_machine_space_coordinate( + OperatorTaskSpace const &task_space, MachineView const &machine_view, - TaskSpaceCoordinate const &coord, - MachineSpecification const &machine_specification) { + TaskSpaceCoordinate const &coord) { - if (num_dims(machine_view) != task.degrees.size()) { - throw mk_runtime_error( - fmt::format("Dimension of machine_view ({}) must match dimension of " - "task ({}) when computing machine space coordinate", - machine_view, - task.degrees)); - } + ASSERT(mv_get_expected_task_space_num_dims(machine_view) == op_task_space_num_dims(task_space), + "Dimension of MachineView must match dimension of OperatorTaskSpace", + machine_view, + task_space); + ASSERT(op_task_space_num_dims(task_space) == task_space_coord_num_dims(coord)); + ASSERT(operator_task_space_contains_coord(task_space, coord)); auto get_dimension_indices_for_dimension = [&](MachineSpecificationDimension dimension) @@ -93,12 +87,14 @@ std::optional get_machine_space_coordinate( std::vector sizes = transform(dimension_indices, [&](nonnegative_int i) { - return task.degrees.at(i.unwrap_nonnegative()) * - mv_strides.at(i.unwrap_nonnegative()).unwrapped; + return ( + task_space.degrees.dims.at(i.unwrap_nonnegative()) * + mv_strides.at(i.unwrap_nonnegative()).unwrapped + ).positive_int_from_int_ge_two(); }); std::vector coord_points = transform(dimension_indices, [&](nonnegative_int i) { - return coord.raw_coord.at(i.unwrap_nonnegative()); + return coord.orthotope_coord.raw.at(i.unwrap_nonnegative()); }); std::vector strides = transform(dimension_indices, [&](nonnegative_int i) { @@ -130,38 +126,56 @@ std::optional get_machine_space_coordinate( MachineSpaceCoordinate ms_coord = MachineSpaceCoordinate{ node_idx, device_idx, get_device_type(machine_view)}; - if (!is_valid_machine_space_coordinate(machine_specification, ms_coord)) { - return std::nullopt; - } return ms_coord; } +TaskSpaceCoordinate + mv_task_space_coord_for_machine_space_coord( + MachineView const &machine_view, + OperatorTaskSpace const &operator_task_space, + MachineSpaceCoordinate const &machine_space_coord) { + OperatorSpaceToMachineSpaceMapping mapping = get_coordinate_mapping_for_machine_view(operator_task_space, machine_view); + + return mapping.raw_mapping.at_r(machine_space_coord); +} + +OperatorSpaceToMachineSpaceMapping + get_coordinate_mapping_for_machine_view( + OperatorTaskSpace const &operator_task_space, + MachineView const &machine_view) { + + return OperatorSpaceToMachineSpaceMapping{ + /*raw_mapping=*/generate_bidict( + get_task_space_coordinates(operator_task_space), + [&](TaskSpaceCoordinate const &task_space_coord) { + return get_machine_space_coordinate( + /*operator_task_space=*/operator_task_space, + /*machine_view=*/machine_view, + /*task_space_coordinate=*/task_space_coord); + }), + /*operator_task_space=*/operator_task_space, + }; +} + std::unordered_set 
get_machine_space_coordinates( - OperatorTaskSpace const &task, - MachineView const &machine_view, - MachineSpecification const &machine_specification) { + OperatorTaskSpace const &task_space, + MachineView const &machine_view) { + + ASSERT(op_task_space_num_dims(task_space) == mv_get_expected_task_space_num_dims(machine_view)); + return transform( - get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &coord) { - std::optional maybe_coordinate = - get_machine_space_coordinate( - task, machine_view, coord, machine_specification); - if (!maybe_coordinate.has_value()) { - throw mk_runtime_error( - fmt::format("In get_machine_space_coordinates, the given " - "OperatorTaskSpace {} and MachineView {} are not " - "compatible with the given MachineSpecification {}", - task, - machine_view, - machine_specification)); - } - return maybe_coordinate.value(); + get_task_space_coordinates(task_space), [&](TaskSpaceCoordinate const &coord) { + return get_machine_space_coordinate( + task_space, machine_view, coord); }); } -std::unordered_set get_device_ids(OperatorTaskSpace const &task, +std::unordered_set get_device_ids(OperatorTaskSpace const &task_space, MachineView const &mv, - MachineSpecification const &ms) { - return transform(get_machine_space_coordinates(task, mv, ms), + MachineComputeSpecification const &ms) { + ASSERT(op_task_space_num_dims(task_space) == mv_get_expected_task_space_num_dims(mv)); + + return transform(get_machine_space_coordinates(task_space, mv), [&](MachineSpaceCoordinate const &coord) { return get_device_id(ms, coord); }); diff --git a/lib/pcg/src/pcg/operator_task_space.cc b/lib/pcg/src/pcg/operator_task_space.cc deleted file mode 100644 index d612680de6..0000000000 --- a/lib/pcg/src/pcg/operator_task_space.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include "pcg/operator_task_space.h" -#include "op-attrs/parallel_tensor_shape.dtg.h" -#include "op-attrs/parallel_tensor_shape.h" -#include "pcg/operator_task_space.dtg.h" -#include "pcg/parallel_computation_graph/parallel_computation_graph.h" -#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" -#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" -#include "utils/containers/cartesian_product.h" -#include "utils/containers/extend.h" -#include "utils/containers/maximum.h" -#include "utils/containers/product.h" -#include "utils/containers/range.h" -#include "utils/containers/transform.h" -#include "utils/containers/unordered_set_of.h" -#include "utils/containers/vector_of.h" -#include "utils/fmt/unordered_set.h" -#include "utils/nonnegative_int/nonnegative_range.h" -#include "utils/nonnegative_int/num_elements.h" - -namespace FlexFlow { - -std::unordered_set - get_task_space_coordinates(OperatorTaskSpace const &task) { - - std::vector> coordinate_ranges = - transform(task.degrees, [&](positive_int num_points) { - return nonnegative_range( - num_points.nonnegative_int_from_positive_int()); - }); - - std::unordered_set> raw_coordinates = - unordered_set_of(cartesian_product(coordinate_ranges)); - std::unordered_set task_space_coordinates = - transform(raw_coordinates, [](std::vector const &point) { - return TaskSpaceCoordinate{point}; - }); - return task_space_coordinates; -} - -TaskSpaceCoordinate - get_task_space_maximum_coordinate(OperatorTaskSpace const &task) { - return maximum(get_task_space_coordinates(task)); -} - -nonnegative_int num_dims(OperatorTaskSpace const &task) { - return num_elements(task.degrees); -} - -positive_int num_tasks(OperatorTaskSpace const &task) { - return 
product(task.degrees); -} - -OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, - parallel_layer_guid_t const &layer) { - parallel_tensor_guid_t out_tensor = get_layer_outputs(pcg, layer).at(0); - ParallelTensorShape shape = get_parallel_tensor_shape(pcg, out_tensor); - - std::vector degrees; - extend(degrees, vector_of(ff_ordered_shard_degrees(shape))); - degrees.push_back(get_sum_degree(shape)); - degrees.push_back(get_discard_copy_degree(shape)); - return OperatorTaskSpace{degrees}; -} - -} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/parallel_computation_graph/generate_weight_transform.cc b/lib/pcg/src/pcg/parallel_computation_graph/generate_weight_transform.cc index e3caffe260..50cbea9ca0 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/generate_weight_transform.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/generate_weight_transform.cc @@ -1,6 +1,7 @@ #include "pcg/parallel_computation_graph/generate_weight_transform.h" #include "op-attrs/ff_ordered/enumerate.h" #include "op-attrs/parallel_tensor_shape.h" +#include namespace FlexFlow { @@ -10,12 +11,8 @@ std::unordered_set std::unordered_set result; positive_int sum_degree = get_sum_degree(goal); - if (sum_degree != 1) { - throw mk_runtime_error( - fmt::format("generate_weight_transform currently only supports " - "sum_degree = 1, but received {}", - sum_degree)); - } + ASSERT(sum_degree == 1, + "generate_weight_transform currently only supports sum_degree = 1"); positive_int discard_copy_degree = get_discard_copy_degree(goal); if (discard_copy_degree != 1) { diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc index 052d30df0f..f0cd187827 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc @@ -1,11 +1,16 @@ #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "op-attrs/get_incoming_tensor_roles.h" +#include "op-attrs/get_operator_space_to_parallel_tensor_space_mappings.h" +#include "op-attrs/operator_task_space_to_operator_task_space_mapping.h" +#include "op-attrs/parallel_tensor_shape.h" #include "op-attrs/pcg_operator_attrs.h" #include "op-attrs/shape_inference.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h" #include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" #include "utils/containers/concat_vectors.h" +#include "utils/containers/extend.h" #include "utils/containers/filtrans.h" #include "utils/containers/get_only.h" #include "utils/containers/repeat_element.h" @@ -29,6 +34,7 @@ #include "utils/graph/node/node.dtg.h" #include "utils/record_formatter.h" #include +#include "op-attrs/get_operator_task_space.h" namespace FlexFlow { @@ -117,6 +123,27 @@ ParallelLayerAddedResult pcg_add_input_layer(ParallelComputationGraph &pcg, /*output_flags=*/std::vector{CreateGrad::NO}); } +OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &layer) { + PCGOperatorAttrs op_attrs = pcg_get_op_attrs(pcg, layer); + + ASSERT(!is_parallel_op(op_attrs)); + + std::vector inputs = + get_incoming_inputs(pcg, layer); + + std::vector input_degrees = + transform(get_incoming_inputs(pcg, layer), + 
[&](parallel_tensor_guid_t input_guid) { + return get_parallel_degrees(get_parallel_tensor_shape(pcg, input_guid)); + }); + + + return get_operator_task_space( + compgraph_op_attrs_from_pcg_op_attrs(op_attrs).value(), + input_degrees); +} + std::unordered_set get_edges(ParallelComputationGraph const &pcg) { return transform(get_edges(pcg.raw_graph), [](DataflowEdge const &e) { @@ -179,6 +206,55 @@ std::vector [](DataflowOutput const &o) { return parallel_tensor_guid_t{o}; }); } +std::vector + pcg_get_operator_to_incoming_mappings(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + ComputationGraphOpAttrs op_attrs = compgraph_op_attrs_from_pcg_op_attrs(pcg_get_op_attrs(pcg, l)).value(); + + return get_operator_to_incoming_mappings( + /*attrs=*/op_attrs, + /*input_degrees=*/get_incoming_input_degrees(pcg, l)); +} + +std::vector + pcg_get_operator_to_output_mappings(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + ComputationGraphOpAttrs op_attrs = compgraph_op_attrs_from_pcg_op_attrs(pcg_get_op_attrs(pcg, l)).value(); + + return get_operator_to_output_mappings( + /*attrs=*/op_attrs, + /*input_degrees=*/get_incoming_input_degrees(pcg, l)); +} + +OperatorTaskSpaceToOperatorTaskSpaceMapping + pcg_get_mapping_along_edge(ParallelComputationGraph const &pcg, + ParallelComputationGraphEdge const &edge) { + + parallel_layer_guid_t src_layer = get_src_layer(edge); + nonnegative_int src_idx = get_src_layer_output_idx(edge); + parallel_tensor_guid_t tensor = parallel_tensor_guid_t{edge.raw_edge.src}; + parallel_layer_guid_t dst_layer = get_dst_layer(edge); + nonnegative_int dst_idx = get_dst_layer_input_idx(edge); + + ParallelTensorShape tensor_shape = get_parallel_tensor_shape(pcg, tensor); + + OperatorTaskSpace src_task_space = get_operator_task_space(pcg, src_layer); + + OperatorTaskSpace dst_task_space = get_operator_task_space(pcg, dst_layer); + + OperatorSpaceToParallelTensorSpaceMapping src_to_tensor_mapping = + pcg_get_operator_to_output_mappings(pcg, src_layer).at(src_idx.unwrap_nonnegative()); + + OperatorSpaceToParallelTensorSpaceMapping dst_to_tensor_mapping = + pcg_get_operator_to_incoming_mappings(pcg, dst_layer).at(dst_idx.unwrap_nonnegative()); + + return op_to_op_mapping_from_composition_through_tensor( + src_to_tensor_mapping, + dst_to_tensor_mapping); +} + + + static std::vector get_incoming_tensors_with_role(ParallelComputationGraph const &pcg, parallel_layer_guid_t const &l, @@ -189,7 +265,7 @@ static std::vector get_incoming_tensors(pcg, l); std::vector incoming_tensor_roles = - get_incoming_tensor_roles(attrs, incoming_tensors.size()); + get_incoming_tensor_roles(attrs, num_elements(incoming_tensors)); assert(incoming_tensors.size() == incoming_tensor_roles.size()); @@ -221,6 +297,17 @@ std::vector return get_incoming_tensors_with_role(pcg, l, IncomingTensorRole::WEIGHT); } +std::vector + get_incoming_input_degrees(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + + return transform(get_incoming_inputs(pcg, l), + [&](parallel_tensor_guid_t t) { + return get_parallel_degrees(get_parallel_tensor_shape(pcg, t)); + }); +} + + std::unordered_set get_successors(ParallelComputationGraph const &pcg, parallel_layer_guid_t const &l) { diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc index f7f3cfdcfd..24e3a1bf18 100644 --- 
a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc @@ -29,6 +29,7 @@ #include "utils/containers/count.h" #include "utils/containers/enumerate_vector.h" #include "utils/containers/get_only.h" +#include "utils/containers/repeat_element.h" #include "utils/containers/transform.h" #include "utils/containers/zip_with.h" @@ -531,13 +532,13 @@ parallel_tensor_guid_t ParallelComputationGraphBuilder::add_weight( } static void check_incoming_tensor_roles(ParallelLayerAttrs const &layer, - int num_inputs, - int num_weights) { + nonnegative_int num_inputs, + nonnegative_int num_weights) { std::vector correct = get_incoming_tensor_roles(layer.op_attrs, num_inputs + num_weights); std::vector current = concat_vectors( - std::vector(num_inputs, IncomingTensorRole::INPUT), - std::vector(num_weights, IncomingTensorRole::WEIGHT)); + repeat_element(num_inputs, IncomingTensorRole::INPUT), + repeat_element(num_weights, IncomingTensorRole::WEIGHT)); if (correct != current) { throw mk_runtime_error( @@ -553,12 +554,12 @@ std::vector ParallelComputationGraphBuilder::add_layer( std::vector const &inputs, std::vector const &weight_initializers) { - int num_weights_provided = + nonnegative_int num_weights_provided = count(weight_initializers, [](std::optional const &i) { return i.has_value(); }); - check_incoming_tensor_roles(layer, inputs.size(), num_weights_provided); + check_incoming_tensor_roles(layer, num_elements(inputs), num_weights_provided); std::vector input_shapes = transform(inputs, [&](parallel_tensor_guid_t const &i) { diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc index f37d08dc8a..00a53cb696 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph_edge.cc @@ -16,6 +16,10 @@ parallel_layer_guid_t get_dst_layer(ParallelComputationGraphEdge const &e) { return parallel_layer_guid_t{e.raw_edge.dst.node}; } +nonnegative_int get_src_layer_output_idx(ParallelComputationGraphEdge const &e) { + return e.raw_edge.src.idx; +} + nonnegative_int get_dst_layer_input_idx(ParallelComputationGraphEdge const &e) { return e.raw_edge.dst.idx; } diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc index e9f864d416..3bc9f640b4 100644 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -1,7 +1,7 @@ #include "pcg/start_invariant_machine_view.h" +#include "op-attrs/operator_task_space.h" #include "pcg/machine_space_offset.h" #include "pcg/machine_view.h" -#include "pcg/operator_task_space.h" #include "utils/containers/count.h" #include "utils/containers/filter.h" #include "utils/containers/scanl.h" @@ -54,33 +54,30 @@ StartInvariantMachineView return StartInvariantMachineView{dimensions, device_type}; } -std::optional get_machine_space_offset( +MachineSpaceOffset get_machine_space_offset( OperatorTaskSpace const &task, StartInvariantMachineView const &start_inv_machine_view, - TaskSpaceCoordinate const &coord, - MachineSpecification const &machine_specification) { + TaskSpaceCoordinate const &coord) { + MachineSpaceCoordinate dummy_start = MachineSpaceCoordinate{0_n, 0_n, get_device_type(start_inv_machine_view)}; + MachineView mv = 
machine_view_from_start_invariant(start_inv_machine_view, dummy_start); - std::optional ms_coord = - get_machine_space_coordinate(task, mv, coord, machine_specification); - if (ms_coord == std::nullopt) { - return std::nullopt; - } - return get_machine_space_offset_from_coordinate(dummy_start, - ms_coord.value()); + + MachineSpaceCoordinate ms_coord = get_machine_space_coordinate(task, mv, coord); + + return get_machine_space_offset_from_coordinate(dummy_start, ms_coord); } std::unordered_set get_machine_space_offsets( OperatorTaskSpace const &task, - StartInvariantMachineView const &start_inv_machine_view, - MachineSpecification const &machine_specification) { + StartInvariantMachineView const &start_inv_machine_view) { return transform( - get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &coord) { + get_task_space_coordinates(task), + [&](TaskSpaceCoordinate const &coord) { return get_machine_space_offset( - task, start_inv_machine_view, coord, machine_specification) - .value(); + task, start_inv_machine_view, coord); }); } diff --git a/lib/pcg/test/src/pcg/computation_graph_builder.cc b/lib/pcg/test/src/pcg/computation_graph_builder.cc index f7430b3403..513cfbfe18 100644 --- a/lib/pcg/test/src/pcg/computation_graph_builder.cc +++ b/lib/pcg/test/src/pcg/computation_graph_builder.cc @@ -1,6 +1,6 @@ #include "pcg/computation_graph_builder.h" -#include "doctest/doctest.h" #include "pcg/computation_graph.h" +#include using namespace ::FlexFlow; diff --git a/lib/pcg/test/src/pcg/machine_specification.cc b/lib/pcg/test/src/pcg/machine_compute_specification.cc similarity index 86% rename from lib/pcg/test/src/pcg/machine_specification.cc rename to lib/pcg/test/src/pcg/machine_compute_specification.cc index 4064f36679..c725da80ed 100644 --- a/lib/pcg/test/src/pcg/machine_specification.cc +++ b/lib/pcg/test/src/pcg/machine_compute_specification.cc @@ -1,4 +1,4 @@ -#include "pcg/machine_specification.h" +#include "pcg/machine_compute_specification.h" #include "pcg/device_id.h" #include @@ -6,13 +6,11 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("MachineSpecification") { - MachineSpecification ms = MachineSpecification{ + TEST_CASE("MachineComputeSpecification") { + MachineComputeSpecification ms = MachineComputeSpecification{ /*num_nodes=*/4_p, /*num_cpus_per_node=*/16_p, /*num_gpus_per_node=*/8_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0, }; SUBCASE("get_num_gpus") { diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index ecc196a118..8257792201 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -1,4 +1,6 @@ #include "pcg/machine_view.h" +#include "op-attrs/ff_ordered/ff_ordered.h" +#include "op-attrs/task_space_coordinate.h" #include "pcg/gpu_id_t.dtg.h" #include "test/utils/doctest/fmt/optional.h" #include "utils/containers/transform.h" @@ -9,23 +11,51 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("MachineView - utility functions") { + TEST_CASE("mv_get_expected_task_space_num_dims") { MachineView mv = MachineView{ - MachineSpaceCoordinate{ - /*node_idx=*/0_n, /*device_idx=*/0_n, DeviceType::GPU}, - {MachineViewDimension{stride_t{2_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2_p}, - MachineSpecificationDimension::INTER_NODE}}}; - - SUBCASE("num_dims") { - CHECK(num_dims(mv) == 2); - } - SUBCASE("get_device_type") { - CHECK(get_device_type(mv) == DeviceType::GPU); - } + MachineSpaceCoordinate{ + 
/*node_idx=*/0_n, + /*device_idx=*/0_n, + DeviceType::GPU, + }, + { + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTER_NODE, + }, + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTER_NODE, + }, + }, + }; + + CHECK(mv_get_expected_task_space_num_dims(mv) == 2_n); } + + TEST_CASE("get_device_type") { + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/0_n, + /*device_idx=*/0_n, + DeviceType::GPU, + }, + { + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTER_NODE, + }, + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTER_NODE, + }, + }, + }; + + CHECK(get_device_type(mv) == DeviceType::GPU); + } + TEST_CASE("get_machine_space_coordinate") { SUBCASE("1D case") { @@ -43,265 +73,309 @@ TEST_SUITE(FF_TEST_SUITE) { * Where the (x,) are the `TaskSpaceCoordinate`s, and the underlying grid * is the machine space. */ - OperatorTaskSpace task = OperatorTaskSpace{{3_p}}; + OperatorTaskSpace task = OperatorTaskSpace{ + MinimalOrthotope{{ + 3_ge2, + }}, + }; + MachineView mv = MachineView{ - MachineSpaceCoordinate{ - /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}, - {MachineViewDimension{stride_t{2_p}, - MachineSpecificationDimension::INTRA_NODE}}}; - MachineSpecification ms = - MachineSpecification{/*num_nodes=*/1_p, - /*num_cpus_per_node=*/6_p, - /*num_gpus_per_node=*/6_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0}; + MachineSpaceCoordinate{ + /*node_idx=*/0_n, + /*device_idx=*/1_n, + DeviceType::GPU, + }, + { + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; SUBCASE("Task with TaskSpaceCoordinate = (0,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n}}; + TaskSpaceCoordinate coord = make_task_space_coordinate({0_n}); + + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/0_n, + /*device_idx=*/1_n, + DeviceType::GPU, + }; + + CHECK(result == correct); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1,)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({1_n}); + + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/0_n, + /*device_idx=*/3_n, + DeviceType::GPU, + }; + + CHECK(result == correct); + } + + SUBCASE("Task with TaskSpaceCoordinate = (2,)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({2_n}); + + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}; + /*node_idx=*/0_n, + /*device_idx=*/5_n, + DeviceType::GPU, + }; + + CHECK(result == correct); + } + + SUBCASE("TaskSpaceCoordinate is out of bounds") { + TaskSpaceCoordinate coord = make_task_space_coordinate({4_n}); + + CHECK_THROWS( + get_machine_space_coordinate(task, mv, coord)); + } + } + + SUBCASE("2D case - projection on different dimensions") { + // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. + // The first dimension is projected onto the INTER (node) dimension with + // stride 1, while the second dimension is projected onto the INTRA + // (device) dimension with stride 2. The start of the projection defined + // by MachineView is at MachineSpaceCoordinates (1, 2), and the machine + // space has 3 nodes and 5 devices per node. 
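+ //
+ // Concretely, a TaskSpaceCoordinate (x, y) under this view should land at
+ //   node_idx   = 1 + 1 * x   (start node plus the INTER_NODE stride times x)
+ //   device_idx = 2 + 2 * y   (start device plus the INTRA_NODE stride times y)
+ // which is what the grid and the subcases below check.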
+ + /** + * The tasks will thus be distributed like this: + * +-------+-------+-------+-------+-------+ + * | | | | | | + * +-------+-------+-------+-------+-------+ + * | | | (0,0) | | (0,1) | + * +-------+-------+-------+-------+-------+ + * | | | (1,0) | | (1,1) | + * +-------+-------+-------+-------+-------+ + * Where the (x,y) are the `TaskSpaceCoordinate`s, and the underlying + * grid is the machine space. + */ + + OperatorTaskSpace task = OperatorTaskSpace{ + MinimalOrthotope{{ + 2_ge2, + 2_ge2, + }}, + }; + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/1_n, + /*device_idx=*/2_n, + DeviceType::GPU, + }, + { + MachineViewDimension{ + stride_t{1_p}, + MachineSpecificationDimension::INTER_NODE, + }, + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({0_n, 0_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1_n, /*device_idx=*/2_n, DeviceType::GPU}; MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); + get_machine_space_coordinate(task, mv, coord); CHECK(correct == result); } - SUBCASE("Task with TaskSpaceCoordinate = (1,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n}}; + SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({0_n, 1_n}); MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0_n, /*device_idx=*/3_n, DeviceType::GPU}; + /*node_idx=*/1_n, /*device_idx=*/4_n, DeviceType::GPU}; MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); + get_machine_space_coordinate(task, mv, coord); CHECK(correct == result); } - SUBCASE("Task with TaskSpaceCoordinate = (2,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2_n}}; + SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({1_n, 0_n}); MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0_n, /*device_idx=*/5_n, DeviceType::GPU}; + /*node_idx=*/2_n, /*device_idx=*/2_n, DeviceType::GPU}; MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); + get_machine_space_coordinate(task, mv, coord); CHECK(correct == result); } - SUBCASE("TaskSpaceCoordinate is out of bounds") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{4_n}}; - std::optional result = - get_machine_space_coordinate(task, mv, coord, ms); - std::optional correct = std::nullopt; - CHECK(result == correct); + SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({1_n, 1_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/2_n, /*device_idx=*/4_n, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + CHECK(correct == result); } + } + + SUBCASE("2D case - projection on same dimension") { + // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. + // Both dimensions are projected on the INTRA (device) dimension, with + // strides 1 and 2 respectively. The start of the projection defined by + // MachineView is at MachineSpaceCoordinates (1, 0), and the machine + // space has 2 nodes and 6 devices per node. - SUBCASE("2D case - projection on different dimensions") { - // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. 
- // The first dimension is projected onto the INTER (node) dimension with - // stride 1, while the second dimension is projected onto the INTRA - // (device) dimension with stride 2. The start of the projection defined - // by MachineView is at MachineSpaceCoordinates (1, 2), and the machine - // space has 3 nodes and 5 devices per node. - - /** - * The tasks will thus be distributed like this: - * +-------+-------+-------+-------+-------+ - * | | | | | | - * +-------+-------+-------+-------+-------+ - * | | | (0,0) | | (0,1) | - * +-------+-------+-------+-------+-------+ - * | | | (1,0) | | (1,1) | - * +-------+-------+-------+-------+-------+ - * Where the (x,y) are the `TaskSpaceCoordinate`s, and the underlying - * grid is the machine space. - */ - - OperatorTaskSpace task = OperatorTaskSpace{{2_p, 2_p}}; - MachineView mv = MachineView{ - MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/2_n, DeviceType::GPU}, - {MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2_p}, - MachineSpecificationDimension::INTRA_NODE}}}; - MachineSpecification ms = - MachineSpecification{/*num_nodes=*/3_p, - /*num_cpus_per_node=*/5_p, - /*num_gpus_per_node=*/5_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0}; - - SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 0_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/2_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } - - SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 1_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/4_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } - - SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 0_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/2_n, /*device_idx=*/2_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } - - SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 1_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/2_n, /*device_idx=*/4_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } + /** + * +-------+-------+-------+-------+-------+-------+ + * | (0,0) | (1,0) | | | (0,1) | (1,1) | + * +-------+-------+-------+-------+-------+-------+ + * Where the (x,y) are the `TaskSpaceCoordinate`s, and the underlying + * grid is the machine space. 
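+ *
+ * Concretely: both task dimensions share the INTRA_NODE axis, so dimension 1
+ * is laid out after the full span of dimension 0. A TaskSpaceCoordinate
+ * (x, y) should therefore land at node_idx = 1 and
+ *   device_idx = 0 + 1 * x + (2 * 2) * y
+ * (dimension 1's stride of 2 scaled by the 2-device span of dimension 0),
+ * which the subcases below check.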
+ */ + + OperatorTaskSpace task = OperatorTaskSpace{ + MinimalOrthotope{{ + 2_ge2, + 2_ge2, + }}, + }; + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/1_n, + /*device_idx=*/0_n, + DeviceType::GPU, + }, + { + MachineViewDimension{ + stride_t{1_p}, + MachineSpecificationDimension::INTRA_NODE, + }, + MachineViewDimension{ + stride_t{2_p}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({0_n, 0_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1_n, /*device_idx=*/0_n, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({0_n, 1_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1_n, /*device_idx=*/4_n, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + CHECK(correct == result); } - SUBCASE("2D case - projection on same dimension") { - // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. - // Both dimensions are projected on the INTRA (device) dimension, with - // strides 1 and 2 respectively. The start of the projection defined by - // MachineView is at MachineSpaceCoordinates (1, 0), and the machine - // space has 2 nodes and 6 devices per node. - - /** - * +-------+-------+-------+-------+-------+-------+ - * | (0,0) | (1,0) | | | (0,1) | (1,1) | - * +-------+-------+-------+-------+-------+-------+ - * Where the (x,y) are the `TaskSpaceCoordinate`s, and the underlying - * grid is the machine space. - */ - - OperatorTaskSpace task = OperatorTaskSpace{{2_p, 2_p}}; - MachineView mv = MachineView{ - MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/0_n, DeviceType::GPU}, - {MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTRA_NODE}, - MachineViewDimension{stride_t{2_p}, - MachineSpecificationDimension::INTRA_NODE}}}; - MachineSpecification ms = - MachineSpecification{/*num_nodes=*/2_p, - /*num_cpus_per_node=*/6_p, - /*num_gpus_per_node=*/6_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0}; - - SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 0_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/0_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } - - SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 1_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/4_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } - - SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 0_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/1_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } - - SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 1_n}}; - MachineSpaceCoordinate correct 
= MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/5_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } + SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({1_n, 0_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1_n, /*device_idx=*/1_n, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + CHECK(correct == result); } - SUBCASE("3D case") { - // This operator has shape (2, 2, 2), and thus 2 * 2 * 2 = 8 tasks. - // - The first dimension is projected onto the INTER (node) dimension - // with stride 1, - // - The second dimension is projected onto the INTRA (device) dimension - // with stride 2, - // - The third dimension is projected onto the INTRA (device) dimension - // with stride 1. The start of the projection defined by MachineView is - // at MachineSpaceCoordinates (0, 1), and the machine space has 2 nodes - // and 8 devices per node. - - /** - * The tasks will thus be distributed like this: - * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | |(0,0,0)| |(0,0,1)| |(0,1,0)| |(0,1,1)| - * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | |(1,0,0)| |(1,0,1)| |(1,1,0)| |(1,1,1)| - * +-------+-------+-------+-------+-------+-------+-------+-------+ - * Where the (x,y,z) are the `TaskSpaceCoordinate`s, and the underlying - * grid is the machine space. - */ - - OperatorTaskSpace task = OperatorTaskSpace{{2_p, 2_p, 2_p}}; - MachineView mv = MachineView{ - MachineSpaceCoordinate{ - /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}, - {MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTER_NODE}, - MachineViewDimension{stride_t{2_p}, - MachineSpecificationDimension::INTRA_NODE}, - MachineViewDimension{stride_t{1_p}, - MachineSpecificationDimension::INTRA_NODE}}}; - MachineSpecification ms = - MachineSpecification{/*num_nodes=*/2_p, - /*num_cpus_per_node=*/8_p, - /*num_gpus_per_node=*/8_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0}; - - SUBCASE("Task with TaskSpaceCoordinate = (0,0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 1_n, 0_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0_n, /*device_idx=*/3_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } - - SUBCASE("Task with TaskSpaceCoordinate = (1,1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 0_n, 1_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/5_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } - - SUBCASE("Task with TaskSpaceCoordinate = (1,1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 1_n, 1_n}}; - MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1_n, /*device_idx=*/7_n, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms).value(); - CHECK(correct == result); - } + SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { + TaskSpaceCoordinate coord = make_task_space_coordinate({1_n, 1_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1_n, /*device_idx=*/5_n, DeviceType::GPU}; + 
MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + CHECK(correct == result); + } + } + + SUBCASE("3D case") { + // This operator has shape (2, 2, 2), and thus 2 * 2 * 2 = 8 tasks. + // - The first dimension is projected onto the INTER (node) dimension + // with stride 1, + // - The second dimension is projected onto the INTRA (device) dimension + // with stride 2, + // - The third dimension is projected onto the INTRA (device) dimension + // with stride 1. The start of the projection defined by MachineView is + // at MachineSpaceCoordinates (0, 1), and the machine space has 2 nodes + // and 8 devices per node. + + /** + * The tasks will thus be distributed like this: + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | |(0,0,0)| |(0,0,1)| |(0,1,0)| |(0,1,1)| + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | |(1,0,0)| |(1,0,1)| |(1,1,0)| |(1,1,1)| + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * Where the (x,y,z) are the `TaskSpaceCoordinate`s, and the underlying + * grid is the machine space. + */ + + OperatorTaskSpace task = OperatorTaskSpace{ + MinimalOrthotope{{ + 2_ge2, + 2_ge2, + 2_ge2, + }}, + }; + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}, + {MachineViewDimension{stride_t{1_p}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{2_p}, + MachineSpecificationDimension::INTRA_NODE}, + MachineViewDimension{stride_t{1_p}, + MachineSpecificationDimension::INTRA_NODE}}}; + + SUBCASE("Task with TaskSpaceCoordinate = (0,0,1)") { + TaskSpaceCoordinate coord = + make_task_space_coordinate({0_n, 1_n, 0_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/0_n, /*device_idx=*/3_n, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1,1,0)") { + TaskSpaceCoordinate coord = + make_task_space_coordinate({1_n, 0_n, 1_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1_n, /*device_idx=*/5_n, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1,1,1)") { + TaskSpaceCoordinate coord = + make_task_space_coordinate({1_n, 1_n, 1_n}); + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1_n, /*device_idx=*/7_n, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord); + CHECK(correct == result); } } } TEST_CASE("get_device_ids") { - SUBCASE("1D machine view") { // This operator has shape (3,), and thus 3 tasks. 
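+ // With num_nodes = 1 and num_gpus_per_node = 6, get_device_id resolves a
+ // machine-space coordinate to node_idx * 6 + device_idx, so the three tasks
+ // in this subcase should select device ids 1, 3, and 5.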
@@ -318,14 +392,18 @@ TEST_SUITE(FF_TEST_SUITE) { * Where the integers are the device ids and ((x)) are the devices we * select */ - MachineSpecification ms = - MachineSpecification{/*num_nodes=*/1_p, - /*num_cpus_per_node=*/6_p, - /*num_gpus_per_node=*/6_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0}; - - OperatorTaskSpace task = OperatorTaskSpace{{3_p}}; + MachineComputeSpecification ms = + MachineComputeSpecification{ + /*num_nodes=*/1_p, + /*num_cpus_per_node=*/6_p, + /*num_gpus_per_node=*/6_p, + }; + + OperatorTaskSpace task = OperatorTaskSpace{ + MinimalOrthotope{{ + 3_ge2, + }}, + }; MachineView mv = MachineView{ MachineSpaceCoordinate{ /*node_idx=*/0_n, /*device_idx=*/1_n, DeviceType::GPU}, @@ -363,14 +441,19 @@ TEST_SUITE(FF_TEST_SUITE) { * select */ - MachineSpecification ms = - MachineSpecification{/*num_nodes=*/3_p, - /*num_cpus_per_node=*/5_p, - /*num_gpus_per_node=*/5_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0}; - - OperatorTaskSpace task = OperatorTaskSpace{{2_p, 2_p}}; + MachineComputeSpecification ms = + MachineComputeSpecification{ + /*num_nodes=*/3_p, + /*num_cpus_per_node=*/5_p, + /*num_gpus_per_node=*/5_p, + }; + + OperatorTaskSpace task = OperatorTaskSpace{ + MinimalOrthotope{{ + 2_ge2, + 2_ge2, + }}, + }; MachineView mv = MachineView{ MachineSpaceCoordinate{ /*node_idx=*/1_n, /*device_idx=*/2_n, DeviceType::GPU}, diff --git a/lib/pcg/test/src/pcg/operator_task_space.cc b/lib/pcg/test/src/pcg/operator_task_space.cc deleted file mode 100644 index 4b01ed02fb..0000000000 --- a/lib/pcg/test/src/pcg/operator_task_space.cc +++ /dev/null @@ -1,66 +0,0 @@ -#include "pcg/operator_task_space.h" -#include "utils/fmt/unordered_set.h" -#include - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_task_space_coordinates") { - - SUBCASE("OperatorTaskSpace has 0 dimensions") { - OperatorTaskSpace task = OperatorTaskSpace{{}}; - - std::unordered_set correct = { - TaskSpaceCoordinate{{}}}; - std::unordered_set result = - get_task_space_coordinates(task); - CHECK(correct == result); - } - SUBCASE("OperatorTaskSpace has 2 dimensions") { - - OperatorTaskSpace task = OperatorTaskSpace{{2_p, 2_p}}; - - std::unordered_set correct = {{ - TaskSpaceCoordinate{{0_n, 0_n}}, - TaskSpaceCoordinate{{0_n, 1_n}}, - TaskSpaceCoordinate{{1_n, 0_n}}, - TaskSpaceCoordinate{{1_n, 1_n}}, - }}; - std::unordered_set result = - get_task_space_coordinates(task); - CHECK(correct == result); - } - SUBCASE("OperatorTaskSpace has 3 dimensions") { - - OperatorTaskSpace task = OperatorTaskSpace{{1_p, 2_p, 2_p}}; - - std::unordered_set correct = {{ - TaskSpaceCoordinate{{0_n, 0_n, 0_n}}, - TaskSpaceCoordinate{{0_n, 0_n, 1_n}}, - TaskSpaceCoordinate{{0_n, 1_n, 0_n}}, - TaskSpaceCoordinate{{0_n, 1_n, 1_n}}, - }}; - std::unordered_set result = - get_task_space_coordinates(task); - CHECK(correct == result); - } - } - TEST_CASE("get_task_space_maximum_coordinate") { - SUBCASE("OperatorTaskSpace has 2 dimensions") { - - OperatorTaskSpace task = OperatorTaskSpace{{3_p, 2_p}}; - - TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2_n, 1_n}}; - TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); - CHECK(correct == result); - } - SUBCASE("OperatorTaskSpace has 3 dimensions") { - - OperatorTaskSpace task = OperatorTaskSpace{{3_p, 2_p, 4_p}}; - - TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2_n, 1_n, 3_n}}; - TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); - CHECK(correct == result); - } - } -} diff --git 
a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc index f223558868..a5d8aa058e 100644 --- a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc +++ b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph.cc @@ -1,4 +1,5 @@ #include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "op-attrs/operator_task_space_to_operator_task_space_mapping.h" #include "op-attrs/ops/element_unary.h" #include "op-attrs/ops/linear.h" #include "op-attrs/ops/replicate.h" @@ -18,7 +19,7 @@ static ParallelLayerAttrs make_layer_attrs(T const &op_attrs) { }; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("topological_ordering") { + TEST_CASE("topological_ordering(ParallelComputationGraph)") { // TODO(@lockshaw) should probably be replaced with a rapidcheck test that // compares ParallelComputationGraph to DataflowGraph, but since we // currently don't have rapidcheck generation for DataflowGraph this will @@ -342,4 +343,154 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(pcgs_are_isomorphic(result, correct)); } + + TEST_CASE("pcg_get_mapping_along_edge") { + ParallelComputationGraph pcg = empty_parallel_computation_graph(); + + TensorShape input_shape = TensorShape{ + TensorDims{ + FFOrdered{ + 10_p, + 12_p, + }, + }, + DataType::FLOAT, + }; + + ParallelTensorShape par_input_shape = lift_to_parallel(input_shape); + + ParallelLayerAttrs partition_attrs = ParallelLayerAttrs{ + /*op_attrs=*/PCGOperatorAttrs{ + RepartitionAttrs{ + /*repartition_dim=*/ff_dim_t{0_n}, + /*repartition_degree=*/2_p, + }, + }, + /*name=*/std::nullopt, + }; + + ParallelLayerAttrs relu_attrs = ParallelLayerAttrs{ + /*op_attrs=*/PCGOperatorAttrs{ + ElementUnaryAttrs{ + /*op_type=*/OperatorType::RELU, + /*scalar=*/std::nullopt, + }, + }, + /*name=*/std::nullopt, + }; + + SUBCASE("trivial mapping (relu into relu)") { + ParallelLayerAddedResult input = pcg_add_input_layer(pcg, input_shape); + parallel_tensor_guid_t t_input = get_only(input.outputs); + ParallelLayerAddedResult partition_input = + add_parallel_layer(pcg, partition_attrs, {t_input}, {}); + parallel_tensor_guid_t t_partition_input = get_only(partition_input.outputs); + + ParallelLayerAddedResult layer_1 = + add_parallel_layer(pcg, relu_attrs, {t_partition_input}, {}); + parallel_tensor_guid_t t_layer_1 = get_only(layer_1.outputs); + ParallelLayerAddedResult layer_2 = + add_parallel_layer(pcg, relu_attrs, {t_layer_1}, {}); + + ParallelComputationGraphEdge edge + = get_only( + get_pcg_edges_from_layer_to_layer( + /*pcg=*/pcg, + /*src=*/layer_1.parallel_layer, + /*dst=*/layer_2.parallel_layer)); + + OperatorTaskSpaceToOperatorTaskSpaceMapping result = pcg_get_mapping_along_edge(pcg, edge); + + DimDomain layer_1_task_space = DimDomain{{ + {operator_task_space_dim_idx_t{0_n}, 2_p}, + }}; + + DimDomain layer_2_task_space = layer_1_task_space; + + auto make_coord = [](nonnegative_int x) { + return DimCoord{ + std::unordered_map{ + {operator_task_space_dim_idx_t{0_n}, x}, + }, + }; + }; + + OperatorTaskSpaceToOperatorTaskSpaceMapping correct = OperatorTaskSpaceToOperatorTaskSpaceMapping{ + DimDomainMapping{ + bidict, DimCoord>{ + {make_coord(0_n), make_coord(0_n)}, + {make_coord(1_n), make_coord(1_n)}, + }, + layer_1_task_space, + layer_2_task_space, + }, + }; + + CHECK(result == correct); + } + + SUBCASE("nontrivial mapping (relu into transpose)") { + ParallelLayerAddedResult input = pcg_add_input_layer(pcg, input_shape); +
parallel_tensor_guid_t t_input = get_only(input.outputs); + ParallelLayerAddedResult partition_input = + add_parallel_layer(pcg, partition_attrs, {t_input}, {}); + parallel_tensor_guid_t t_partition_input = get_only(partition_input.outputs); + + ParallelLayerAttrs transpose_attrs = ParallelLayerAttrs{ + /*op_attrs=*/PCGOperatorAttrs{ + TransposeAttrs{ + TensorDimPermutation{ + bidict{ + {ff_dim_t{0_n}, ff_dim_t{1_n}}, + {ff_dim_t{1_n}, ff_dim_t{0_n}}, + }, + }, + }, + }, + /*name=*/std::nullopt, + }; + + ParallelLayerAddedResult layer_1 = + add_parallel_layer(pcg, relu_attrs, {t_partition_input}, {}); + parallel_tensor_guid_t t_layer_1 = get_only(layer_1.outputs); + ParallelLayerAddedResult layer_2 = + add_parallel_layer(pcg, transpose_attrs, {t_layer_1}, {}); + + ParallelComputationGraphEdge edge + = get_only( + get_pcg_edges_from_layer_to_layer( + /*pcg=*/pcg, + /*src=*/layer_1.parallel_layer, + /*dst=*/layer_2.parallel_layer)); + + OperatorTaskSpaceToOperatorTaskSpaceMapping result = pcg_get_mapping_along_edge(pcg, edge); + + DimDomain layer_1_task_space = DimDomain{{ + {operator_task_space_dim_idx_t{0_n}, 2_p}, + }}; + + DimDomain layer_2_task_space = layer_1_task_space; + + auto make_coord = [](nonnegative_int x) { + return DimCoord{ + std::unordered_map{ + {operator_task_space_dim_idx_t{0_n}, x}, + }, + }; + }; + + OperatorTaskSpaceToOperatorTaskSpaceMapping correct = OperatorTaskSpaceToOperatorTaskSpaceMapping{ + DimDomainMapping{ + bidict, DimCoord>{ + {make_coord(0_n), make_coord(1_n)}, + {make_coord(1_n), make_coord(0_n)}, + }, + layer_1_task_space, + layer_2_task_space, + }, + }; + + CHECK(result == correct); + } + } } diff --git a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc index 1682ac6254..8dfabf67ac 100644 --- a/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc +++ b/lib/pcg/test/src/pcg/parallel_computation_graph/parallel_computation_graph_builder.cc @@ -212,25 +212,25 @@ TEST_SUITE(FF_TEST_SUITE) { }); CHECK_MESSAGE(layers.size() == 7, "Incorrect layers ", layers); - auto num_attrs_of_type = [&](OperatorType op_type) -> int { + auto num_attrs_of_type = [&](OperatorType op_type) -> nonnegative_int { return count(values(layers), [&](ParallelLayerAttrs const &l) { return get_op_type(l) == op_type; }); }; - int num_weight_attrs = num_attrs_of_type(OperatorType::WEIGHT); + nonnegative_int num_weight_attrs = num_attrs_of_type(OperatorType::WEIGHT); CHECK(num_weight_attrs == 2); - int num_input_attrs = num_attrs_of_type(OperatorType::INPUT); + nonnegative_int num_input_attrs = num_attrs_of_type(OperatorType::INPUT); CHECK(num_input_attrs == 1); - int num_conv_attrs = num_attrs_of_type(OperatorType::CONV2D); + nonnegative_int num_conv_attrs = num_attrs_of_type(OperatorType::CONV2D); CHECK(num_conv_attrs == 1); - int num_replicate_attrs = num_attrs_of_type(OperatorType::REPLICATE); + nonnegative_int num_replicate_attrs = num_attrs_of_type(OperatorType::REPLICATE); CHECK(num_replicate_attrs == 2); - int num_partition_attrs = num_attrs_of_type(OperatorType::REPARTITION); + nonnegative_int num_partition_attrs = num_attrs_of_type(OperatorType::REPARTITION); CHECK(num_partition_attrs == 1); parallel_layer_guid_t conv_guid = get_only(without_nullopts(transform( diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index afd6ad6b33..3896fabeb4 100644 ---
a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -1,4 +1,5 @@ #include "pcg/start_invariant_machine_view.h" +#include "op-attrs/task_space_coordinate.h" #include "utils/fmt/unordered_set.h" #include "utils/fmt/vector.h" #include @@ -94,43 +95,47 @@ TEST_SUITE(FF_TEST_SUITE) { * | (0,) | | (1,) | | (2,) | | * +-------+-------+-------+-------+-------+-------+ */ - OperatorTaskSpace task = OperatorTaskSpace{{3_p}}; + OperatorTaskSpace task = OperatorTaskSpace{ + MinimalOrthotope{{ + 3_ge2, + }}, + }; StartInvariantMachineView simv = StartInvariantMachineView{ {MachineViewDimension{stride_t{2_p}, MachineSpecificationDimension::INTRA_NODE}}, DeviceType::GPU}; - MachineSpecification ms = - MachineSpecification{/*num_nodes=*/1_p, - /*num_cpus_per_node=*/6_p, - /*num_gpus_per_node=*/6_p, - /*inter_node_bandwidth=*/0.0, - /*intra_node_bandwidth=*/0.0}; + MachineComputeSpecification ms = + MachineComputeSpecification{ + /*num_nodes=*/1_p, + /*num_cpus_per_node=*/6_p, + /*num_gpus_per_node=*/6_p, + }; SUBCASE("get_machine_space_offset") { SUBCASE("Task with TaskSpaceCoordinate = (0,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n}}; + TaskSpaceCoordinate coord = make_task_space_coordinate({0_n}); MachineSpaceOffset correct = MachineSpaceOffset{0, 0, DeviceType::GPU}; MachineSpaceOffset result = - get_machine_space_offset(task, simv, coord, ms).value(); + get_machine_space_offset(task, simv, coord); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n}}; + TaskSpaceCoordinate coord = make_task_space_coordinate({1_n}); MachineSpaceOffset correct = MachineSpaceOffset{0, 2, DeviceType::GPU}; MachineSpaceOffset result = - get_machine_space_offset(task, simv, coord, ms).value(); + get_machine_space_offset(task, simv, coord); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (2,)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2_n}}; + TaskSpaceCoordinate coord = make_task_space_coordinate({2_n}); MachineSpaceOffset correct = MachineSpaceOffset{0, 4, DeviceType::GPU}; MachineSpaceOffset result = - get_machine_space_offset(task, simv, coord, ms).value(); + get_machine_space_offset(task, simv, coord); CHECK(correct == result); } } @@ -141,7 +146,7 @@ TEST_SUITE(FF_TEST_SUITE) { MachineSpaceOffset{0, 2, DeviceType::GPU}, MachineSpaceOffset{0, 4, DeviceType::GPU}}; std::unordered_set result = - get_machine_space_offsets(task, simv, ms); + get_machine_space_offsets(task, simv); CHECK(correct == result); } } @@ -162,54 +167,59 @@ TEST_SUITE(FF_TEST_SUITE) { * +-------+-------+-------+-------+ */ - OperatorTaskSpace task = OperatorTaskSpace{{2_p, 2_p}}; + OperatorTaskSpace task = OperatorTaskSpace{ + MinimalOrthotope{{ + 2_ge2, + 2_ge2, + }}, + }; StartInvariantMachineView simv = StartInvariantMachineView{ {MachineViewDimension{stride_t{1_p}, MachineSpecificationDimension::INTER_NODE}, MachineViewDimension{stride_t{2_p}, MachineSpecificationDimension::INTRA_NODE}}, DeviceType::GPU}; - MachineSpecification ms = - MachineSpecification{/*num_nodes=*/2_p, - /*num_cpus_per_node=*/4_p, - /*num_gpus_per_node=*/4_p, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0}; + MachineComputeSpecification ms = + MachineComputeSpecification{ + /*num_nodes=*/2_p, + /*num_cpus_per_node=*/4_p, + /*num_gpus_per_node=*/4_p, + }; SUBCASE("get_machine_space_offset") { SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { - TaskSpaceCoordinate coord = 
TaskSpaceCoordinate{{0_n, 0_n}}; + TaskSpaceCoordinate coord = make_task_space_coordinate({0_n, 0_n}); MachineSpaceOffset correct = MachineSpaceOffset{0, 0, DeviceType::GPU}; MachineSpaceOffset result = - get_machine_space_offset(task, simv, coord, ms).value(); + get_machine_space_offset(task, simv, coord); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0_n, 1_n}}; + TaskSpaceCoordinate coord = make_task_space_coordinate({0_n, 1_n}); MachineSpaceOffset correct = MachineSpaceOffset{0, 2, DeviceType::GPU}; MachineSpaceOffset result = - get_machine_space_offset(task, simv, coord, ms).value(); + get_machine_space_offset(task, simv, coord); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 0_n}}; + TaskSpaceCoordinate coord = make_task_space_coordinate({1_n, 0_n}); MachineSpaceOffset correct = MachineSpaceOffset{1, 0, DeviceType::GPU}; MachineSpaceOffset result = - get_machine_space_offset(task, simv, coord, ms).value(); + get_machine_space_offset(task, simv, coord); CHECK(correct == result); } SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1_n, 1_n}}; + TaskSpaceCoordinate coord = make_task_space_coordinate({1_n, 1_n}); MachineSpaceOffset correct = MachineSpaceOffset{1, 2, DeviceType::GPU}; MachineSpaceOffset result = - get_machine_space_offset(task, simv, coord, ms).value(); + get_machine_space_offset(task, simv, coord); CHECK(correct == result); } } @@ -221,7 +231,7 @@ TEST_SUITE(FF_TEST_SUITE) { MachineSpaceOffset{1, 0, DeviceType::GPU}, MachineSpaceOffset{1, 2, DeviceType::GPU}}; std::unordered_set result = - get_machine_space_offsets(task, simv, ms); + get_machine_space_offsets(task, simv); CHECK(correct == result); } } diff --git a/lib/runtime/src/fused_op_attrs.h b/lib/runtime/src/fused_op_attrs.h index a8ea524165..a1ab876167 100644 --- a/lib/runtime/src/fused_op_attrs.h +++ b/lib/runtime/src/fused_op_attrs.h @@ -2,7 +2,6 @@ #define _FLEXFLOW_RUNTIME_SRC_FUSED_OP_ATTRS_H #include "op-attrs/get_op_type.h" -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.h" #include "operator.h" #include "utils/visitable.h" diff --git a/lib/runtime/src/ops/fused_parallel_op_attrs.h b/lib/runtime/src/ops/fused_parallel_op_attrs.h index 454b7caead..4dd64199d3 100644 --- a/lib/runtime/src/ops/fused_parallel_op_attrs.h +++ b/lib/runtime/src/ops/fused_parallel_op_attrs.h @@ -1,7 +1,6 @@ -#ifndef _FLEXFLOW_FUSED_PARALLEL_OP_ATTRS_H -#define _FLEXFLOW_FUSED_PARALLEL_OP_ATTRS_H +#ifndef _FLEXFLOW_LIB_RUNTIME_INCLUDE_OPS_FUSED_PARALLEL_OP_ATTRS_H +#define _FLEXFLOW_LIB_RUNTIME_INCLUDE_OPS_FUSED_PARALLEL_OP_ATTRS_H -#include "op-attrs/ops/core.h" #include "op-attrs/parallel_tensor_shape.h" #include "parallel_op_info.h" #include "utils/visitable.h" diff --git a/lib/runtime/test/src/main.cc b/lib/runtime/test/src/main.cc index 9522fa7fdb..0a3f254ea8 100644 --- a/lib/runtime/test/src/main.cc +++ b/lib/runtime/test/src/main.cc @@ -1,2 +1,2 @@ #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN -#include "doctest/doctest.h" +#include diff --git a/lib/runtime/test/src/test_op_task_spec.cc b/lib/runtime/test/src/test_op_task_spec.cc index bb0bee567c..0909c42bf3 100644 --- a/lib/runtime/test/src/test_op_task_spec.cc +++ b/lib/runtime/test/src/test_op_task_spec.cc @@ -1,6 +1,6 @@ -#include "doctest/doctest.h" #include "op_task_invocation.h" #include "op_task_signature.h" +#include using 
namespace FlexFlow; diff --git a/lib/runtime/test/src/test_serialization.cc b/lib/runtime/test/src/test_serialization.cc index 471f2a2709..2c5d680071 100644 --- a/lib/runtime/test/src/test_serialization.cc +++ b/lib/runtime/test/src/test_serialization.cc @@ -1,7 +1,7 @@ -#include "doctest/doctest.h" #include "legion/legion_utilities.h" #include "op-attrs/ffconst.h" #include "serialization.h" +#include #include using namespace FlexFlow; diff --git a/lib/substitution-generator/test/substitution-generator/legacy_rules.cc b/lib/substitution-generator/test/substitution-generator/legacy_rules.cc index 4dd9bb8cc4..19102d9670 100644 --- a/lib/substitution-generator/test/substitution-generator/legacy_rules.cc +++ b/lib/substitution-generator/test/substitution-generator/legacy_rules.cc @@ -1,5 +1,5 @@ #include "substitution-generator/legacy_rules.h" -#include "doctest/doctest.h" +#include using namespace FlexFlow; using nlohmann::json; diff --git a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_value.variant.toml b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_value.variant.toml index 1994d54f38..76974593a6 100644 --- a/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_value.variant.toml +++ b/lib/substitutions/include/substitutions/operator_pattern/operator_attribute_value.variant.toml @@ -22,6 +22,7 @@ includes = [ "", "utils/nonnegative_int/nonnegative_int.h", "utils/positive_int/positive_int.h", + "op-attrs/tensor_dim_permutation.h", ] src_includes = [ @@ -78,3 +79,6 @@ type = "::FlexFlow::TensorDims" [[values]] type = "::FlexFlow::DataType" + +[[values]] +type = "::FlexFlow::TensorDimPermutation" diff --git a/lib/substitutions/include/substitutions/unity_substitution_set.h b/lib/substitutions/include/substitutions/unity_substitution_set.h index 183f76ac8a..be1a2101d0 100644 --- a/lib/substitutions/include/substitutions/unity_substitution_set.h +++ b/lib/substitutions/include/substitutions/unity_substitution_set.h @@ -1,14 +1,14 @@ #ifndef _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_UNITY_SUBSTITUTION_SET_H #define _FLEXFLOW_LIB_SUBSTITUTIONS_INCLUDE_SUBSTITUTIONS_UNITY_SUBSTITUTION_SET_H -#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_compute_specification.dtg.h" #include "substitutions/substitution.dtg.h" #include "utils/fmt/vector.h" namespace FlexFlow { std::vector - get_substitution_set(MachineSpecification const &resources); + get_substitution_set(MachineComputeSpecification const &resources); Substitution create_combine_inception(nonnegative_int num_convs, nonnegative_int num_dims, diff --git a/lib/substitutions/src/substitutions/apply_substitution/apply_substitution.cc b/lib/substitutions/src/substitutions/apply_substitution/apply_substitution.cc index 61bfe15d7b..f072d972e0 100644 --- a/lib/substitutions/src/substitutions/apply_substitution/apply_substitution.cc +++ b/lib/substitutions/src/substitutions/apply_substitution/apply_substitution.cc @@ -9,7 +9,7 @@ #include "substitutions/sub_parallel_computation_graph_data.dtg.h" #include "substitutions/sub_parallel_computation_graph_edge.h" #include "utils/containers/keys.h" -#include "utils/containers/merge_maps.h" +#include "utils/containers/binary_merge_disjoint_maps.h" #include "utils/containers/restrict_keys.h" #include "utils/containers/set_minus.h" #include "utils/containers/values.h" @@ -46,8 +46,8 @@ SubParallelComputationGraph std::unordered_map post_node_data_from_sub = output_graph_data.node_data; - return 
merge_disjoint_maps(post_node_data_from_orig, - post_node_data_from_sub); + return binary_merge_disjoint_maps(post_node_data_from_orig, + post_node_data_from_sub); }(); std::unordered_set post_edges = [&] { @@ -148,8 +148,8 @@ SubParallelComputationGraph std::unordered_map post_value_data_from_sub = output_graph_data.value_data; - return merge_disjoint_maps(post_value_data_from_orig, - post_value_data_from_sub); + return binary_merge_disjoint_maps(post_value_data_from_orig, + post_value_data_from_sub); }(); SubParallelComputationGraphData post_data = SubParallelComputationGraphData{ diff --git a/lib/substitutions/src/substitutions/apply_substitution/evaluate_substitution_output.cc b/lib/substitutions/src/substitutions/apply_substitution/evaluate_substitution_output.cc index 272c5f2dd5..45d7d1804c 100644 --- a/lib/substitutions/src/substitutions/apply_substitution/evaluate_substitution_output.cc +++ b/lib/substitutions/src/substitutions/apply_substitution/evaluate_substitution_output.cc @@ -2,6 +2,8 @@ #include "substitutions/apply_substitution/perform_shape_inference.h" #include "substitutions/output_graph/output_operator_attrs_assignment.h" #include "substitutions/sub_parallel_computation_graph.h" +#include "utils/bidict/algorithms/transform_keys.h" +#include "utils/bidict/algorithms/transform_values.h" #include "utils/containers/map_keys.h" #include "utils/containers/map_values.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/permute_input_ids.h" @@ -47,18 +49,20 @@ std::pair }); bidict result_input_map = - map_keys(map_values(new_input_id_permutation, - [](DataflowGraphInput const &i) { - return OutputGraphExprInput{i}; - }), - [](NewDataflowGraphInput const &i) { - return input_parallel_tensor_guid_t{i.raw_input}; - }); + transform_keys(transform_values(new_input_id_permutation, + [](DataflowGraphInput const &i) { + return OutputGraphExprInput{i}; + }), + [](NewDataflowGraphInput const &i) { + return input_parallel_tensor_guid_t{i.raw_input}; + }); - bidict result_node_map = map_keys( - map_values(new_node_id_permutation, - [](Node const &n) { return OutputGraphExprNode{n}; }), - [](NewNode const &n) { return parallel_layer_guid_t{n.raw_node}; }); + bidict result_node_map = + transform_keys( + transform_values( + new_node_id_permutation, + [](Node const &n) { return OutputGraphExprNode{n}; }), + [](NewNode const &n) { return parallel_layer_guid_t{n.raw_node}; }); std::unordered_map input_shapes = map_values(map_keys(match.input_assignment, diff --git a/lib/substitutions/src/substitutions/apply_substitution/perform_shape_inference.cc b/lib/substitutions/src/substitutions/apply_substitution/perform_shape_inference.cc index 87dd5e6cbd..c1a0885004 100644 --- a/lib/substitutions/src/substitutions/apply_substitution/perform_shape_inference.cc +++ b/lib/substitutions/src/substitutions/apply_substitution/perform_shape_inference.cc @@ -9,6 +9,7 @@ #include "utils/graph/digraph/algorithms/get_topological_ordering.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/rewrite_value_labels.h" #include "utils/graph/open_dataflow_graph/algorithms/get_inputs.h" +#include "utils/nonnegative_int/num_elements.h" namespace FlexFlow { @@ -32,7 +33,7 @@ LabelledOpenDataflowGraphView ParallelLayerAttrs n_attrs = g.at(n); std::vector incoming_tensor_roles = - get_incoming_tensor_roles(n_attrs.op_attrs, incoming_shapes.size()); + get_incoming_tensor_roles(n_attrs.op_attrs, num_elements(incoming_shapes)); auto incoming_shapes_with_role = [&](IncomingTensorRole role) -> std::vector { 
diff --git a/lib/substitutions/src/substitutions/operator_pattern/eval_list_access.cc b/lib/substitutions/src/substitutions/operator_pattern/eval_list_access.cc index 6f41772a9e..0486b4321b 100644 --- a/lib/substitutions/src/substitutions/operator_pattern/eval_list_access.cc +++ b/lib/substitutions/src/substitutions/operator_pattern/eval_list_access.cc @@ -1,9 +1,10 @@ #include "substitutions/operator_pattern/eval_list_access.h" #include "substitutions/operator_pattern/get_attribute.h" -#include "utils/containers/at_idx.h" +#include "utils/containers/try_at_idx.h" #include "utils/containers/make.h" #include "utils/containers/transform.h" #include "utils/overload.h" +#include namespace FlexFlow { @@ -22,13 +23,13 @@ std::optional using T = std::decay_t; if constexpr (std::is_same_v>) { - return transform(at_idx(v, acc.index), + return transform(try_at_idx(v, acc.index), make()); } else if constexpr (std::is_same_v>) { - return transform(at_idx(v, acc.index), + return transform(try_at_idx(v, acc.index), make()); } else { - throw mk_runtime_error("Invalid operand"); + PANIC("Invalid operand"); } }); } diff --git a/lib/substitutions/src/substitutions/operator_pattern/get_attribute.cc b/lib/substitutions/src/substitutions/operator_pattern/get_attribute.cc index cb733e16ff..f7fce1aca7 100644 --- a/lib/substitutions/src/substitutions/operator_pattern/get_attribute.cc +++ b/lib/substitutions/src/substitutions/operator_pattern/get_attribute.cc @@ -384,7 +384,7 @@ std::optional get_attribute(TransposeAttrs const &p, case OperatorAttributeKey::OP_TYPE: return OperatorAttributeValue{get_op_type(p)}; case OperatorAttributeKey::PERMUTATION: - return OperatorAttributeValue{vector_of(p.perm)}; + return OperatorAttributeValue{p.permutation}; default: return std::nullopt; } diff --git a/lib/substitutions/src/substitutions/operator_pattern/satisfies_constraint.cc b/lib/substitutions/src/substitutions/operator_pattern/satisfies_constraint.cc index 194ae49255..96c33989fe 100644 --- a/lib/substitutions/src/substitutions/operator_pattern/satisfies_constraint.cc +++ b/lib/substitutions/src/substitutions/operator_pattern/satisfies_constraint.cc @@ -1,5 +1,6 @@ #include "substitutions/operator_pattern/satisfies_constraint.h" #include "substitutions/operator_pattern/operator_attribute_expr.h" +#include namespace FlexFlow { @@ -17,9 +18,7 @@ bool operator_satisfies_constraint( case ConstraintType::EQUAL: return expr_val.value() == constraint.attribute_value; default: - throw mk_runtime_error( - fmt::format("Unknown constraint type {}", - static_cast(constraint.constraint_type))); + PANIC("Unknown constraint type", constraint.constraint_type); } } diff --git a/lib/substitutions/src/substitutions/output_graph/materialize_operator_from_attrs_map.cc b/lib/substitutions/src/substitutions/output_graph/materialize_operator_from_attrs_map.cc index cf5a1e17f9..11ef85984c 100644 --- a/lib/substitutions/src/substitutions/output_graph/materialize_operator_from_attrs_map.cc +++ b/lib/substitutions/src/substitutions/output_graph/materialize_operator_from_attrs_map.cc @@ -1,6 +1,7 @@ #include "substitutions/output_graph/materialize_operator_from_attrs_map.h" #include "utils/containers/contains_key.h" #include "utils/fmt/unordered_map.h" +#include namespace FlexFlow { @@ -16,8 +17,7 @@ struct Accessor { if (contains_key(this->m, k)) { return this->m.at(k).get(); } else { - throw mk_runtime_error( - fmt::format("Could not find key {} in attrs map: {}", k, this->m)); + PANIC("Could not find key in attrs map", k, this->m); } } }; @@ 
-151,8 +151,7 @@ PCGOperatorAttrs materialize_operator_from_attrs_map( case OperatorType::PIPELINE: case OperatorType::FUSED_PARALLEL: default: - throw mk_runtime_error( - fmt::format("Unsupported operator type {}", op_type)); + PANIC("Unsupported operator type", op_type); } } diff --git a/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.cc b/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.cc index f6b90ef054..d8c74ade5c 100644 --- a/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.cc +++ b/lib/substitutions/src/substitutions/output_graph/output_operator_attrs_assignment.cc @@ -3,7 +3,8 @@ #include "substitutions/output_graph/materialize_operator_from_attrs_map.h" #include "substitutions/output_graph/output_operator_attribute_expr.h" #include "utils/containers/map_values.h" -#include "utils/containers/merge_maps.h" +#include "utils/containers/binary_merge_maps_with_right_dominating.h" +#include "utils/exception.h" namespace FlexFlow { @@ -36,7 +37,7 @@ PCGOperatorAttrs materialize_output_operator_from_attrs_assignment( std::unordered_map joined_attrs_map = - merge_map_right_dominates(template_attrs_map, assignments_attrs_map); + binary_merge_maps_with_right_dominating(template_attrs_map, assignments_attrs_map); return materialize_operator_from_attrs_map(joined_attrs_map); } diff --git a/lib/substitutions/src/substitutions/pcg_pattern.cc b/lib/substitutions/src/substitutions/pcg_pattern.cc index a0af875848..0c91af774a 100644 --- a/lib/substitutions/src/substitutions/pcg_pattern.cc +++ b/lib/substitutions/src/substitutions/pcg_pattern.cc @@ -5,6 +5,7 @@ #include "substitutions/tensor_pattern/satisfies_pattern.h" #include "substitutions/unlabelled/find_pattern_matches.h" #include "substitutions/unlabelled/pattern_value.h" +#include "utils/bidict/algorithms/transform_values.h" #include "utils/containers/map_values.h" #include "utils/containers/transform.h" #include "utils/graph/dataflow_graph/algorithms.h" @@ -47,8 +48,9 @@ std::vector auto pcg_match_from_unlabelled_match = [](UnlabelledDataflowGraphPatternMatch const &m) { return PCGPatternMatch{ - map_values(m.node_assignment, - [](Node const &n) { return parallel_layer_guid_t{n}; }), + transform_values( + m.node_assignment, + [](Node const &n) { return parallel_layer_guid_t{n}; }), map_values(m.input_assignment, [](OpenDataflowValue const &i) { return open_parallel_tensor_guid_t{i}; diff --git a/lib/substitutions/src/substitutions/pcg_pattern_match.cc b/lib/substitutions/src/substitutions/pcg_pattern_match.cc index b701be65cf..2281d46514 100644 --- a/lib/substitutions/src/substitutions/pcg_pattern_match.cc +++ b/lib/substitutions/src/substitutions/pcg_pattern_match.cc @@ -3,6 +3,7 @@ #include "substitutions/sub_parallel_computation_graph.h" #include "utils/bidict/algorithms/bidict_from_keys_and_values.h" #include "utils/bidict/algorithms/merge_disjoint_bidicts.h" +#include "utils/bidict/algorithms/transform_values.h" #include "utils/containers/map_values.h" #include "utils/containers/zip.h" @@ -36,7 +37,7 @@ bidict UnlabelledDataflowGraphPatternMatch get_unlabelled_pattern_match(PCGPatternMatch const &match) { return UnlabelledDataflowGraphPatternMatch{ - map_values( + transform_values( match.node_assignment, [](parallel_layer_guid_t const &l) { return l.raw_graph_node; }), map_values(match.input_assignment, diff --git a/lib/substitutions/src/substitutions/substitution.cc b/lib/substitutions/src/substitutions/substitution.cc index 
874700d303..f0d4dd7c26 100644 --- a/lib/substitutions/src/substitutions/substitution.cc +++ b/lib/substitutions/src/substitutions/substitution.cc @@ -3,6 +3,7 @@ #include "substitutions/pcg_pattern.h" #include "utils/bidict/algorithms/left_entries.h" #include "utils/bidict/algorithms/right_entries.h" +#include "utils/bidict/algorithms/transform.h" #include "utils/containers/map_values.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/find_isomorphism.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/rewrite_node_labels.h" diff --git a/lib/substitutions/src/substitutions/tensor_pattern/eval_list_access.cc b/lib/substitutions/src/substitutions/tensor_pattern/eval_list_access.cc index 7bfb1f5e9e..d570f23313 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/eval_list_access.cc +++ b/lib/substitutions/src/substitutions/tensor_pattern/eval_list_access.cc @@ -2,6 +2,7 @@ #include "substitutions/tensor_pattern/get_attribute.h" #include "utils/containers/at_idx.h" #include "utils/overload.h" +#include namespace FlexFlow { @@ -12,11 +13,9 @@ TensorAttributeValue return from_attr.visit(overload{ [&](std::vector const &v) -> TensorAttributeValue { - return TensorAttributeValue{at_idx(v, acc.index).value()}; - }, - [](auto &&) -> TensorAttributeValue { - throw mk_runtime_error("Invalid operand"); + return TensorAttributeValue{at_idx(v, acc.index)}; }, + [](auto &&x) -> TensorAttributeValue { PANIC("Invalid operand", x); }, }); } diff --git a/lib/substitutions/src/substitutions/tensor_pattern/satisfies_constraint.cc b/lib/substitutions/src/substitutions/tensor_pattern/satisfies_constraint.cc index 974bfcabc0..e2f2e211fa 100644 --- a/lib/substitutions/src/substitutions/tensor_pattern/satisfies_constraint.cc +++ b/lib/substitutions/src/substitutions/tensor_pattern/satisfies_constraint.cc @@ -1,5 +1,6 @@ #include "substitutions/tensor_pattern/satisfies_constraint.h" #include "substitutions/tensor_pattern/tensor_attribute_expr.h" +#include namespace FlexFlow { @@ -13,9 +14,7 @@ bool parallel_tensor_satisfies_constraint( case ConstraintType::EQUAL: return expr_val == constraint.attribute_value; default: - throw mk_runtime_error( - fmt::format("Unknown constraint type {}", - static_cast(constraint.constraint_type))); + PANIC("Unknown constraint type", constraint.constraint_type); } } diff --git a/lib/substitutions/src/substitutions/unity_substitution_set.cc b/lib/substitutions/src/substitutions/unity_substitution_set.cc index 4b00cdd95f..da784bb739 100644 --- a/lib/substitutions/src/substitutions/unity_substitution_set.cc +++ b/lib/substitutions/src/substitutions/unity_substitution_set.cc @@ -1,5 +1,5 @@ #include "substitutions/unity_substitution_set.h" -#include "pcg/machine_specification.h" +#include "pcg/machine_compute_specification.h" #include "substitutions/operator_pattern/operator_attribute_constraint.h" #include "substitutions/output_graph/output_operator_attrs_assignment.h" #include "substitutions/substitution_builder.h" @@ -11,7 +11,7 @@ namespace FlexFlow { std::vector - get_substitution_set(MachineSpecification const &resources) { + get_substitution_set(MachineComputeSpecification const &resources) { std::vector substitutions; for (nonnegative_int num_dims : nonnegative_range(1_n, nonnegative_int{MAX_TENSOR_DIM})) { diff --git a/lib/substitutions/src/substitutions/unlabelled/unlabelled_graph_pattern.cc b/lib/substitutions/src/substitutions/unlabelled/unlabelled_graph_pattern.cc index 84e0d91fee..40397c3c05 100644 --- 
a/lib/substitutions/src/substitutions/unlabelled/unlabelled_graph_pattern.cc +++ b/lib/substitutions/src/substitutions/unlabelled/unlabelled_graph_pattern.cc @@ -1,6 +1,7 @@ #include "substitutions/unlabelled/unlabelled_graph_pattern.h" #include "substitutions/unlabelled/pattern_edge.h" #include "substitutions/unlabelled/pattern_value.h" +#include "utils/bidict/algorithms/transform.h" #include "utils/containers/transform.h" #include "utils/graph/dataflow_graph/algorithms.h" #include "utils/graph/digraph/algorithms/get_topological_ordering.h" diff --git a/lib/substitutions/test/src/substitutions/apply_substitution/perform_shape_inference.cc b/lib/substitutions/test/src/substitutions/apply_substitution/perform_shape_inference.cc index 2bf72d3224..2623b0987f 100644 --- a/lib/substitutions/test/src/substitutions/apply_substitution/perform_shape_inference.cc +++ b/lib/substitutions/test/src/substitutions/apply_substitution/perform_shape_inference.cc @@ -69,7 +69,7 @@ TEST_SUITE(FF_TEST_SUITE) { ParallelTensorShape n1_weight_shape = throw_if_unexpected(get_projection_shape(n1_op_attrs, i0_shape)); ParallelTensorShape n2_output_shape = - throw_if_unexpected(get_output_shape(n2_op_attrs, n1_output_shape)); + get_output_shape(n2_op_attrs, n1_output_shape); ParallelLayerAttrs n1_weight_attrs = ParallelLayerAttrs{ PCGOperatorAttrs{ diff --git a/lib/substitutions/test/src/substitutions/unity_substitution_set.cc b/lib/substitutions/test/src/substitutions/unity_substitution_set.cc index c86cb7e51f..ea8c8529ba 100644 --- a/lib/substitutions/test/src/substitutions/unity_substitution_set.cc +++ b/lib/substitutions/test/src/substitutions/unity_substitution_set.cc @@ -5,12 +5,10 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_substitution_set") { - MachineSpecification machine_spec = MachineSpecification{ + MachineComputeSpecification machine_spec = MachineComputeSpecification{ /*num_nodes=*/2_p, /*num_cpus_per_node=*/8_p, /*num_gpus_per_node=*/4_p, - /*inter_node_bandwidth=*/0.0, - /*intra_node_bandwidth=*/0.0, }; std::vector result = get_substitution_set(machine_spec); diff --git a/lib/task-spec/include/task-spec/arg_ref.h b/lib/task-spec/include/task-spec/arg_ref.h index 8d3402c578..a0b4717f3a 100644 --- a/lib/task-spec/include/task-spec/arg_ref.h +++ b/lib/task-spec/include/task-spec/arg_ref.h @@ -1,10 +1,5 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_ARG_REF_H -#define _FLEXFLOW_LOCAL_EXECUTION_ARG_REF_H - -#include "kernels/ff_handle.h" -// #include "task-spec/serialization.h -#include "utils/type_index.h" -#include "utils/visitable.h" +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_ARG_REF_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_ARG_REF_H namespace FlexFlow { @@ -13,80 +8,6 @@ struct ArgRef { LABEL_TYPE ref_type; }; -template -struct ArgRefSpec { -public: - ArgRefSpec() = delete; - - template - bool holds() const { - return matches(this->type_idx); - } - - LABEL_TYPE const &get_ref_type() const { - return this->ref_type; - } - - std::type_index get_type_index() const { - return this->type_idx; - } - - bool operator==(ArgRefSpec const &other) const { - return this->tie() == other.tie(); - } - - bool operator!=(ArgRefSpec const &other) const { - return this->tie() != other.tie(); - } - - template - static ArgRefSpec create(ArgRef const &r) { - // static_assert(is_serializable::value, "Type must be serializeable"); - - return ArgRefSpec(get_type_index_for_type(), r.ref_type); - } - -private: - ArgRefSpec(std::type_index const &type_index, LABEL_TYPE ref_type) - : 
type_idx(type_index), ref_type(ref_type) {} - - std::type_index type_idx; - LABEL_TYPE ref_type; - - std::tuple - tie() const { - return std::tie(this->type_idx, this->ref_type); - } - friend struct std::hash>; -}; - -template -std::string format_as(ArgRefSpec const &x) { - std::ostringstream oss; - oss << ""; - return oss.str(); -} - -template -std::ostream &operator<<(std::ostream &s, ArgRefSpec const &x) { - return (s << fmt::to_string(x)); -} - } // namespace FlexFlow -namespace std { - -template -struct hash<::FlexFlow::ArgRefSpec> { - size_t operator()(::FlexFlow::ArgRefSpec const &s) const { - size_t result = 0; - ::FlexFlow::hash_combine(result, s.type_idx, s.get_ref_type()); - return result; - } -}; - -} // namespace std - #endif diff --git a/lib/task-spec/include/task-spec/arg_ref_spec.h b/lib/task-spec/include/task-spec/arg_ref_spec.h new file mode 100644 index 0000000000..67183a9c42 --- /dev/null +++ b/lib/task-spec/include/task-spec/arg_ref_spec.h @@ -0,0 +1,89 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_ARG_REF_SPEC_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_ARG_REF_SPEC_H + +#include "utils/type_index.h" +#include "utils/hash-utils.h" +#include "task-spec/arg_ref.h" + +namespace FlexFlow { + +template +struct ArgRefSpec { +public: + ArgRefSpec() = delete; + + template + bool holds() const { + return matches(this->type_idx); + } + + LABEL_TYPE const &get_ref_type() const { + return this->ref_type; + } + + std::type_index get_type_index() const { + return this->type_idx; + } + + bool operator==(ArgRefSpec const &other) const { + return this->tie() == other.tie(); + } + + bool operator!=(ArgRefSpec const &other) const { + return this->tie() != other.tie(); + } + + template + static ArgRefSpec create(ArgRef const &r) { + // static_assert(is_serializable::value, "Type must be serializeable"); + + return ArgRefSpec(get_type_index_for_type(), r.ref_type); + } + +private: + ArgRefSpec(std::type_index const &type_index, LABEL_TYPE ref_type) + : type_idx(type_index), ref_type(ref_type) {} + +private: + std::type_index type_idx; + LABEL_TYPE ref_type; + +private: + std::tuple + tie() const { + return std::tie(this->type_idx, this->ref_type); + } + friend struct std::hash>; +}; + +template +std::string format_as(ArgRefSpec const &x) { + std::ostringstream oss; + oss << ""; + return oss.str(); +} + +template +std::ostream &operator<<(std::ostream &s, ArgRefSpec const &x) { + return (s << fmt::to_string(x)); +} + +} // namespace FlexFlow + +namespace std { + +template +struct hash<::FlexFlow::ArgRefSpec> { + size_t operator()(::FlexFlow::ArgRefSpec const &s) const { + size_t result = 0; + ::FlexFlow::hash_combine(result, s.type_idx, s.get_ref_type()); + return result; + } +}; + +} // namespace std + + +#endif diff --git a/lib/task-spec/include/task-spec/runtime_arg_ref_type.enum.toml b/lib/task-spec/include/task-spec/argumentless_runtime_arg_ref_type.enum.toml similarity index 83% rename from lib/task-spec/include/task-spec/runtime_arg_ref_type.enum.toml rename to lib/task-spec/include/task-spec/argumentless_runtime_arg_ref_type.enum.toml index e33eeebc56..2cad3f21fb 100644 --- a/lib/task-spec/include/task-spec/runtime_arg_ref_type.enum.toml +++ b/lib/task-spec/include/task-spec/argumentless_runtime_arg_ref_type.enum.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "RuntimeArgRefType" +name = "ArgumentlessRuntimeArgRefType" features = [ "fmt", ] diff --git a/lib/task-spec/include/task-spec/config.h b/lib/task-spec/include/task-spec/config.h deleted file mode 100644 
index ff7c4af5a5..0000000000 --- a/lib/task-spec/include/task-spec/config.h +++ /dev/null @@ -1,179 +0,0 @@ -/* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _FLEXFLOW_LOCAL_EXECUTION_CONFIG_H_ -#define _FLEXFLOW_LOCAL_EXECUTION_CONFIG_H_ - -#include "utils/fmt.h" -#include "utils/visitable.h" -#include - -namespace FlexFlow { - -enum class ComputationMode { - TRAINING, - INFERENCE, -}; - -// ======================================================== -// Define Runtime Constants -// ======================================================== -// Pre-assigned const flags -#define MAP_TO_FB_MEMORY 0xABCD0000 -#define MAP_TO_ZC_MEMORY 0xABCE0000 - -struct FFInitInfo : public use_visitable_cmp { - size_t workSpaceSize; - bool allowTensorOpMathConversion; -}; - -using legion_mapping_tag_id_t = unsigned long; - -struct FFConfig : public use_visitable_cmp { -public: - enum PreservedIDs { - InvalidID = 0, - DataParallelism_GPU = 1, - // DataParallelism_GPU_2D = 2, - // DataParallelism_GPU_3D = 3, - // DataParallelism_GPU_4D = 4, - // DataParallelism_GPU_5D = 5, - DataParallelism_CPU = 11, - // DataParallelism_CPU_2D = 12, - // DataParallelism_CPU_3D = 13, - // DataParallelism_CPU_4D = 14, - // DataParallelism_CPU_5D = 15, - }; - - FFConfig() = default; - static legion_mapping_tag_id_t get_hash_id(std::string const &pcname); - -public: - int epochs = 1; - int batchSize = 64; - int numNodes = 1; - int cpusPerNode = 0; - int workersPerNode = 0; - float learningRate = 0.01f; - float weightDecay = 0.0001f; - size_t workSpaceSize = (size_t)1 * 1024 * 1024 * 1024; // 2GB - bool profiling = false; - bool perform_fusion = false; - size_t simulator_work_space_size = (size_t)2 * 1024 * 1024 * 1024; // 2GB - size_t search_budget = -1; - float search_alpha = 1.2f; - bool search_overlap_backward_update = false; - ComputationMode computationMode = ComputationMode::TRAINING; - // Control parallelizable dimensions - bool only_data_parallel = false; - bool enable_parameter_parallel = false; - bool enable_inplace_optimizations = false; - // Control Tensor Op Math Conversion - bool allow_tensor_op_math_conversion = false; - std::optional dataset_path = std::nullopt; - std::optional export_strategy_computation_graph_file = - std::nullopt; - bool include_costs_dot_graph = false; - std::optional substitution_json_path = std::nullopt; - int machine_model_version = 0; - std::optional machine_model_file = std::nullopt; - int simulator_segment_size = 16777216; // 16 MB - int simulator_max_num_segments = 1; - std::optional search_num_nodes = std::nullopt; - std::optional search_num_workers = std::nullopt; - int base_optimize_threshold = 10; - bool enable_control_replication = true; - // The default python data loader type is 2 to enable control replication - int python_data_loader_type = 2; -}; - -struct FFIterationConfig { - FFIterationConfig() = delete; - void reset(); - int seq_length; -}; - 
-FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(FFIterationConfig, seq_length); - -enum FieldIDs { - FID_DATA, -}; - -} // namespace FlexFlow - -VISITABLE_STRUCT(::FlexFlow::FFInitInfo, - workSpaceSize, - allowTensorOpMathConversion); -MAKE_VISIT_HASHABLE(::FlexFlow::FFInitInfo); - -VISITABLE_STRUCT(::FlexFlow::FFConfig, - epochs, - batchSize, - numNodes, - cpusPerNode, - workersPerNode, - learningRate, - weightDecay, - workSpaceSize, - profiling, - perform_fusion, - simulator_work_space_size, - search_budget, - search_alpha, - search_overlap_backward_update, - computationMode, - only_data_parallel, - enable_parameter_parallel, - enable_inplace_optimizations, - allow_tensor_op_math_conversion, - dataset_path, - export_strategy_computation_graph_file, - include_costs_dot_graph, - substitution_json_path, - machine_model_version, - machine_model_file, - simulator_segment_size, - simulator_max_num_segments, - search_num_nodes, - search_num_workers, - base_optimize_threshold, - enable_control_replication, - python_data_loader_type); - -namespace fmt { - -template <> -struct formatter<::FlexFlow::ComputationMode> : formatter { - template - auto format(::FlexFlow::ComputationMode m, FormatContext &ctx) const - -> decltype(ctx.out()) { - using namespace FlexFlow; - - string_view name = "unknown"; - switch (m) { - case ComputationMode::TRAINING: - name = "Training"; - break; - case ComputationMode::INFERENCE: - name = "Inference"; - break; - } - return formatter::format(name, ctx); - } -}; - -} // namespace fmt - -#endif diff --git a/lib/task-spec/include/task-spec/device_specific.h b/lib/task-spec/include/task-spec/device_specific.h index 3ef017f704..29d87c78d9 100644 --- a/lib/task-spec/include/task-spec/device_specific.h +++ b/lib/task-spec/include/task-spec/device_specific.h @@ -1,19 +1,17 @@ #ifndef _FLEXFLOW_LOCAL_EXECUTION_DEVICE_SPECIFIC_H #define _FLEXFLOW_LOCAL_EXECUTION_DEVICE_SPECIFIC_H +#include "pcg/device_id_t.dtg.h" #include "task-spec/serialization.h" -#include "utils/exception.h" namespace FlexFlow { template struct DeviceSpecific { - DeviceSpecific() = delete; template - static DeviceSpecific create(Args &&...args) { - size_t device_idx = 0; + static DeviceSpecific create(device_id_t device_idx, Args &&...args) { return DeviceSpecific(std::make_shared(std::forward(args)...), device_idx); } @@ -26,26 +24,19 @@ struct DeviceSpecific { return this->tie() != other.tie(); } - T const *get(size_t curr_device_idx) const { - if (curr_device_idx != this->device_idx) { - throw mk_runtime_error( - fmt::format("Invalid access to DeviceSpecific: attempted " - "device_idx {} != correct device_idx {})", - curr_device_idx, - this->device_idx)); - } + T const *get(device_id_t curr_device_idx) const { + ASSERT(curr_device_idx == this->device_idx); return (T const *)this->ptr.get(); } - - // TODO: can modify ptr - private: - DeviceSpecific(std::shared_ptr ptr, size_t device_idx) + DeviceSpecific(std::shared_ptr ptr, device_id_t device_idx) : ptr(ptr), device_idx(device_idx) {} +private: std::shared_ptr ptr; - size_t device_idx; + device_id_t device_idx; +private: std::tuple tie() const { return std::tie(this->ptr, this->device_idx); } diff --git a/lib/task-spec/include/task-spec/device_specific_device_states.variant.toml b/lib/task-spec/include/task-spec/device_specific_per_device_op_state.variant.toml similarity index 98% rename from lib/task-spec/include/task-spec/device_specific_device_states.variant.toml rename to lib/task-spec/include/task-spec/device_specific_per_device_op_state.variant.toml 
index b77850c50d..9f6d9aa3cd 100644 --- a/lib/task-spec/include/task-spec/device_specific_device_states.variant.toml +++ b/lib/task-spec/include/task-spec/device_specific_per_device_op_state.variant.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "DeviceSpecificDeviceStates" +name = "DeviceSpecificPerDeviceOpState" features = [ "eq", ] diff --git a/lib/task-spec/include/task-spec/ff_config.struct.toml b/lib/task-spec/include/task-spec/ff_config.struct.toml new file mode 100644 index 0000000000..959e96092d --- /dev/null +++ b/lib/task-spec/include/task-spec/ff_config.struct.toml @@ -0,0 +1,115 @@ +namespace = "FlexFlow" +name = "FFConfig" +features = [ + "eq", + "ord", + "hash", + "fmt", + "rapidcheck", + "json", +] + +includes = [ + "utils/positive_int/positive_int.h", + "utils/nonnegative_int/nonnegative_int.h", + "", + "", +] + +src_includes = [ + "utils/rapidcheck/optional.h", + "utils/json/optional.h", + "utils/fmt/optional.h", +] + +[[fields]] +name = "epochs" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "batch_size" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "num_nodes" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "cpus_per_node" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "gpus_per_node" +type = "::FlexFlow::positive_int" + +[[fields]] +name = "learning_rate" +type = "float" + +[[fields]] +name = "weight_decay" +type = "float" + +[[fields]] +name = "workspace_size" +type = "::FlexFlow::nonnegative_int" + +[[fields]] +name = "enable_profiling" +type = "bool" + +[[fields]] +name = "perform_fusion" +type = "bool" + +[[fields]] +name = "simulator_workspace_size" +type = "::FlexFlow::nonnegative_int" + +[[fields]] +name = "search_budget" +type = "std::optional<::FlexFlow::positive_int>" + +[[fields]] +name = "search_alpha" +type = "float" + +[[fields]] +name = "search_overlap_backward_update" +type = "bool" + +[[fields]] +name = "only_data_parallel" +type = "bool" + +[[fields]] +name = "enable_parameter_parallel" +type = "bool" + +[[fields]] +name = "enable_inplace_optimizations" +type = "bool" + +[[fields]] +name = "allow_tensor_op_math_conversion" +type = "bool" + +[[fields]] +name = "dataset_path" +type = "std::optional" + +[[fields]] +name = "export_strategy_computation_graph_file" +type = "std::optional" + +[[fields]] +name = "include_costs_dot_graph" +type = "bool" + +[[fields]] +name = "substitution_json_path" +type = "std::optional" + +[[fields]] +name = "base_optimize_threshold" +type = "::FlexFlow::positive_int" diff --git a/lib/task-spec/include/task-spec/ff_init_info.struct.toml b/lib/task-spec/include/task-spec/ff_init_info.struct.toml new file mode 100644 index 0000000000..dbb8b76a1d --- /dev/null +++ b/lib/task-spec/include/task-spec/ff_init_info.struct.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "FFInitInfo" +features = [ + "eq", + "ord", + "hash", + "fmt", + "rapidcheck", + "json", +] + +includes = [ + "utils/nonnegative_int/nonnegative_int.h", +] + +[[fields]] +name = "workspace_size" +type = "::FlexFlow::nonnegative_int" + +[[fields]] +name = "allow_tensor_op_math_conversion" +type = "bool" diff --git a/lib/task-spec/include/task-spec/ff_iteration_config.struct.toml b/lib/task-spec/include/task-spec/ff_iteration_config.struct.toml new file mode 100644 index 0000000000..6c9e59ba8a --- /dev/null +++ b/lib/task-spec/include/task-spec/ff_iteration_config.struct.toml @@ -0,0 +1,18 @@ +namespace = "FlexFlow" +name = "FFIterationConfig" +features = [ + "eq", + "ord", + "hash", + "fmt", + "rapidcheck", + "json", 
+] + +includes = [ + "utils/positive_int/positive_int.h", +] + +[[fields]] +name = "seq_length" +type = "::FlexFlow::positive_int" diff --git a/lib/task-spec/include/task-spec/forward_tensor_source.h b/lib/task-spec/include/task-spec/forward_tensor_source.h deleted file mode 100644 index 7adde6e145..0000000000 --- a/lib/task-spec/include/task-spec/forward_tensor_source.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_FORWARD_TENSOR_SOURCE_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_FORWARD_TENSOR_SOURCE_H - -#include "task-spec/forward_tensor_guid_t.dtg.h" - -namespace FlexFlow { - -struct ForwardTensorSource { -public: - ForwardTensorSource(); - - forward_tensor_guid_t new_forward_tensor(); - - void reset(); - -private: - static int next_available_forward_tensor_id; -}; - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/fwb_op_task_type.enum.toml b/lib/task-spec/include/task-spec/fwb_op_task_type.enum.toml new file mode 100644 index 0000000000..25ef8e3034 --- /dev/null +++ b/lib/task-spec/include/task-spec/fwb_op_task_type.enum.toml @@ -0,0 +1,14 @@ +namespace = "FlexFlow" +name = "FwbOpTaskType" +features = [ + "hash", + "fmt", + "rapidcheck", + "json", +] + +[[values]] +name = "FWD" + +[[values]] +name = "BWD" diff --git a/lib/task-spec/include/task-spec/fwb_op_task_type.h b/lib/task-spec/include/task-spec/fwb_op_task_type.h new file mode 100644 index 0000000000..6c6e0cd9e1 --- /dev/null +++ b/lib/task-spec/include/task-spec/fwb_op_task_type.h @@ -0,0 +1,15 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_FWB_OP_TASK_TYPE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_FWB_OP_TASK_TYPE_H + +#include "task-spec/fwb_op_task_type.dtg.h" +#include "task-spec/op_task_type.dtg.h" +#include + +namespace FlexFlow { + +std::optional + op_task_type_from_fwb_op_task_type(FwbOpTaskType); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/fwb_tensor_slot_binding.struct.toml b/lib/task-spec/include/task-spec/fwb_tensor_slot_binding.struct.toml new file mode 100644 index 0000000000..58bef0e396 --- /dev/null +++ b/lib/task-spec/include/task-spec/fwb_tensor_slot_binding.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "FwbTensorSlotBinding" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "task-spec/fwb_tensor_slot_id_t.dtg.h", + "task-spec/op_tensor_spec.dtg.h", +] + +[[fields]] +name = "slot" +type = "::FlexFlow::fwb_tensor_slot_id_t" + +[[fields]] +name = "bound" +type = "::FlexFlow::OpTensorSpec" diff --git a/lib/task-spec/include/task-spec/slot_grad_id.struct.toml b/lib/task-spec/include/task-spec/fwb_tensor_slot_id_t.struct.toml similarity index 89% rename from lib/task-spec/include/task-spec/slot_grad_id.struct.toml rename to lib/task-spec/include/task-spec/fwb_tensor_slot_id_t.struct.toml index a6533ea884..f866b1c7de 100644 --- a/lib/task-spec/include/task-spec/slot_grad_id.struct.toml +++ b/lib/task-spec/include/task-spec/fwb_tensor_slot_id_t.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "SlotGradId" +name = "fwb_tensor_slot_id_t" features = [ "eq", "ord", diff --git a/lib/task-spec/include/task-spec/fwb_tensor_type.enum.toml b/lib/task-spec/include/task-spec/fwb_tensor_type.enum.toml new file mode 100644 index 0000000000..68c5358f72 --- /dev/null +++ b/lib/task-spec/include/task-spec/fwb_tensor_type.enum.toml @@ -0,0 +1,14 @@ +namespace = "FlexFlow" +name = "FwbTensorType" +features = [ + "hash", + "fmt", + "rapidcheck", + "json", +] 
+ +[[values]] +name = "FORWARD" + +[[values]] +name = "GRADIENT" diff --git a/lib/task-spec/include/task-spec/fwd_bwd_op_task_impl_function.h b/lib/task-spec/include/task-spec/fwd_bwd_op_task_impl_function.h index 3620ff87cb..cf459b8517 100644 --- a/lib/task-spec/include/task-spec/fwd_bwd_op_task_impl_function.h +++ b/lib/task-spec/include/task-spec/fwd_bwd_op_task_impl_function.h @@ -2,12 +2,13 @@ #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_FWD_BWD_OP_TASK_IMPL_FUNCTION_H #include "task-spec/task_argument_accessor.h" +#include "utils/units/milliseconds_t.h" namespace FlexFlow { struct FwdBwdOpTaskImplFunction { - std::optional (*function_ptr)(TaskArgumentAccessor const &); + std::optional (*function_ptr)(TaskArgumentAccessor const &); bool operator==(FwdBwdOpTaskImplFunction const &) const; bool operator!=(FwdBwdOpTaskImplFunction const &) const; diff --git a/lib/task-spec/include/task-spec/generic_task_impl_function.h b/lib/task-spec/include/task-spec/generic_task_impl_function.h index 31bf132e4f..afe7fba40b 100644 --- a/lib/task-spec/include/task-spec/generic_task_impl_function.h +++ b/lib/task-spec/include/task-spec/generic_task_impl_function.h @@ -1,7 +1,7 @@ #ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_GENERIC_TASK_IMPL_FUNCTION_H #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_GENERIC_TASK_IMPL_FUNCTION_H -#include "task-spec/device_specific_device_states.dtg.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" #include "task-spec/task_argument_accessor.h" namespace FlexFlow { diff --git a/lib/task-spec/include/task-spec/gradient_tensor_source.h b/lib/task-spec/include/task-spec/gradient_tensor_source.h deleted file mode 100644 index 14ebf05d43..0000000000 --- a/lib/task-spec/include/task-spec/gradient_tensor_source.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_GRADIENT_TENSOR_SOURCE_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_GRADIENT_TENSOR_SOURCE_H - -#include "task-spec/gradient_tensor_guid_t.dtg.h" - -namespace FlexFlow { - -struct GradientTensorSource { -public: - GradientTensorSource(); - - gradient_tensor_guid_t new_gradient_tensor(); - - void reset(); - -private: - static int next_available_gradient_tensor_id; -}; - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/init_op_task_impl_function.h b/lib/task-spec/include/task-spec/init_op_task_impl_function.h index 97daa7ef56..11358cc2ee 100644 --- a/lib/task-spec/include/task-spec/init_op_task_impl_function.h +++ b/lib/task-spec/include/task-spec/init_op_task_impl_function.h @@ -1,7 +1,7 @@ #ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_INIT_OP_TASK_IMPL_FUNCTION_H #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_INIT_OP_TASK_IMPL_FUNCTION_H -#include "task-spec/device_specific_device_states.dtg.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" #include "task-spec/task_argument_accessor.h" namespace FlexFlow { @@ -16,7 +16,7 @@ struct InitOpTaskImplFunction { bool operator>=(InitOpTaskImplFunction const &) const; public: - DeviceSpecificDeviceStates (*function_ptr)(TaskArgumentAccessor const &); + DeviceSpecificPerDeviceOpState (*function_ptr)(TaskArgumentAccessor const &); }; std::string format_as(InitOpTaskImplFunction const &x); diff --git a/lib/task-spec/include/task-spec/itask_argument_accessor.h b/lib/task-spec/include/task-spec/itask_argument_accessor.h index 2e693e7983..d941ef7149 100644 --- a/lib/task-spec/include/task-spec/itask_argument_accessor.h +++ 
b/lib/task-spec/include/task-spec/itask_argument_accessor.h @@ -2,10 +2,11 @@ #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_ITASK_ARGUMENT_ACCESSOR_H #include "kernels/allocation.h" +#include "pcg/device_id_t.dtg.h" #include "task-spec/concrete_arg_spec.h" #include "task-spec/op_task_signature.h" #include "task-spec/privilege_tensor_accessor.h" -#include "task-spec/tensor_type.dtg.h" +#include "task-spec/training_tensor_type.dtg.h" namespace FlexFlow { @@ -18,12 +19,12 @@ struct ITaskArgumentAccessor { virtual GenericTensorAccessor get_tensor(slot_id_t slot, Permissions priv, - TensorType tensor_type) const = 0; + TrainingTensorType tensor_type) const = 0; virtual VariadicGenericTensorAccessor get_variadic_tensor( - slot_id_t slot, Permissions priv, TensorType tensor_type) const = 0; + slot_id_t slot, Permissions priv, TrainingTensorType tensor_type) const = 0; virtual Allocator get_allocator() const = 0; - virtual size_t get_device_idx() const = 0; + virtual device_id_t get_device_idx() const = 0; }; CHECK_RC_COPY_VIRTUAL_COMPLIANT(ITaskArgumentAccessor); diff --git a/lib/task-spec/include/task-spec/loss_functions.h b/lib/task-spec/include/task-spec/loss_functions.h index a5f5886caa..1b812c1be8 100644 --- a/lib/task-spec/include/task-spec/loss_functions.h +++ b/lib/task-spec/include/task-spec/loss_functions.h @@ -17,21 +17,21 @@ #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_LOSS_FUNCTIONS_H #include "op-attrs/ops/loss_functions.h" -#include "task-spec/forward_tensor_guid_t.dtg.h" -#include "task-spec/gradient_tensor_guid_t.dtg.h" -#include "task-spec/loss_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_forward_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_gradient_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_loss_tensor_guid_t.dtg.h" #include "task-spec/task_impl_function.dtg.h" -#include "task-spec/task_invocation.dtg.h" -#include "task-spec/task_signature.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "task-spec/runtime_task_signature.h" namespace FlexFlow { TaskImplFunction get_loss_bwd_task_impl(); -TaskSignature get_loss_bwd_signature(); -TaskInvocation backward(LossAttrs const &, - forward_tensor_guid_t logit, - gradient_tensor_guid_t logit_grad, - loss_tensor_guid_t label); +RuntimeTaskSignature get_loss_bwd_signature(); +RuntimeTaskInvocation loss_attrs_backward(LossAttrs const &, + symbolic_forward_tensor_guid_t logit, + symbolic_gradient_tensor_guid_t logit_grad, + symbolic_loss_tensor_guid_t label); } // namespace FlexFlow diff --git a/lib/task-spec/include/task-spec/lower_op_task_invocation_to_runtime_task_invocation.h b/lib/task-spec/include/task-spec/lower_op_task_invocation_to_runtime_task_invocation.h new file mode 100644 index 0000000000..2b17a76664 --- /dev/null +++ b/lib/task-spec/include/task-spec/lower_op_task_invocation_to_runtime_task_invocation.h @@ -0,0 +1,56 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_LOWER_OP_TASK_INVOCATION_TO_RUNTIME_TASK_INVOCATION_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_LOWER_OP_TASK_INVOCATION_TO_RUNTIME_TASK_INVOCATION_H + +#include "task-spec/device_specific_per_device_op_state.dtg.h" +#include "task-spec/op_arg_ref_spec.h" +#include "task-spec/op_task_invocation.h" +#include "task-spec/runtime_arg_config.dtg.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.dtg.h" +#include 
"task-spec/symbolic_training_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.dtg.h" +#include "task-spec/fwb_tensor_slot_binding.dtg.h" +#include "task-spec/training_tensor_slot_binding.dtg.h" +#include "task-spec/symbolic_layer_tensor_shape_signature.dtg.h" + +namespace FlexFlow { + +RuntimeTaskInvocation + lower_op_task_invocation_to_runtime_task_invocation( + OpTaskInvocation const &op_task_invocation, + symbolic_layer_guid_t symbolic_layer_guid, + SymbolicLayerTrainingTensorGroupSignatureWithShapes const &layer_signature); + +TrainingTensorSlotBinding + lower_fwb_tensor_binding_to_training_tensor_binding( + SymbolicLayerTrainingTensorGroupSignature const &training_layer_signature, + FwbTensorSlotBinding const &fwb_slot_binding); + +RuntimeArgSpec lower_op_arg_spec_to_runtime_arg_spec( + OpArgSpec const &op_arg_spec, + symbolic_layer_guid_t symbolic_layer_guid, + SymbolicLayerTensorShapeSignature const &op_shape_signature); + +RuntimeArgSpec lower_op_arg_ref_spec_to_runtime_arg_spec( + OpArgRefSpec const &, + symbolic_layer_guid_t symbolic_layer_guid, + SymbolicLayerTensorShapeSignature const &); + +// TODO(@lockshaw)(#pr): this really shouldn't be here +ConcreteArgSpec + lower_runtime_arg_ref_spec_to_concrete_arg_spec( + RuntimeArgRefSpec const &, + RuntimeArgConfig const &r, + DeviceSpecific const &, + std::function(symbolic_layer_guid_t)> const &); + +ConcreteArgSpec lower_argumentless_arg_ref_to_concrete_arg_spec( + ArgumentlessRuntimeArgRefType, + RuntimeArgConfig const &, + DeviceSpecific); + + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/op_arg_ref.h b/lib/task-spec/include/task-spec/op_arg_ref.h index 88882abd46..517929a2e5 100644 --- a/lib/task-spec/include/task-spec/op_arg_ref.h +++ b/lib/task-spec/include/task-spec/op_arg_ref.h @@ -1,5 +1,5 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_OP_ARG_REF_H -#define _FLEXFLOW_LOCAL_EXECUTION_OP_ARG_REF_H +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OP_ARG_REF_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OP_ARG_REF_H #include "op-attrs/parallel_tensor_shape.dtg.h" #include "task-spec/arg_ref.h" @@ -12,8 +12,6 @@ namespace FlexFlow { template using OpArgRef = ArgRef; -using OpArgRefSpec = ArgRefSpec; - template OpArgRef per_device_op_state() { OpArgRefType op_arg_ref_type = OpArgRefType{PerDeviceOpStateRefType{}}; diff --git a/lib/task-spec/include/task-spec/op_arg_ref_spec.h b/lib/task-spec/include/task-spec/op_arg_ref_spec.h new file mode 100644 index 0000000000..97c3519e1d --- /dev/null +++ b/lib/task-spec/include/task-spec/op_arg_ref_spec.h @@ -0,0 +1,13 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OP_ARG_REF_SPEC_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OP_ARG_REF_SPEC_H + +#include "task-spec/op_arg_ref_type.dtg.h" +#include "task-spec/arg_ref_spec.h" + +namespace FlexFlow { + +using OpArgRefSpec = ArgRefSpec; + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/op_arg_spec.variant.toml b/lib/task-spec/include/task-spec/op_arg_spec.variant.toml index a03bc222e8..b2757e24f2 100644 --- a/lib/task-spec/include/task-spec/op_arg_spec.variant.toml +++ b/lib/task-spec/include/task-spec/op_arg_spec.variant.toml @@ -2,17 +2,12 @@ namespace = "FlexFlow" name = "OpArgSpec" features = [ "eq", - # "ord", - # "hash", - # "json", - # "fmt", - # "rapidcheck", ] includes = [ "task-spec/concrete_arg_spec.h", - "task-spec/op_arg_ref.h", - "task-spec/runtime_arg_ref.h", + "task-spec/op_arg_ref_spec.h", + 
"task-spec/runtime_arg_ref_spec.h", ] [[values]] diff --git a/lib/task-spec/include/task-spec/op_ordered_slot_signature.h b/lib/task-spec/include/task-spec/op_ordered_slot_signature.h new file mode 100644 index 0000000000..b27c259900 --- /dev/null +++ b/lib/task-spec/include/task-spec/op_ordered_slot_signature.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OP_ORDERED_SLOT_SIGNATURE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OP_ORDERED_SLOT_SIGNATURE_H + +#include "task-spec/op_ordered_slot_signature.dtg.h" +#include "task-spec/op_task_binding.h" + +namespace FlexFlow { + +OpOrderedSlotSignature get_op_ordered_slot_signature_for_binding(OpTaskBinding const &, + nonnegative_int num_inputs, + nonnegative_int num_weights, + nonnegative_int num_outputs); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/op_ordered_slot_signature.struct.toml b/lib/task-spec/include/task-spec/op_ordered_slot_signature.struct.toml new file mode 100644 index 0000000000..dce8426fcc --- /dev/null +++ b/lib/task-spec/include/task-spec/op_ordered_slot_signature.struct.toml @@ -0,0 +1,34 @@ +namespace = "FlexFlow" +name = "OpOrderedSlotSignature" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "task-spec/is_grad.dtg.h", + "task-spec/fwb_tensor_slot_id_t.dtg.h", +] + +src_includes = [ + "utils/ord/vector.h", + "utils/ord/unordered_set.h", + "utils/hash/vector.h", + "utils/hash/unordered_set.h", + "utils/fmt/vector.h", + "utils/fmt/unordered_set.h", +] + +[[fields]] +name = "input_slots" +type = "std::vector>" + +[[fields]] +name = "weight_slots" +type = "std::vector>" + +[[fields]] +name = "output_slots" +type = "std::vector>" diff --git a/lib/task-spec/include/task-spec/op_task_binding.h b/lib/task-spec/include/task-spec/op_task_binding.h index bcfea33877..0ce0e5dc16 100644 --- a/lib/task-spec/include/task-spec/op_task_binding.h +++ b/lib/task-spec/include/task-spec/op_task_binding.h @@ -4,9 +4,10 @@ #include "task-spec/op_arg_ref.h" #include "task-spec/op_arg_spec.dtg.h" #include "task-spec/op_tensor_spec.h" -#include "task-spec/slot_grad_id.dtg.h" +#include "task-spec/fwb_tensor_slot_id_t.dtg.h" #include "task-spec/slot_id_t.dtg.h" #include "task-spec/variadic_tensor_ref.h" +#include "task-spec/runtime_arg_ref.h" namespace FlexFlow { @@ -71,17 +72,18 @@ struct OpTaskBinding { void bind_arg(slot_id_t name, OpArgRef const &ref) { this->insert_arg_spec(name, OpArgSpec{OpArgRefSpec::create(ref)}); } + bool operator==(OpTaskBinding const &other) const; bool operator!=(OpTaskBinding const &other) const; - std::unordered_map const & + std::unordered_map const & get_tensor_bindings() const; std::unordered_map const &get_arg_bindings() const; void bind_from_forward(OpTaskBinding const &fwd); private: - std::unordered_map tensor_bindings; + std::unordered_map tensor_bindings; std::unordered_map arg_bindings; private: @@ -90,7 +92,7 @@ struct OpTaskBinding { tie() const; }; -OpTaskBinding infer_bwd_binding(OpTaskBinding const &fwd); +OpTaskBinding infer_bwd_binding(OpTaskBinding const &); } // namespace FlexFlow diff --git a/lib/task-spec/include/task-spec/op_task_signature.h b/lib/task-spec/include/task-spec/op_task_signature.h index eba0023906..b2f512796f 100644 --- a/lib/task-spec/include/task-spec/op_task_signature.h +++ b/lib/task-spec/include/task-spec/op_task_signature.h @@ -11,7 +11,6 @@ #include "utils/hash/unordered_map.h" #include "utils/hash/unordered_set.h" #include "utils/type_index.h" -#include "utils/visitable.h" namespace 
FlexFlow { @@ -89,13 +88,24 @@ struct OpTaskSignature { void set_arg_types(std::unordered_map const &); std::unordered_map get_arg_types() const; + bool operator==(OpTaskSignature const &) const; + bool operator!=(OpTaskSignature const &) const; + +public: OpTaskType type; std::optional return_value; std::unordered_map task_arg_types; std::unordered_set op_tensor_slots; + +private: + std::tuple + tie() const; + + friend ::std::hash<::FlexFlow::OpTaskSignature>; }; -FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION( - OpTaskSignature, type, return_value, task_arg_types, op_tensor_slots); std::string format_as(OpTaskSignature const &x); std::ostream &operator<<(std::ostream &s, OpTaskSignature const &x); @@ -104,4 +114,13 @@ OpTaskSignature infer_bwd_signature(OpTaskSignature const &fwd); } // namespace FlexFlow +namespace std { + +template <> +struct hash<::FlexFlow::OpTaskSignature> { + size_t operator()(::FlexFlow::OpTaskSignature const &) const; +}; + +} // namespace std + #endif diff --git a/lib/task-spec/include/task-spec/op_task_to_task_invocation.h b/lib/task-spec/include/task-spec/op_task_to_task_invocation.h deleted file mode 100644 index 3208e9d049..0000000000 --- a/lib/task-spec/include/task-spec/op_task_to_task_invocation.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_OP_TASK_TO_TASK_INVOCATION_H -#define _FLEXFLOW_LOCAL_EXECUTION_OP_TASK_TO_TASK_INVOCATION_H - -#include "pcg/cg_operator_tensor_shape_signature.dtg.h" -#include "pcg/computation_graph.dtg.h" -#include "pcg/layer_guid_t.dtg.h" -#include "task-spec/device_specific_device_states.dtg.h" -#include "task-spec/op_task_invocation.h" -#include "task-spec/runtime_arg_config.dtg.h" -#include "task-spec/task_invocation.dtg.h" -#include "task-spec/training_layer_plus_context.dtg.h" -#include "task-spec/training_layer_tensor_group_signature.dtg.h" - -namespace FlexFlow { - -TaskInvocation - lower_to_task_invocation(OpTaskInvocation const &op_task_invocation, - TrainingLayerPlusContext const &training_layer, - std::optional const - &device_specific_device_states); - -std::pair lower_tensor_binding( - TrainingLayerTensorGroupSignature const &training_layer_signature, - SlotGradId const &slot_grad_id, - OpTensorSpec const &op_tensor_spec); - -TaskArgSpec lower_to_task_arg_spec( - OpArgSpec const &op_arg_spec, - CGOperatorTensorShapeSignature const &op_shape_signature, - layer_guid_t const &layer_guid, - std::optional const - &device_specific_device_states); - -ConcreteArgSpec lower_to_concrete_arg_spec(RuntimeArgRefSpec const &, - RuntimeArgConfig const &); - -ConcreteArgSpec lower_to_concrete_arg_spec( - OpArgRefSpec const &, - CGOperatorTensorShapeSignature const &, - layer_guid_t const &, - std::optional const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/op_tensor_slot_spec.struct.toml b/lib/task-spec/include/task-spec/op_tensor_slot_spec.struct.toml index 3a388b8559..56036261fc 100644 --- a/lib/task-spec/include/task-spec/op_tensor_slot_spec.struct.toml +++ b/lib/task-spec/include/task-spec/op_tensor_slot_spec.struct.toml @@ -10,7 +10,7 @@ features = [ includes = [ "task-spec/slot_id_t.dtg.h", "task-spec/slot_type.dtg.h", - "pcg/tensor_role.dtg.h", + "op-attrs/tensor_role.dtg.h", "task-spec/is_grad.dtg.h", "task-spec/op_slot_options.dtg.h", ] diff --git a/lib/task-spec/include/task-spec/op_tensor_spec.struct.toml b/lib/task-spec/include/task-spec/op_tensor_spec.struct.toml index 3e790c7e08..d6cfa38c62 100644 --- 
a/lib/task-spec/include/task-spec/op_tensor_spec.struct.toml +++ b/lib/task-spec/include/task-spec/op_tensor_spec.struct.toml @@ -10,7 +10,7 @@ features = [ ] includes = [ - "pcg/tensor_role.dtg.h", + "op-attrs/tensor_role.dtg.h", "task-spec/op_slot_options.dtg.h", "utils/nonnegative_int/nonnegative_int.h", ] diff --git a/lib/task-spec/include/task-spec/op_training_tensor_type.enum.toml b/lib/task-spec/include/task-spec/op_training_tensor_type.enum.toml new file mode 100644 index 0000000000..b224700ee2 --- /dev/null +++ b/lib/task-spec/include/task-spec/op_training_tensor_type.enum.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "OpTrainingTensorType" +features = [ + "hash", + "fmt", + "rapidcheck", + "json", +] + +[[values]] +name = "FORWARD" + +[[values]] +name = "GRADIENT" + +[[values]] +name = "OPTIMIZER" diff --git a/lib/task-spec/include/task-spec/optimizer.h b/lib/task-spec/include/task-spec/optimizer.h index 5b898d8699..8e93530558 100644 --- a/lib/task-spec/include/task-spec/optimizer.h +++ b/lib/task-spec/include/task-spec/optimizer.h @@ -5,32 +5,32 @@ #include "pcg/optimizers/adam_optimizer_attrs.dtg.h" #include "pcg/optimizers/sgd_optimizer_attrs.dtg.h" #include "task-spec/task_impl_function.dtg.h" -#include "task-spec/task_invocation.dtg.h" -#include "task-spec/task_signature.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "task-spec/runtime_task_signature.h" namespace FlexFlow { -TaskSignature get_update_signature(OptimizerAttrs const &); -TaskInvocation get_update_invocation( +RuntimeTaskSignature optimizer_attrs_get_update_signature(OptimizerAttrs const &); +RuntimeTaskInvocation optimizer_attrs_get_update_invocation( OptimizerAttrs const &, - forward_tensor_guid_t const &weight, - gradient_tensor_guid_t const &weight_grad, - std::vector const &grad_buffer_tensors); + symbolic_forward_tensor_guid_t const &weight, + symbolic_gradient_tensor_guid_t const &weight_grad, + std::vector const &grad_buffer_tensors); TaskImplFunction get_update_task_impl(OptimizerAttrs const &); -TaskSignature get_sgd_update_signature(); -TaskInvocation sgd_update(SGDOptimizerAttrs const &, - forward_tensor_guid_t const &weight, - gradient_tensor_guid_t const &weight_grad, - optimizer_tensor_guid_t const &sgd_v); +RuntimeTaskSignature get_sgd_update_signature(); +RuntimeTaskInvocation sgd_update(SGDOptimizerAttrs const &, + symbolic_forward_tensor_guid_t const &weight, + symbolic_gradient_tensor_guid_t const &weight_grad, + symbolic_optimizer_tensor_guid_t const &sgd_v); TaskImplFunction get_sgd_update_task_impl(); -TaskSignature get_adam_update_signature(); -TaskInvocation adam_update(AdamOptimizerAttrs const &, - forward_tensor_guid_t const &weight, - gradient_tensor_guid_t const &weight_grad, - optimizer_tensor_guid_t const &adam_v, - optimizer_tensor_guid_t const &adam_m); +RuntimeTaskSignature get_adam_update_signature(); +RuntimeTaskInvocation adam_update(AdamOptimizerAttrs const &, + symbolic_forward_tensor_guid_t const &weight, + symbolic_gradient_tensor_guid_t const &weight_grad, + symbolic_optimizer_tensor_guid_t const &adam_v, + symbolic_optimizer_tensor_guid_t const &adam_m); TaskImplFunction get_adam_update_task_impl(); } // namespace FlexFlow diff --git a/lib/task-spec/include/task-spec/optimizer_tensor_source.h b/lib/task-spec/include/task-spec/optimizer_tensor_source.h deleted file mode 100644 index 2f10c5c35b..0000000000 --- a/lib/task-spec/include/task-spec/optimizer_tensor_source.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef 
_FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OPTIMIZER_TENSOR_SOURCE_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OPTIMIZER_TENSOR_SOURCE_H - -#include "task-spec/optimizer_tensor_guid_t.dtg.h" - -namespace FlexFlow { - -struct OptimizerTensorSource { -public: - OptimizerTensorSource(); - - optimizer_tensor_guid_t new_optimizer_tensor(); - - void reset(); - -private: - static int next_available_optimizer_tensor_id; -}; - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/parallel_tensor_shape_ref_type.struct.toml b/lib/task-spec/include/task-spec/parallel_tensor_shape_ref_type.struct.toml index 4ff411d17b..9c80fafb1c 100644 --- a/lib/task-spec/include/task-spec/parallel_tensor_shape_ref_type.struct.toml +++ b/lib/task-spec/include/task-spec/parallel_tensor_shape_ref_type.struct.toml @@ -10,7 +10,7 @@ features = [ includes = [ "utils/nonnegative_int/nonnegative_int.h", - "pcg/tensor_role.dtg.h", + "op-attrs/tensor_role.dtg.h", ] [[fields]] diff --git a/lib/task-spec/include/task-spec/per_device_op_state.h b/lib/task-spec/include/task-spec/per_device_op_state.h index ae6c93807c..6d291c7d45 100644 --- a/lib/task-spec/include/task-spec/per_device_op_state.h +++ b/lib/task-spec/include/task-spec/per_device_op_state.h @@ -2,15 +2,15 @@ #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_PER_DEVICE_OP_STATE_H #include "task-spec/concrete_arg_spec.h" -#include "task-spec/device_specific_device_states.dtg.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" #include "task-spec/per_device_op_state.dtg.h" #include "utils/type_index.h" namespace FlexFlow { PerDeviceOpState - get_device_state_from_device_specific(DeviceSpecificDeviceStates const &, - size_t device_idx); + get_device_state_from_device_specific(DeviceSpecificPerDeviceOpState const &, + device_id_t device_idx); } diff --git a/lib/task-spec/include/task-spec/per_device_op_state_runtime_arg_ref_type.struct.toml b/lib/task-spec/include/task-spec/per_device_op_state_runtime_arg_ref_type.struct.toml new file mode 100644 index 0000000000..b1fe504ab2 --- /dev/null +++ b/lib/task-spec/include/task-spec/per_device_op_state_runtime_arg_ref_type.struct.toml @@ -0,0 +1,16 @@ +namespace = "FlexFlow" +name = "PerDeviceOpStateRuntimeArgRefType" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "task-spec/symbolic_layer_guid_t.dtg.h", +] + +[[fields]] +name = "layer" +type = "::FlexFlow::symbolic_layer_guid_t" diff --git a/lib/task-spec/include/task-spec/profiling.h b/lib/task-spec/include/task-spec/profiling.h index 91774f69ef..adbe67611a 100644 --- a/lib/task-spec/include/task-spec/profiling.h +++ b/lib/task-spec/include/task-spec/profiling.h @@ -1,15 +1,16 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_PROFILING_H -#define _FLEXFLOW_LOCAL_EXECUTION_PROFILING_H +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_PROFILING_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_PROFILING_H #include "kernels/profiling.h" -#include "spdlog/spdlog.h" +#include +#include "utils/containers/transform.h" namespace FlexFlow { enum class EnableProfiling { YES, NO }; template -std::optional profile(F const &f, +std::optional profile(F const &f, ProfilingSettings profiling, DeviceType device_type, Str s, @@ -19,7 +20,10 @@ std::optional profile(F const &f, if (elapsed.has_value()) { spdlog::debug(s, elapsed.value()); } - return elapsed; + return transform(elapsed, + [](float f) { + return milliseconds_t{f}; + }); } } // namespace FlexFlow diff --git a/lib/task-spec/include/task-spec/runtime_arg_config.h 
b/lib/task-spec/include/task-spec/runtime_arg_config.h index 5358caf331..418cc53e61 100644 --- a/lib/task-spec/include/task-spec/runtime_arg_config.h +++ b/lib/task-spec/include/task-spec/runtime_arg_config.h @@ -1,15 +1,20 @@ #ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_ARG_CONFIG_H #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_ARG_CONFIG_H +#include "task-spec/concrete_arg_spec.h" #include "task-spec/runtime_arg_config.dtg.h" +#include "task-spec/slot_id_t.dtg.h" +#include "task-spec/runtime_task_binding.h" namespace FlexFlow { RuntimeArgConfig - cpu_make_runtime_arg_config(EnableProfiling enable_profiling, + cpu_make_runtime_arg_config(device_id_t device_id, + EnableProfiling enable_profiling, ProfilingSettings profiling_settings); RuntimeArgConfig - gpu_make_runtime_arg_config(PerDeviceFFHandle const &ff_handle, + gpu_make_runtime_arg_config(device_id_t device_id, + PerDeviceFFHandle const &ff_handle, EnableProfiling enable_profiling, ProfilingSettings profiling_settings); diff --git a/lib/task-spec/include/task-spec/runtime_arg_config.struct.toml b/lib/task-spec/include/task-spec/runtime_arg_config.struct.toml index 9d77616306..8e2b908585 100644 --- a/lib/task-spec/include/task-spec/runtime_arg_config.struct.toml +++ b/lib/task-spec/include/task-spec/runtime_arg_config.struct.toml @@ -3,15 +3,9 @@ name = "RuntimeArgConfig" features = [] includes = [ - "kernels/device_handle_t.dtg.h", - "task-spec/device_specific.h", "task-spec/profiling.h", ] -[[fields]] -name = "ff_handle" -type = "::FlexFlow::DeviceSpecific<::FlexFlow::device_handle_t>" - [[fields]] name = "enable_profiling" type = "::FlexFlow::EnableProfiling" diff --git a/lib/task-spec/include/task-spec/runtime_arg_ref.h b/lib/task-spec/include/task-spec/runtime_arg_ref.h index 532482f89e..5717c74767 100644 --- a/lib/task-spec/include/task-spec/runtime_arg_ref.h +++ b/lib/task-spec/include/task-spec/runtime_arg_ref.h @@ -5,8 +5,10 @@ #include "kernels/profiling_settings.dtg.h" #include "pcg/device_type.dtg.h" #include "task-spec/arg_ref.h" -#include "task-spec/config.h" #include "task-spec/device_specific.h" +#include "task-spec/ff_config.dtg.h" +#include "task-spec/ff_iteration_config.dtg.h" +#include "task-spec/per_device_op_state.dtg.h" #include "task-spec/runtime_arg_ref_type.dtg.h" namespace FlexFlow { @@ -14,12 +16,11 @@ namespace FlexFlow { template using RuntimeArgRef = ArgRef; -using RuntimeArgRefSpec = ArgRefSpec; - RuntimeArgRef profiling_settings(); RuntimeArgRef> ff_handle(); RuntimeArgRef iteration_config(); RuntimeArgRef kernel_device_type(); +RuntimeArgRef per_device_op_state_for_layer(symbolic_layer_guid_t); } // namespace FlexFlow diff --git a/lib/task-spec/include/task-spec/runtime_arg_ref_spec.h b/lib/task-spec/include/task-spec/runtime_arg_ref_spec.h new file mode 100644 index 0000000000..566430bc6e --- /dev/null +++ b/lib/task-spec/include/task-spec/runtime_arg_ref_spec.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_ARG_REF_SPEC_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_ARG_REF_SPEC_H + +#include "task-spec/arg_ref_spec.h" +#include "task-spec/runtime_arg_ref_type.dtg.h" + +namespace FlexFlow { + +using RuntimeArgRefSpec = ArgRefSpec; + +} // namespace FlexFlow + +#endif + diff --git a/lib/task-spec/include/task-spec/runtime_arg_ref_type.variant.toml b/lib/task-spec/include/task-spec/runtime_arg_ref_type.variant.toml new file mode 100644 index 0000000000..b96cdc3925 --- /dev/null +++ 
b/lib/task-spec/include/task-spec/runtime_arg_ref_type.variant.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "RuntimeArgRefType" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "task-spec/argumentless_runtime_arg_ref_type.dtg.h", + "task-spec/per_device_op_state_runtime_arg_ref_type.dtg.h", +] + +[[values]] +type = "::FlexFlow::ArgumentlessRuntimeArgRefType" +key = "argumentless" + + +[[values]] +type = "::FlexFlow::PerDeviceOpStateRuntimeArgRefType" +key = "per_device_op_state" diff --git a/lib/task-spec/include/task-spec/runtime_arg_spec.h b/lib/task-spec/include/task-spec/runtime_arg_spec.h new file mode 100644 index 0000000000..e387178fca --- /dev/null +++ b/lib/task-spec/include/task-spec/runtime_arg_spec.h @@ -0,0 +1,12 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_ARG_SPEC_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_ARG_SPEC_H + +#include "task-spec/runtime_arg_spec.dtg.h" + +namespace FlexFlow { + +std::type_index get_type_index(RuntimeArgSpec const &); + +} + +#endif diff --git a/lib/task-spec/include/task-spec/task_arg_spec.variant.toml b/lib/task-spec/include/task-spec/runtime_arg_spec.variant.toml similarity index 81% rename from lib/task-spec/include/task-spec/task_arg_spec.variant.toml rename to lib/task-spec/include/task-spec/runtime_arg_spec.variant.toml index 4829a50ff6..f6734dfde0 100644 --- a/lib/task-spec/include/task-spec/task_arg_spec.variant.toml +++ b/lib/task-spec/include/task-spec/runtime_arg_spec.variant.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "TaskArgSpec" +name = "RuntimeArgSpec" features = [ "eq", "fmt", @@ -8,7 +8,7 @@ features = [ includes = [ "task-spec/concrete_arg_spec.h", - "task-spec/runtime_arg_ref.h" + "task-spec/runtime_arg_ref_spec.h" ] [[values]] diff --git a/lib/task-spec/include/task-spec/runtime_task_binding.h b/lib/task-spec/include/task-spec/runtime_task_binding.h new file mode 100644 index 0000000000..ecec025e47 --- /dev/null +++ b/lib/task-spec/include/task-spec/runtime_task_binding.h @@ -0,0 +1,92 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_TASK_BINDING_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_TASK_BINDING_H + +#include "task-spec/symbolic_loss_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_optimizer_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_forward_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_gradient_tensor_guid_t.dtg.h" +#include "task-spec/slot_id_t.dtg.h" +#include "task-spec/runtime_arg_spec.dtg.h" +#include "task-spec/task_id_t.dtg.h" +#include "task-spec/runtime_task_signature.dtg.h" +#include "task-spec/symbolic_training_tensor_guid_t.dtg.h" +#include "task-spec/training_tensor_slot_id_t.dtg.h" +#include "task-spec/runtime_arg_ref.h" + +namespace FlexFlow { + +struct RuntimeTaskBinding { + RuntimeTaskBinding(); + + explicit RuntimeTaskBinding( + std::unordered_map const + &tensor_bindings, + std::unordered_map const &arg_bindings); + + void bind(int, symbolic_forward_tensor_guid_t const &); + void bind(slot_id_t, symbolic_forward_tensor_guid_t const &); + + void bind_grad(int, symbolic_gradient_tensor_guid_t const &); + void bind_grad(slot_id_t, symbolic_gradient_tensor_guid_t const &); + + void bind_optimizer(int, symbolic_optimizer_tensor_guid_t const &); + void bind_optimizer(slot_id_t, symbolic_optimizer_tensor_guid_t const &); + + void bind_loss(int, symbolic_loss_tensor_guid_t const &); + void bind_loss(slot_id_t, symbolic_loss_tensor_guid_t const &); + + template + void bind_arg(int name, T 
const &t) { + this->bind_arg(slot_id_t{name}, t); + } + + template + void bind_arg(slot_id_t name, T const &t) { + this->insert_arg_spec(name, RuntimeArgSpec{ConcreteArgSpec::create(t)}); + } + + template + void bind_arg(int name, RuntimeArgRef const &t) { + this->bind_arg(slot_id_t{name}, t); + } + + template + void bind_arg(slot_id_t name, RuntimeArgRef const &ref) { + this->insert_arg_spec(name, RuntimeArgSpec{RuntimeArgRefSpec::create(ref)}); + } + + bool operator==(RuntimeTaskBinding const &other) const; + bool operator!=(RuntimeTaskBinding const &other) const; + + std::unordered_map const & + get_tensor_bindings() const; + std::unordered_map const &get_arg_bindings() const; + void insert_arg_spec(slot_id_t name, RuntimeArgSpec const &arg_spec); + +private: + std::unordered_map + tensor_bindings; + std::unordered_map arg_bindings; + +private: + std::tuple + tie() const; + + friend ::std::hash; +}; + +std::string format_as(RuntimeTaskBinding const &x); +std::ostream &operator<<(std::ostream &s, RuntimeTaskBinding const &x); + +} // namespace FlexFlow + +namespace std { + +template <> +struct hash<::FlexFlow::RuntimeTaskBinding> { + size_t operator()(::FlexFlow::RuntimeTaskBinding const &s) const; +}; + +} // namespace std + +#endif diff --git a/lib/task-spec/include/task-spec/runtime_task_invocation.h b/lib/task-spec/include/task-spec/runtime_task_invocation.h new file mode 100644 index 0000000000..ebf108079c --- /dev/null +++ b/lib/task-spec/include/task-spec/runtime_task_invocation.h @@ -0,0 +1,12 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_TASK_INVOCATION_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_TASK_INVOCATION_H + +#include "task-spec/runtime_task_invocation.dtg.h" + +namespace FlexFlow { + +bool is_invocation_valid(RuntimeTaskSignature const &sig, RuntimeTaskInvocation const &inv); + +} + +#endif diff --git a/lib/task-spec/include/task-spec/task_invocation.struct.toml b/lib/task-spec/include/task-spec/runtime_task_invocation.struct.toml similarity index 64% rename from lib/task-spec/include/task-spec/task_invocation.struct.toml rename to lib/task-spec/include/task-spec/runtime_task_invocation.struct.toml index 38e02a1370..065490e16c 100644 --- a/lib/task-spec/include/task-spec/task_invocation.struct.toml +++ b/lib/task-spec/include/task-spec/runtime_task_invocation.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "TaskInvocation" +name = "RuntimeTaskInvocation" features = [ "eq", "fmt", @@ -7,7 +7,7 @@ features = [ ] includes = [ - "task-spec/task_binding.h", + "task-spec/runtime_task_binding.h", "task-spec/task_id_t.dtg.h" ] @@ -18,4 +18,4 @@ type = "::FlexFlow::task_id_t" [[fields]] name = "binding" -type = "::FlexFlow::TaskBinding" +type = "::FlexFlow::RuntimeTaskBinding" diff --git a/lib/task-spec/include/task-spec/task_signature.h b/lib/task-spec/include/task-spec/runtime_task_signature.h similarity index 58% rename from lib/task-spec/include/task-spec/task_signature.h rename to lib/task-spec/include/task-spec/runtime_task_signature.h index 8214e7e1b5..c8332971b6 100644 --- a/lib/task-spec/include/task-spec/task_signature.h +++ b/lib/task-spec/include/task-spec/runtime_task_signature.h @@ -1,35 +1,35 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_TASK_SIGNATURE_H -#define _FLEXFLOW_LOCAL_EXECUTION_TASK_SIGNATURE_H +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_TASK_SIGNATURE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_RUNTIME_TASK_SIGNATURE_H -#include "task-spec/task_signature.dtg.h" +#include 
"task-spec/runtime_task_signature.dtg.h" #include "utils/type_index.h" namespace FlexFlow { -TaskSignature make_empty_task_signature(); +RuntimeTaskSignature make_empty_runtime_task_signature(); -void add_slot(TaskSignature &, +void add_slot(RuntimeTaskSignature &, int name, - TensorType, + TrainingTensorType, SlotType slot_type = SlotType::TENSOR); -void add_slot(TaskSignature &, +void add_slot(RuntimeTaskSignature &, slot_id_t name, - TensorType, + TrainingTensorType, SlotType slot_type = SlotType::TENSOR); template -void add_arg_slot(TaskSignature &task_signature, int name) { +void add_arg_slot(RuntimeTaskSignature &task_signature, int name) { add_arg_slot(task_signature, slot_id_t{name}); } template -void add_arg_slot(TaskSignature &task_signature, slot_id_t name) { +void add_arg_slot(RuntimeTaskSignature &task_signature, slot_id_t name) { // static_assert(is_serializable::value, "Type must be serializable"); task_signature.task_arg_types.insert({name, get_type_index_for_type()}); } template -void add_return_value(TaskSignature &task_signature) { +void add_return_value(RuntimeTaskSignature &task_signature) { task_signature.return_value = get_type_index_for_type(); } @@ -40,7 +40,7 @@ void add_return_value(TaskSignature &task_signature) { */ template -void add_unchecked_arg_slot(TaskSignature &task_signature, int name) { +void add_unchecked_arg_slot(RuntimeTaskSignature &task_signature, int name) { add_unchecked_arg_slot(task_signature, slot_id_t{name}); } @@ -51,7 +51,7 @@ void add_unchecked_arg_slot(TaskSignature &task_signature, int name) { */ template -void add_unchecked_arg_slot(TaskSignature &task_signature, slot_id_t name) { +void add_unchecked_arg_slot(RuntimeTaskSignature &task_signature, slot_id_t name) { task_signature.task_arg_types.insert({name, get_type_index_for_type()}); } diff --git a/lib/task-spec/include/task-spec/task_signature.struct.toml b/lib/task-spec/include/task-spec/runtime_task_signature.struct.toml similarity index 95% rename from lib/task-spec/include/task-spec/task_signature.struct.toml rename to lib/task-spec/include/task-spec/runtime_task_signature.struct.toml index 3df0a8cfc7..6c62e03e7b 100644 --- a/lib/task-spec/include/task-spec/task_signature.struct.toml +++ b/lib/task-spec/include/task-spec/runtime_task_signature.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "TaskSignature" +name = "RuntimeTaskSignature" features = [ "eq", "fmt", diff --git a/lib/task-spec/include/task-spec/serialization.h b/lib/task-spec/include/task-spec/serialization.h index 2fc4b4b706..29f9144a3b 100644 --- a/lib/task-spec/include/task-spec/serialization.h +++ b/lib/task-spec/include/task-spec/serialization.h @@ -1,25 +1,12 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_SERIALIZATION_H -#define _FLEXFLOW_LOCAL_EXECUTION_SERIALIZATION_H +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SERIALIZATION_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SERIALIZATION_H #include "kernels/device.h" #include "kernels/nccl.h" -#include "op-attrs/dim_ordered/dim_ordered.h" +#include "op-attrs/ff_ordered/ff_ordered.h" #include "utils/required.h" -#include "utils/strong_typedef.h" #include "utils/type_traits.h" #include "utils/variant.h" -#include "utils/visitable.h" - -namespace FlexFlow { - -struct InternalTestType { - int x; - float y; -}; - -} // namespace FlexFlow - -VISITABLE_STRUCT(::FlexFlow::InternalTestType, x, y); namespace FlexFlow { @@ -46,26 +33,12 @@ struct visit_trivially_serializable> { template <> struct visit_trivially_serializable<> : std::true_type {}; -template 
-struct is_trivially_serializable< - T, - typename std::enable_if< - visit_trivially_serializable>::value>::type> - : std::true_type {}; - template struct is_trivially_serializable< T, typename std::enable_if::value>::type> : std::true_type {}; -template -struct is_trivially_serializable>> - : is_trivially_serializable> {}; - -template -struct is_trivially_serializable> : is_trivially_serializable {}; - template <> struct is_trivially_serializable : std::true_type {}; template <> @@ -86,9 +59,9 @@ template struct is_trivially_serializable> : is_trivially_serializable {}; -template -struct is_trivially_serializable> - : is_trivially_serializable {}; +template +struct is_trivially_serializable> : is_trivially_serializable { +}; template struct is_trivially_serializable> @@ -134,11 +107,6 @@ static_assert(is_trivially_serializable::value, ""); static_assert(is_trivially_serializable::value, ""); static_assert(is_trivially_serializable>::value, ""); -static_assert(std::is_same, - std::tuple>::value, - ""); -static_assert(visit_trivially_serializable::value, ""); -static_assert(is_trivially_serializable::value, ""); } // namespace FlexFlow diff --git a/lib/task-spec/include/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.h b/lib/task-spec/include/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.h new file mode 100644 index 0000000000..446f16d134 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.h @@ -0,0 +1,19 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_CG_OP_ATTRS_AND_TRAINING_SIGNATURE_WITH_SHAPES_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_CG_OP_ATTRS_AND_TRAINING_SIGNATURE_WITH_SHAPES_H + +#include "task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.dtg.h" + +namespace FlexFlow { + +SymbolicLayerTrainingTensorGroupSignatureWithShapes + get_signature_with_shapes(SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &); + +SymbolicCgOpAttrsAndTrainingSignatureWithShapes + make_symbolic_cg_op_attrs_and_signature_with_shapes( + ComputationGraphOpAttrs const &, + SymbolicLayerTrainingTensorGroupSignatureWithShapes const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.struct.toml b/lib/task-spec/include/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.struct.toml new file mode 100644 index 0000000000..24ba0ea34a --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.struct.toml @@ -0,0 +1,36 @@ +namespace = "FlexFlow" +name = "SymbolicCgOpAttrsAndTrainingSignatureWithShapes" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "", + "task-spec/symbolic_training_tensor_group_with_shape.dtg.h", + "op-attrs/computation_graph_op_attrs.dtg.h", +] + +src_includes = [ + "utils/ord/vector.h", + "utils/hash/vector.h", + "utils/fmt/vector.h", +] + +[[fields]] +name = "op_attrs" +type = "::FlexFlow::ComputationGraphOpAttrs" + +[[fields]] +name = "input_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroupWithShape>" + +[[fields]] +name = "weight_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroupWithShape>" + +[[fields]] +name = "output_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroupWithShape>" diff --git 
a/lib/task-spec/include/task-spec/optimizer_tensor_guid_t.struct.toml b/lib/task-spec/include/task-spec/symbolic_forward_tensor_guid_t.struct.toml similarity index 74% rename from lib/task-spec/include/task-spec/optimizer_tensor_guid_t.struct.toml rename to lib/task-spec/include/task-spec/symbolic_forward_tensor_guid_t.struct.toml index dc5f98886f..365ca1da5a 100644 --- a/lib/task-spec/include/task-spec/optimizer_tensor_guid_t.struct.toml +++ b/lib/task-spec/include/task-spec/symbolic_forward_tensor_guid_t.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "optimizer_tensor_guid_t" +name = "symbolic_forward_tensor_guid_t" features = [ "eq", "ord", diff --git a/lib/task-spec/include/task-spec/symbolic_forward_tensor_source.h b/lib/task-spec/include/task-spec/symbolic_forward_tensor_source.h new file mode 100644 index 0000000000..19630c737a --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_forward_tensor_source.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_FORWARD_TRAINING_SOURCE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_FORWARD_TRAINING_SOURCE_H + +#include "task-spec/symbolic_forward_tensor_guid_t.dtg.h" + +namespace FlexFlow { + +struct SymbolicForwardTensorSource { +public: + SymbolicForwardTensorSource(); + + symbolic_forward_tensor_guid_t new_symbolic_forward_tensor(); + + void reset(); + +private: + static int next_available_symbolic_forward_tensor_id; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/forward_tensor_guid_t.struct.toml b/lib/task-spec/include/task-spec/symbolic_gradient_tensor_guid_t.struct.toml similarity index 74% rename from lib/task-spec/include/task-spec/forward_tensor_guid_t.struct.toml rename to lib/task-spec/include/task-spec/symbolic_gradient_tensor_guid_t.struct.toml index 68fc4b6815..cb4c398a00 100644 --- a/lib/task-spec/include/task-spec/forward_tensor_guid_t.struct.toml +++ b/lib/task-spec/include/task-spec/symbolic_gradient_tensor_guid_t.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "forward_tensor_guid_t" +name = "symbolic_gradient_tensor_guid_t" features = [ "eq", "ord", diff --git a/lib/task-spec/include/task-spec/symbolic_gradient_tensor_source.h b/lib/task-spec/include/task-spec/symbolic_gradient_tensor_source.h new file mode 100644 index 0000000000..230d1b2974 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_gradient_tensor_source.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_GRADIENT_TENSOR_SOURCE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_GRADIENT_TENSOR_SOURCE_H + +#include "task-spec/symbolic_gradient_tensor_guid_t.dtg.h" + +namespace FlexFlow { + +struct SymbolicGradientTensorSource { +public: + SymbolicGradientTensorSource(); + + symbolic_gradient_tensor_guid_t new_symbolic_gradient_tensor(); + + void reset(); + +private: + static int next_available_symbolic_gradient_tensor_id; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_layer_guid_t.struct.toml b/lib/task-spec/include/task-spec/symbolic_layer_guid_t.struct.toml new file mode 100644 index 0000000000..2302e9a06b --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_layer_guid_t.struct.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "symbolic_layer_guid_t" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "utils/graph/node/node.dtg.h", +] + +[[fields]] +name = "raw_node" +type = "::FlexFlow::Node" + diff --git 
a/lib/task-spec/include/task-spec/symbolic_layer_tensor_shape_signature.h b/lib/task-spec/include/task-spec/symbolic_layer_tensor_shape_signature.h new file mode 100644 index 0000000000..e4b8626b35 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_layer_tensor_shape_signature.h @@ -0,0 +1,20 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_LAYER_TENSOR_SHAPE_SIGNATURE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_LAYER_TENSOR_SHAPE_SIGNATURE_H + +#include "op-attrs/tensor_role.dtg.h" +#include "task-spec/symbolic_layer_tensor_shape_signature.dtg.h" + +namespace FlexFlow { + +std::vector<TensorShape> + tensor_shapes_for_role(SymbolicLayerTensorShapeSignature const &signature, + TensorRole tensor_role); + +TensorShape + tensor_shape_for_role_and_index(SymbolicLayerTensorShapeSignature const &signature, + TensorRole tensor_role, + nonnegative_int tensor_idx); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/cg_operator_tensor_shape_signature.struct.toml b/lib/task-spec/include/task-spec/symbolic_layer_tensor_shape_signature.struct.toml similarity index 91% rename from lib/pcg/include/pcg/cg_operator_tensor_shape_signature.struct.toml rename to lib/task-spec/include/task-spec/symbolic_layer_tensor_shape_signature.struct.toml index a2a6c047c6..f21a7f7ebd 100644 --- a/lib/pcg/include/pcg/cg_operator_tensor_shape_signature.struct.toml +++ b/lib/task-spec/include/task-spec/symbolic_layer_tensor_shape_signature.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "CGOperatorTensorShapeSignature" +name = "SymbolicLayerTensorShapeSignature" features = [ "eq", "ord", diff --git a/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature.h b/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature.h new file mode 100644 index 0000000000..e78cf434cd --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature.h @@ -0,0 +1,28 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_LAYER_SYMBOLIC_TENSOR_GROUP_SIGNATURE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_LAYER_SYMBOLIC_TENSOR_GROUP_SIGNATURE_H + +#include "op-attrs/tensor_role.dtg.h" +#include "task-spec/fwb_tensor_type.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.dtg.h" +#include "task-spec/symbolic_training_tensor_guid_t.dtg.h" +#include "utils/nonnegative_int/nonnegative_int.h" + +namespace FlexFlow { + +std::vector<SymbolicTrainingTensorGroup> get_training_tensor_groups_for_role( + SymbolicLayerTrainingTensorGroupSignature const &signature, TensorRole tensor_role); + +SymbolicTrainingTensorGroup get_training_tensor_group_for_role_and_index( + SymbolicLayerTrainingTensorGroupSignature const &signature, + TensorRole tensor_role, + nonnegative_int index); + +std::vector<symbolic_training_tensor_guid_t> + get_training_tensors_for_role_and_type(SymbolicLayerTrainingTensorGroupSignature const &, + TensorRole, + FwbTensorType); + + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature.struct.toml b/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature.struct.toml new file mode 100644 index 0000000000..f45f00f838 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature.struct.toml @@ -0,0 +1,30 @@ +namespace = "FlexFlow" +name = "SymbolicLayerTrainingTensorGroupSignature" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + 
"task-spec/symbolic_training_tensor_group.dtg.h", +] + +src_includes = [ + "utils/hash/vector.h", + "utils/fmt/vector.h", + "utils/ord/vector.h", +] + +[[fields]] +name = "input_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroup>" + +[[fields]] +name = "weight_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroup>" + +[[fields]] +name = "output_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroup>" diff --git a/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.h b/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.h new file mode 100644 index 0000000000..6e7a460352 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.h @@ -0,0 +1,18 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_LAYER_TRAINING_TENSOR_GROUP_SIGNATURE_WITH_SHAPES_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_LAYER_TRAINING_TENSOR_GROUP_SIGNATURE_WITH_SHAPES_H + +#include "task-spec/symbolic_layer_tensor_shape_signature.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.dtg.h" + +namespace FlexFlow { + +SymbolicLayerTrainingTensorGroupSignature + drop_shapes_from_signature(SymbolicLayerTrainingTensorGroupSignatureWithShapes const &); + +SymbolicLayerTensorShapeSignature + get_shape_signature(SymbolicLayerTrainingTensorGroupSignatureWithShapes const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.struct.toml b/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.struct.toml new file mode 100644 index 0000000000..2141964d57 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.struct.toml @@ -0,0 +1,31 @@ +namespace = "FlexFlow" +name = "SymbolicLayerTrainingTensorGroupSignatureWithShapes" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "", + "task-spec/symbolic_training_tensor_group_with_shape.dtg.h", +] + +src_includes = [ + "utils/ord/vector.h", + "utils/hash/vector.h", + "utils/fmt/vector.h", +] + +[[fields]] +name = "input_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroupWithShape>" + +[[fields]] +name = "weight_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroupWithShape>" + +[[fields]] +name = "output_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroupWithShape>" diff --git a/lib/task-spec/include/task-spec/loss_tensor_guid_t.struct.toml b/lib/task-spec/include/task-spec/symbolic_loss_tensor_guid_t.struct.toml similarity index 84% rename from lib/task-spec/include/task-spec/loss_tensor_guid_t.struct.toml rename to lib/task-spec/include/task-spec/symbolic_loss_tensor_guid_t.struct.toml index c00ccbb0f2..642e25d788 100644 --- a/lib/task-spec/include/task-spec/loss_tensor_guid_t.struct.toml +++ b/lib/task-spec/include/task-spec/symbolic_loss_tensor_guid_t.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "loss_tensor_guid_t" +name = "symbolic_loss_tensor_guid_t" features = [ "eq", "ord", @@ -11,6 +11,7 @@ includes = [ "utils/nonnegative_int/nonnegative_int.h" ] + [[fields]] name = "raw_index" type = "::FlexFlow::nonnegative_int" diff --git 
a/lib/task-spec/include/task-spec/loss_tensor_source.h b/lib/task-spec/include/task-spec/symbolic_loss_tensor_source.h similarity index 52% rename from lib/task-spec/include/task-spec/loss_tensor_source.h rename to lib/task-spec/include/task-spec/symbolic_loss_tensor_source.h index 21091109e5..23988fcf59 100644 --- a/lib/task-spec/include/task-spec/loss_tensor_source.h +++ b/lib/task-spec/include/task-spec/symbolic_loss_tensor_source.h @@ -1,19 +1,19 @@ #ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_LOSS_TENSOR_SOURCE_H #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_LOSS_TENSOR_SOURCE_H -#include "task-spec/loss_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_loss_tensor_guid_t.dtg.h" #include "utils/nonnegative_int/nonnegative_int.h" namespace FlexFlow { -struct LossTensorSource { +struct SymbolicLossTensorSource { public: - LossTensorSource(); + SymbolicLossTensorSource(); - loss_tensor_guid_t new_loss_tensor(); + symbolic_loss_tensor_guid_t new_symbolic_loss_tensor(); private: - static nonnegative_int next_available_loss_tensor_id; + static nonnegative_int next_available_symbolic_loss_tensor_id; }; } // namespace FlexFlow diff --git a/lib/task-spec/include/task-spec/gradient_tensor_guid_t.struct.toml b/lib/task-spec/include/task-spec/symbolic_optimizer_tensor_guid_t.struct.toml similarity index 73% rename from lib/task-spec/include/task-spec/gradient_tensor_guid_t.struct.toml rename to lib/task-spec/include/task-spec/symbolic_optimizer_tensor_guid_t.struct.toml index b75e27a9d2..a9e4757e7b 100644 --- a/lib/task-spec/include/task-spec/gradient_tensor_guid_t.struct.toml +++ b/lib/task-spec/include/task-spec/symbolic_optimizer_tensor_guid_t.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "gradient_tensor_guid_t" +name = "symbolic_optimizer_tensor_guid_t" features = [ "eq", "ord", diff --git a/lib/task-spec/include/task-spec/symbolic_optimizer_tensor_source.h b/lib/task-spec/include/task-spec/symbolic_optimizer_tensor_source.h new file mode 100644 index 0000000000..b0ffc912af --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_optimizer_tensor_source.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_OPTIMIZER_TENSOR_SOURCE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_OPTIMIZER_TENSOR_SOURCE_H + +#include "task-spec/symbolic_optimizer_tensor_guid_t.dtg.h" + +namespace FlexFlow { + +struct SymbolicOptimizerTensorSource { +public: + SymbolicOptimizerTensorSource(); + + symbolic_optimizer_tensor_guid_t new_symbolic_optimizer_tensor(); + + void reset(); + +private: + static int next_available_symbolic_optimizer_tensor_id; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_tensor_guid_t.struct.toml b/lib/task-spec/include/task-spec/symbolic_tensor_guid_t.struct.toml new file mode 100644 index 0000000000..a1a4f55696 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_tensor_guid_t.struct.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "symbolic_tensor_guid_t" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "utils/graph/dataflow_graph/dataflow_output.dtg.h", +] + +[[fields]] +name = "raw_graph_output" +type = "::FlexFlow::DataflowOutput" + diff --git a/lib/task-spec/include/task-spec/symbolic_training_layer_attrs_plus_context.h b/lib/task-spec/include/task-spec/symbolic_training_layer_attrs_plus_context.h new file mode 100644 index 0000000000..13cd64806c --- /dev/null +++ 
b/lib/task-spec/include/task-spec/symbolic_training_layer_attrs_plus_context.h @@ -0,0 +1,42 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_TRAINING_LAYER_ATTRS_PLUS_CONTEXT_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_TRAINING_LAYER_ATTRS_PLUS_CONTEXT_H + +#include "op-attrs/tensor_role.dtg.h" +#include "task-spec/symbolic_training_layer_attrs_plus_context.dtg.h" +#include "task-spec/symbolic_training_tensor_group.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.dtg.h" + +namespace FlexFlow { + +std::vector + get_training_tensor_groups_for_role( + SymbolicTrainingLayerAttrsPlusContext const &training_layer_plus_context, + TensorRole tensor_role); + +SymbolicTrainingTensorGroup + get_training_tensor_group_for_role_and_index( + SymbolicTrainingLayerAttrsPlusContext const &training_layer_plus_context, + TensorRole tensor_role, + nonnegative_int index); + +std::vector + get_input_tensors(SymbolicTrainingLayerAttrsPlusContext const &); +std::vector + get_input_grad_tensors(SymbolicTrainingLayerAttrsPlusContext const &); + +std::vector + get_weight_tensors(SymbolicTrainingLayerAttrsPlusContext const &); +std::vector + get_weight_grad_tensors(SymbolicTrainingLayerAttrsPlusContext const &); + +std::vector + get_output_tensors(SymbolicTrainingLayerAttrsPlusContext const &); +std::vector + get_output_grad_tensors(SymbolicTrainingLayerAttrsPlusContext const &); + +SymbolicLayerTrainingTensorGroupSignature + get_tensor_group_signature(SymbolicTrainingLayerAttrsPlusContext const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_training_layer_attrs_plus_context.struct.toml b/lib/task-spec/include/task-spec/symbolic_training_layer_attrs_plus_context.struct.toml new file mode 100644 index 0000000000..5276072c24 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_training_layer_attrs_plus_context.struct.toml @@ -0,0 +1,25 @@ +namespace = "FlexFlow" +name = "SymbolicTrainingLayerAttrsPlusContext" +features = [] + +includes = [ + "pcg/layer_guid_t.dtg.h", + "pcg/layer_attrs.dtg.h", + "task-spec/symbolic_training_tensor_group.dtg.h", +] + +[[fields]] +name = "layer_attrs" +type = "::FlexFlow::LayerAttrs" + +[[fields]] +name = "input_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroup>" + +[[fields]] +name = "weight_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroup>" + +[[fields]] +name = "output_tensor_groups" +type = "std::vector<::FlexFlow::SymbolicTrainingTensorGroup>" diff --git a/lib/task-spec/include/task-spec/symbolic_training_tensor_group.h b/lib/task-spec/include/task-spec/symbolic_training_tensor_group.h new file mode 100644 index 0000000000..9984d6f5ad --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_training_tensor_group.h @@ -0,0 +1,33 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_TRAINING_TENSOR_GROUP_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_TRAINING_TENSOR_GROUP_H + +#include "op-attrs/tensor_role.dtg.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "pcg/tensor_attrs.dtg.h" +#include "pcg/tensor_guid_t.dtg.h" +#include "task-spec/op_training_tensor_type.dtg.h" +#include "task-spec/symbolic_forward_tensor_source.h" +#include "task-spec/symbolic_gradient_tensor_source.h" +#include "task-spec/symbolic_optimizer_tensor_source.h" +#include "task-spec/symbolic_training_tensor_group.dtg.h" +#include "task-spec/symbolic_training_tensor_guid_t.dtg.h" +#include 
"task-spec/fwb_tensor_type.dtg.h" + +namespace FlexFlow { + +SymbolicTrainingTensorGroup make_symbolic_training_tensor_group( + CreateGrad create_grad, + OptimizerAttrs const &optimizer_attrs, + SymbolicForwardTensorSource &forward_tensor_source, + SymbolicGradientTensorSource &gradient_tensor_source, + SymbolicOptimizerTensorSource &optimizer_tensor_source); + +symbolic_training_tensor_guid_t + get_training_tensor_for_type(SymbolicTrainingTensorGroup const &, FwbTensorType); + +std::unordered_set + get_all_training_tensors_in_tensor_group(SymbolicTrainingTensorGroup const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_training_tensor_group.struct.toml b/lib/task-spec/include/task-spec/symbolic_training_tensor_group.struct.toml new file mode 100644 index 0000000000..7784267814 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_training_tensor_group.struct.toml @@ -0,0 +1,31 @@ +namespace = "FlexFlow" +name = "SymbolicTrainingTensorGroup" +features = [ + "eq", + "ord", + "fmt", + "hash", +] + +includes = [ + "task-spec/symbolic_forward_tensor_guid_t.dtg.h", + "task-spec/symbolic_gradient_tensor_guid_t.dtg.h", + "task-spec/symbolic_optimizer_tensor_guid_t.dtg.h", +] + +src_includes = [ + "utils/hash/vector.h", + "utils/fmt/vector.h", +] + +[[fields]] +name = "forward_tensor" +type = "::FlexFlow::symbolic_forward_tensor_guid_t" + +[[fields]] +name = "gradient_tensor" +type = "::FlexFlow::symbolic_gradient_tensor_guid_t" + +[[fields]] +name = "optimizer_tensors" +type = "std::vector<::FlexFlow::symbolic_optimizer_tensor_guid_t>" diff --git a/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_attrs.h b/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_attrs.h new file mode 100644 index 0000000000..93fe4b4b77 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_attrs.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_TRAINING_TENSOR_GROUP_WITH_ATTRS_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_TRAINING_TENSOR_GROUP_WITH_ATTRS_H + +#include "op-attrs/tensor_shape.dtg.h" +#include "task-spec/symbolic_training_tensor_group.dtg.h" +#include "task-spec/symbolic_training_tensor_group_with_attrs.dtg.h" + +namespace FlexFlow { + +SymbolicTrainingTensorGroupWithAttrs + make_symbolic_training_tensor_group_with_attrs_from_group_and_attrs( + SymbolicTrainingTensorGroup const &, + TensorShape const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_attrs.struct.toml b/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_attrs.struct.toml new file mode 100644 index 0000000000..4d48a31df4 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_attrs.struct.toml @@ -0,0 +1,36 @@ +namespace = "FlexFlow" +name = "SymbolicTrainingTensorGroupWithAttrs" +features = [ + "eq", + "ord", + "fmt", + "hash", +] + +includes = [ + "op-attrs/tensor_shape.dtg.h", + "task-spec/symbolic_forward_tensor_guid_t.dtg.h", + "task-spec/symbolic_gradient_tensor_guid_t.dtg.h", + "task-spec/symbolic_optimizer_tensor_guid_t.dtg.h", +] + +src_includes = [ + "utils/hash/vector.h", + "utils/fmt/vector.h", +] + +[[fields]] +name = "tensor_shape" +type = "::FlexFlow::TensorShape" + +[[fields]] +name = "forward_tensor" +type = "::FlexFlow::symbolic_forward_tensor_guid_t" + +[[fields]] +name = "gradient_tensor" +type = 
"::FlexFlow::symbolic_gradient_tensor_guid_t" + +[[fields]] +name = "optimizer_tensors" +type = "std::vector<::FlexFlow::symbolic_optimizer_tensor_guid_t>" diff --git a/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_shape.h b/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_shape.h new file mode 100644 index 0000000000..4b443a0ecd --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_shape.h @@ -0,0 +1,13 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_TRAINING_TENSOR_GROUP_WITH_SHAPE_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_SYMBOLIC_TRAINING_TENSOR_GROUP_WITH_SHAPE_H + +#include "task-spec/symbolic_training_tensor_group_with_shape.dtg.h" + +namespace FlexFlow { + +SymbolicTrainingTensorGroup + drop_shape_from_group(SymbolicTrainingTensorGroupWithShape const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_shape.struct.toml b/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_shape.struct.toml new file mode 100644 index 0000000000..ef467ea07b --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_training_tensor_group_with_shape.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "SymbolicTrainingTensorGroupWithShape" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "task-spec/symbolic_training_tensor_group.dtg.h", + "op-attrs/tensor_shape.dtg.h", +] + +[[fields]] +name = "tensor_shape" +type = "::FlexFlow::TensorShape" + +[[fields]] +name = "training_tensor_group" +type = "::FlexFlow::SymbolicTrainingTensorGroup" diff --git a/lib/task-spec/include/task-spec/symbolic_training_tensor_guid_t.variant.toml b/lib/task-spec/include/task-spec/symbolic_training_tensor_guid_t.variant.toml new file mode 100644 index 0000000000..0e41c52c23 --- /dev/null +++ b/lib/task-spec/include/task-spec/symbolic_training_tensor_guid_t.variant.toml @@ -0,0 +1,31 @@ +namespace = "FlexFlow" +name = "symbolic_training_tensor_guid_t" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "task-spec/symbolic_forward_tensor_guid_t.dtg.h", + "task-spec/symbolic_optimizer_tensor_guid_t.dtg.h", + "task-spec/symbolic_gradient_tensor_guid_t.dtg.h", + "task-spec/symbolic_loss_tensor_guid_t.dtg.h" +] + +[[values]] +type = "::FlexFlow::symbolic_forward_tensor_guid_t" +key = "forward_tensor" + +[[values]] +type = "::FlexFlow::symbolic_gradient_tensor_guid_t" +key = "gradient_tensor" + +[[values]] +type = "::FlexFlow::symbolic_optimizer_tensor_guid_t" +key = "optimizer_tensor" + +[[values]] +type = "::FlexFlow::symbolic_loss_tensor_guid_t" +key = "loss_tensor" diff --git a/lib/task-spec/include/task-spec/task_arg_spec.h b/lib/task-spec/include/task-spec/task_arg_spec.h deleted file mode 100644 index 38879ecab9..0000000000 --- a/lib/task-spec/include/task-spec/task_arg_spec.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TASK_ARG_SPEC_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TASK_ARG_SPEC_H - -#include "task-spec/task_arg_spec.dtg.h" - -namespace FlexFlow { - -std::type_index get_type_index(TaskArgSpec const &); - -} - -#endif diff --git a/lib/task-spec/include/task-spec/task_argument_accessor.h b/lib/task-spec/include/task-spec/task_argument_accessor.h index a6d71b6b70..7b4527be82 100644 --- a/lib/task-spec/include/task-spec/task_argument_accessor.h +++ b/lib/task-spec/include/task-spec/task_argument_accessor.h @@ -28,7 +28,7 @@ struct 
TaskArgumentAccessor { template privilege_mode_to_accessor get_tensor(slot_id_t slot) const { return std::get>( - this->ptr->get_tensor(slot, PRIV, TensorType::FORWARD)); + this->ptr->get_tensor(slot, PRIV, TrainingTensorType::FORWARD)); } template @@ -39,7 +39,7 @@ struct TaskArgumentAccessor { template privilege_mode_to_accessor get_tensor_grad(slot_id_t slot) const { return std::get>( - this->ptr->get_tensor(slot, PRIV, TensorType::GRADIENT)); + this->ptr->get_tensor(slot, PRIV, TrainingTensorType::GRADIENT)); } template @@ -50,7 +50,7 @@ struct TaskArgumentAccessor { template privilege_mode_to_accessor get_optimizer_tensor(slot_id_t slot) const { return std::get>( - this->ptr->get_tensor(slot, PRIV, TensorType::OPTIMIZER)); + this->ptr->get_tensor(slot, PRIV, TrainingTensorType::OPTIMIZER)); } template @@ -61,7 +61,7 @@ struct TaskArgumentAccessor { template privilege_mode_to_accessor get_loss_tensor(slot_id_t slot) const { return std::get>( - this->ptr->get_tensor(slot, PRIV, TensorType::LOSS)); + this->ptr->get_tensor(slot, PRIV, TrainingTensorType::LOSS)); } // variadic tensors @@ -75,7 +75,7 @@ struct TaskArgumentAccessor { std::vector> get_variadic_tensor(slot_id_t slot) const { return std::get>>( - this->ptr->get_variadic_tensor(slot, PRIV, TensorType::FORWARD)); + this->ptr->get_variadic_tensor(slot, PRIV, TrainingTensorType::FORWARD)); } template @@ -88,7 +88,7 @@ struct TaskArgumentAccessor { std::vector> get_variadic_tensor_grad(slot_id_t slot) const { return std::get>>( - this->ptr->get_variadic_tensor(slot, PRIV, TensorType::GRADIENT)); + this->ptr->get_variadic_tensor(slot, PRIV, TrainingTensorType::GRADIENT)); } template @@ -101,7 +101,7 @@ struct TaskArgumentAccessor { std::vector> get_variadic_optimizer_tensor(slot_id_t slot) const { return std::get>>( - this->ptr->get_variadic_tensor(slot, PRIV, TensorType::OPTIMIZER)); + this->ptr->get_variadic_tensor(slot, PRIV, TrainingTensorType::OPTIMIZER)); } template @@ -114,13 +114,22 @@ struct TaskArgumentAccessor { std::vector> get_variadic_loss_tensor(slot_id_t slot) const { return std::get>>( - this->ptr->get_variadic_tensor(slot, PRIV, TensorType::LOSS)); + this->ptr->get_variadic_tensor(slot, PRIV, TrainingTensorType::LOSS)); } Allocator get_allocator() const { return this->ptr->get_allocator(); } + device_id_t get_device_idx() const { + return this->ptr->get_device_idx(); + } + + template + DeviceSpecific make_device_specific(T const &t) const { + return DeviceSpecific::create(this->get_device_idx(), t); + } + template static typename std::enable_if::value, diff --git a/lib/task-spec/include/task-spec/task_binding.h b/lib/task-spec/include/task-spec/task_binding.h deleted file mode 100644 index 4cc286e104..0000000000 --- a/lib/task-spec/include/task-spec/task_binding.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_TASK_BINDING_H -#define _FLEXFLOW_LOCAL_EXECUTION_TASK_BINDING_H - -#include "task-spec/loss_tensor_guid_t.dtg.h" -#include "task-spec/optimizer_tensor_guid_t.dtg.h" -#include "task-spec/slot_id_t.dtg.h" -#include "task-spec/task_arg_spec.dtg.h" -#include "task-spec/task_id_t.dtg.h" -#include "task-spec/task_signature.dtg.h" -#include "task-spec/tensor_sub_slot_id_t.dtg.h" -#include "task-spec/training_tensor_guid_t.dtg.h" - -namespace FlexFlow { - -struct TaskBinding { - TaskBinding(); - - explicit TaskBinding( - std::unordered_map const - &tensor_bindings, - std::unordered_map const &arg_bindings); - - void bind(int, forward_tensor_guid_t const &); - void bind(slot_id_t, 
forward_tensor_guid_t const &); - - void bind_grad(int, gradient_tensor_guid_t const &); - void bind_grad(slot_id_t, gradient_tensor_guid_t const &); - - void bind_optimizer(int, optimizer_tensor_guid_t const &); - void bind_optimizer(slot_id_t, optimizer_tensor_guid_t const &); - - void bind_loss(int, loss_tensor_guid_t const &); - void bind_loss(slot_id_t, loss_tensor_guid_t const &); - - template - void bind_arg(int name, T const &t) { - this->bind_arg(slot_id_t{name}, t); - } - - template - void bind_arg(slot_id_t name, T const &t) { - this->insert_arg_spec(name, TaskArgSpec{ConcreteArgSpec::create(t)}); - } - - template - void bind_arg(int name, RuntimeArgRef const &t) { - this->bind_arg(slot_id_t{name}, t); - } - - template - void bind_arg(slot_id_t name, RuntimeArgRef const &ref) { - this->insert_arg_spec(name, TaskArgSpec{RuntimeArgRefSpec::create(ref)}); - } - - bool operator==(TaskBinding const &other) const; - bool operator!=(TaskBinding const &other) const; - - std::unordered_map const & - get_tensor_bindings() const; - std::unordered_map const &get_arg_bindings() const; - void insert_arg_spec(slot_id_t name, TaskArgSpec const &arg_spec); - -private: - std::unordered_map - tensor_bindings; - std::unordered_map arg_bindings; - -private: - std::tuple - tie() const; - - friend ::std::hash; -}; - -std::string format_as(TaskBinding const &x); -std::ostream &operator<<(std::ostream &s, TaskBinding const &x); - -} // namespace FlexFlow - -namespace std { - -template <> -struct hash<::FlexFlow::TaskBinding> { - size_t operator()(::FlexFlow::TaskBinding const &s) const; -}; - -} // namespace std - -#endif diff --git a/lib/task-spec/include/task-spec/task_invocation.h b/lib/task-spec/include/task-spec/task_invocation.h deleted file mode 100644 index 85940091a1..0000000000 --- a/lib/task-spec/include/task-spec/task_invocation.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_TASK_INVOCATION_H -#define _FLEXFLOW_LOCAL_EXECUTION_TASK_INVOCATION_H - -#include "task-spec/task_invocation.dtg.h" - -namespace FlexFlow { - -bool is_invocation_valid(TaskSignature const &sig, TaskInvocation const &inv); - -} - -#endif diff --git a/lib/task-spec/include/task-spec/task_signature_impl.h b/lib/task-spec/include/task-spec/task_signature_impl.h index a781e53485..453e6bbcc4 100644 --- a/lib/task-spec/include/task-spec/task_signature_impl.h +++ b/lib/task-spec/include/task-spec/task_signature_impl.h @@ -11,12 +11,14 @@ namespace FlexFlow { TaskSignatureAndImpl get_task_signature_and_impl_for_task_id(task_id_t const &); std::vector get_task_ids(ComputationGraphOpAttrs const &); -OpTaskInvocation get_init_op_task_invocation(ComputationGraphOpAttrs const &); -OpTaskInvocation +std::optional get_init_op_task_invocation(ComputationGraphOpAttrs const &); +std::optional get_forward_op_task_invocation(ComputationGraphOpAttrs const &); -OpTaskInvocation +std::optional get_backward_op_task_invocation(ComputationGraphOpAttrs const &); +std::optional get_op_task_invocation(ComputationGraphOpAttrs const &, OpTaskType); + } // namespace FlexFlow #endif diff --git a/lib/task-spec/include/task-spec/tensor_type_slot_spec.struct.toml b/lib/task-spec/include/task-spec/tensor_type_slot_spec.struct.toml index 26e70a5ef8..93372d18fc 100644 --- a/lib/task-spec/include/task-spec/tensor_type_slot_spec.struct.toml +++ b/lib/task-spec/include/task-spec/tensor_type_slot_spec.struct.toml @@ -10,7 +10,7 @@ features = [ includes = [ "task-spec/slot_type.dtg.h", "task-spec/slot_id_t.dtg.h", - 
"task-spec/tensor_type.dtg.h", + "task-spec/training_tensor_type.dtg.h", ] [[fields]] @@ -19,7 +19,7 @@ type = "::FlexFlow::slot_id_t" [[fields]] name = "tensor_type" -type = "::FlexFlow::TensorType" +type = "::FlexFlow::TrainingTensorType" [[fields]] name = "slot_type" diff --git a/lib/task-spec/include/task-spec/training_computation_graph.h b/lib/task-spec/include/task-spec/training_computation_graph.h deleted file mode 100644 index 1cda57a49e..0000000000 --- a/lib/task-spec/include/task-spec/training_computation_graph.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_COMPUTATION_GRAPH_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_COMPUTATION_GRAPH_H - -#include "pcg/optimizer_attrs.dtg.h" -#include "task-spec/forward_tensor_source.h" -#include "task-spec/gradient_tensor_source.h" -#include "task-spec/loss_tensor_source.h" -#include "task-spec/optimizer_tensor_source.h" -#include "task-spec/training_computation_graph.dtg.h" -#include "task-spec/training_layer_plus_context.dtg.h" -#include "task-spec/training_tensor_guid_t.dtg.h" - -namespace FlexFlow { - -TrainingComputationGraph generate_training_computation_graph( - ComputationGraph const &computation_graph, - OptimizerAttrs const &optimizer_attrs, - tensor_guid_t const &logit_tensor, - ForwardTensorSource &forward_tensor_source, - GradientTensorSource &gradient_tensor_source, - OptimizerTensorSource &optimizer_tensor_source, - LossTensorSource &loss_tensor_source); - -TrainingTensorGroup - get_training_tensor_group_for_tensor_guid(TrainingComputationGraph const &, - tensor_guid_t); -TrainingTensorGroupWithAttrs - get_training_tensor_group_with_attrs_for_tensor_guid( - TrainingComputationGraph const &, tensor_guid_t); - -forward_tensor_guid_t - get_forward_tensor_guid_for_tensor_guid(TrainingComputationGraph const &, - tensor_guid_t); -gradient_tensor_guid_t - get_gradient_tensor_guid_for_tensor_guid(TrainingComputationGraph const &, - tensor_guid_t); -std::vector - get_optimizer_tensor_guids_for_tensor_guid(TrainingComputationGraph const &, - tensor_guid_t); - -tensor_guid_t - get_tensor_guid_for_forward_tensor_guid(TrainingComputationGraph const &, - forward_tensor_guid_t); -tensor_guid_t - get_tensor_guid_for_gradient_tensor_guid(TrainingComputationGraph const &, - gradient_tensor_guid_t); -tensor_guid_t - get_tensor_guid_for_optimizer_tensor_guid(TrainingComputationGraph const &, - optimizer_tensor_guid_t); - -tensor_guid_t - get_tensor_guid_for_training_tensor_guid(TrainingComputationGraph const &, - training_tensor_guid_t); - -std::unordered_set - get_all_training_tensors_in_training_computation_graph( - TrainingComputationGraph const &); - -TrainingLayerPlusContext - get_training_layer_plus_context(TrainingComputationGraph const &, - layer_guid_t); - -std::unordered_map - get_all_training_tensor_shapes(TrainingComputationGraph const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/training_layer_plus_context.h b/lib/task-spec/include/task-spec/training_layer_plus_context.h deleted file mode 100644 index 4ce1ddf1a9..0000000000 --- a/lib/task-spec/include/task-spec/training_layer_plus_context.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_LAYER_PLUS_CONTEXT_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_LAYER_PLUS_CONTEXT_H - -#include "pcg/cg_operator_tensor_shape_signature.dtg.h" -#include "pcg/tensor_role.dtg.h" -#include "task-spec/training_layer_plus_context.dtg.h" 
-#include "task-spec/training_layer_tensor_group_signature.dtg.h" - -namespace FlexFlow { - -std::vector - get_training_tensor_groups_with_attrs_for_role( - TrainingLayerPlusContext const &training_layer_plus_context, - TensorRole tensor_role); - -TrainingTensorGroupWithAttrs - get_training_tensor_group_with_attrs_for_role_and_index( - TrainingLayerPlusContext const &training_layer_plus_context, - TensorRole tensor_role, - nonnegative_int index); - -std::vector - get_input_tensors(TrainingLayerPlusContext const &); -std::vector - get_input_grad_tensors(TrainingLayerPlusContext const &); -std::vector - get_input_tensor_shapes(TrainingLayerPlusContext const &); - -std::vector - get_weight_tensors(TrainingLayerPlusContext const &); -std::vector - get_weight_grad_tensors(TrainingLayerPlusContext const &); -std::vector - get_weight_tensor_shapes(TrainingLayerPlusContext const &); - -std::vector - get_output_tensors(TrainingLayerPlusContext const &); -std::vector - get_output_grad_tensors(TrainingLayerPlusContext const &); -std::vector - get_output_tensor_shapes(TrainingLayerPlusContext const &); - -TrainingLayerTensorGroupSignature - get_tensor_group_signature(TrainingLayerPlusContext const &); -CGOperatorTensorShapeSignature - get_cg_op_shape_signature(TrainingLayerPlusContext const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/training_layer_plus_context.struct.toml b/lib/task-spec/include/task-spec/training_layer_plus_context.struct.toml deleted file mode 100644 index 9090059351..0000000000 --- a/lib/task-spec/include/task-spec/training_layer_plus_context.struct.toml +++ /dev/null @@ -1,29 +0,0 @@ -namespace = "FlexFlow" -name = "TrainingLayerPlusContext" -features = [] - -includes = [ - "pcg/layer_guid_t.dtg.h", - "pcg/layer_attrs.dtg.h", - "task-spec/training_tensor_group_with_attrs.dtg.h", -] - -[[fields]] -name = "layer_guid" -type = "::FlexFlow::layer_guid_t" - -[[fields]] -name = "layer_attrs" -type = "::FlexFlow::LayerAttrs" - -[[fields]] -name = "input_tensor_groups" -type = "std::vector<::FlexFlow::TrainingTensorGroupWithAttrs>" - -[[fields]] -name = "weight_tensor_groups" -type = "std::vector<::FlexFlow::TrainingTensorGroupWithAttrs>" - -[[fields]] -name = "output_tensor_groups" -type = "std::vector<::FlexFlow::TrainingTensorGroupWithAttrs>" diff --git a/lib/task-spec/include/task-spec/training_layer_tensor_group_signature.h b/lib/task-spec/include/task-spec/training_layer_tensor_group_signature.h deleted file mode 100644 index 62b11e3af3..0000000000 --- a/lib/task-spec/include/task-spec/training_layer_tensor_group_signature.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_LAYER_TENSOR_GROUP_SIGNATURE_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_LAYER_TENSOR_GROUP_SIGNATURE_H - -#include "pcg/tensor_role.dtg.h" -#include "task-spec/training_layer_tensor_group_signature.dtg.h" -#include "utils/nonnegative_int/nonnegative_int.h" - -namespace FlexFlow { - -std::vector get_training_tensor_groups_for_role( - TrainingLayerTensorGroupSignature const &signature, TensorRole tensor_role); - -TrainingTensorGroup get_training_tensor_group_for_role_and_index( - TrainingLayerTensorGroupSignature const &signature, - TensorRole tensor_role, - nonnegative_int index); - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/training_layer_tensor_group_signature.struct.toml b/lib/task-spec/include/task-spec/training_layer_tensor_group_signature.struct.toml deleted file mode 
100644 index d9859559a1..0000000000 --- a/lib/task-spec/include/task-spec/training_layer_tensor_group_signature.struct.toml +++ /dev/null @@ -1,19 +0,0 @@ -namespace = "FlexFlow" -name = "TrainingLayerTensorGroupSignature" -features = [] - -includes = [ - "task-spec/training_tensor_group.dtg.h", -] - -[[fields]] -name = "input_tensor_groups" -type = "std::vector<::FlexFlow::TrainingTensorGroup>" - -[[fields]] -name = "weight_tensor_groups" -type = "std::vector<::FlexFlow::TrainingTensorGroup>" - -[[fields]] -name = "output_tensor_groups" -type = "std::vector<::FlexFlow::TrainingTensorGroup>" diff --git a/lib/task-spec/include/task-spec/training_symbolic_computation_graph.h b/lib/task-spec/include/task-spec/training_symbolic_computation_graph.h new file mode 100644 index 0000000000..4cb1fd4c52 --- /dev/null +++ b/lib/task-spec/include/task-spec/training_symbolic_computation_graph.h @@ -0,0 +1,102 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_SYMBOLIC_COMPUTATION_GRAPH_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_SYMBOLIC_COMPUTATION_GRAPH_H + +#include "task-spec/op_task_type.dtg.h" +#include "task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.dtg.h" +#include "task-spec/symbolic_forward_tensor_source.h" +#include "task-spec/symbolic_gradient_tensor_source.h" +#include "task-spec/symbolic_loss_tensor_source.h" +#include "task-spec/symbolic_optimizer_tensor_source.h" +#include "task-spec/runtime_task_invocation.dtg.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.dtg.h" +#include "task-spec/training_symbolic_computation_graph.dtg.h" +#include "task-spec/symbolic_training_layer_attrs_plus_context.dtg.h" +#include "task-spec/symbolic_training_tensor_guid_t.dtg.h" +#include "task-spec/symbolic_training_tensor_group_with_attrs.dtg.h" +#include "task-spec/training_symbolic_computation_graph_from_cg_conversion.dtg.h" +#include "task-spec/training_symbolic_computation_graph_from_pcg_conversion.dtg.h" +#include "task-spec/symbolic_layer_guid_t.dtg.h" +#include "task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.dtg.h" + +namespace FlexFlow { + +TensorShape get_symbolic_tensor_shape(TrainingSymbolicComputationGraph const &, + symbolic_tensor_guid_t); + +PCGOperatorAttrs get_op_attrs_for_symbolic_layer_guid(TrainingSymbolicComputationGraph const &, + symbolic_layer_guid_t); + +SymbolicLayerTrainingTensorGroupSignatureWithShapes + get_signature_with_shapes_for_symbolic_layer_guid(TrainingSymbolicComputationGraph const &, + symbolic_layer_guid_t); + +symbolic_forward_tensor_guid_t get_forward_symbolic_tensor_guid_for_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_tensor_guid_t); + +symbolic_gradient_tensor_guid_t get_gradient_symbolic_tensor_guid_for_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_tensor_guid_t); + +std::vector get_optimizer_tensor_guids_for_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_tensor_guid_t); + +symbolic_tensor_guid_t get_symbolic_tensor_guid_for_forward_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_forward_tensor_guid_t); + +symbolic_tensor_guid_t get_symbolic_tensor_guid_for_gradient_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_gradient_tensor_guid_t); + +symbolic_tensor_guid_t get_symbolic_tensor_guid_for_optimizer_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_optimizer_tensor_guid_t); + 
+symbolic_tensor_guid_t get_symbolic_tensor_guid_for_training_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_training_tensor_guid_t); + +std::unordered_set<symbolic_training_tensor_guid_t> + get_all_symbolic_training_tensors_in_training_computation_graph( + TrainingSymbolicComputationGraph const &); + +std::vector<symbolic_layer_guid_t> + symbolic_cg_topological_ordering(TrainingSymbolicComputationGraph const &); + +SymbolicTrainingLayerAttrsPlusContext + get_symbolic_training_layer_attrs_plus_context(TrainingSymbolicComputationGraph const &, + symbolic_layer_guid_t); + +std::unordered_map<symbolic_training_tensor_guid_t, TensorShape> + get_all_symbolic_training_tensor_shapes(TrainingSymbolicComputationGraph const &); + +SymbolicCgOpAttrsAndTrainingSignatureWithShapes + get_attrs_and_signature_for_layer(TrainingSymbolicComputationGraph const &, + symbolic_layer_guid_t); + +std::optional<RuntimeTaskInvocation> + get_init_runtime_task_invocation_for_layer(symbolic_layer_guid_t, + SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &); + +std::optional<RuntimeTaskInvocation> + get_forward_runtime_task_invocation_for_layer(symbolic_layer_guid_t, + SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &); + +std::optional<RuntimeTaskInvocation> + get_backward_runtime_task_invocation_for_layer(symbolic_layer_guid_t, + SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &); + +std::optional<RuntimeTaskInvocation> + get_runtime_task_invocation_for_layer_and_type(symbolic_layer_guid_t, + SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &, + OpTaskType); + +RuntimeTaskInvocation + get_compute_loss_runtime_task_invocation(LossAttrs const &, + symbolic_forward_tensor_guid_t loss_fwd_tensor, + symbolic_gradient_tensor_guid_t loss_grad_tensor, + symbolic_loss_tensor_guid_t label_tensor); + +std::optional<RuntimeTaskInvocation> + get_update_runtime_task_invocation_for_layer(SymbolicTrainingLayerAttrsPlusContext const &, + OptimizerAttrs const &); + +} // namespace FlexFlow + +#endif + diff --git a/lib/task-spec/include/task-spec/training_symbolic_computation_graph.struct.toml b/lib/task-spec/include/task-spec/training_symbolic_computation_graph.struct.toml new file mode 100644 index 0000000000..cc52e49494 --- /dev/null +++ b/lib/task-spec/include/task-spec/training_symbolic_computation_graph.struct.toml @@ -0,0 +1,38 @@ +namespace = "FlexFlow" +name = "TrainingSymbolicComputationGraph" +features = [] + +includes = [ + "utils/graph/labelled_dataflow_graph/labelled_dataflow_graph.h", + "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h", + "", + "task-spec/symbolic_tensor_guid_t.dtg.h", + "task-spec/symbolic_training_tensor_group.dtg.h", + "task-spec/symbolic_loss_tensor_guid_t.dtg.h", + "op-attrs/ops/loss_functions/loss_attrs.dtg.h", + "pcg/optimizer_attrs.dtg.h", +] + +[[fields]] +name = "symbolic_computation_graph" +type = "::FlexFlow::LabelledDataflowGraphView<::FlexFlow::ParallelLayerAttrs, ::FlexFlow::TensorShape>" + +[[fields]] +name = "symbolic_training_tensor_group_for_tensor" +type = "std::unordered_map<::FlexFlow::symbolic_tensor_guid_t, ::FlexFlow::SymbolicTrainingTensorGroup>" + +[[fields]] +name = "loss_attrs" +type = "::FlexFlow::LossAttrs" + +[[fields]] +name = "optimizer_attrs" +type = "::FlexFlow::OptimizerAttrs" + +[[fields]] +name = "logit_tensor" +type = "::FlexFlow::symbolic_tensor_guid_t" + +[[fields]] +name = "label_tensor" +type = "::FlexFlow::symbolic_loss_tensor_guid_t" diff --git a/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_cg_conversion.h b/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_cg_conversion.h new file mode 100644 index 0000000000..272acb61a3 --- /dev/null +++
b/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_cg_conversion.h @@ -0,0 +1,51 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_SYMBOLIC_COMPUTATION_GRAPH_FROM_CG_CONVERSION_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_SYMBOLIC_COMPUTATION_GRAPH_FROM_CG_CONVERSION_H + +#include "pcg/computation_graph.dtg.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "task-spec/symbolic_forward_tensor_source.h" +#include "task-spec/symbolic_gradient_tensor_source.h" +#include "task-spec/symbolic_loss_tensor_source.h" +#include "task-spec/symbolic_optimizer_tensor_source.h" +#include "task-spec/training_symbolic_computation_graph_from_cg_conversion.dtg.h" +#include "task-spec/symbolic_training_tensor_group_with_attrs.dtg.h" + +namespace FlexFlow { + +TrainingSymbolicComputationGraphFromCgConversion generate_training_computation_graph_from_cg( + ComputationGraph const &computation_graph, + OptimizerAttrs const &optimizer_attrs, + tensor_guid_t const &logit_tensor, + SymbolicForwardTensorSource &forward_tensor_source, + SymbolicGradientTensorSource &gradient_tensor_source, + SymbolicOptimizerTensorSource &optimizer_tensor_source, + SymbolicLossTensorSource &loss_tensor_source); + +SymbolicTrainingTensorGroup + get_training_tensor_group_for_tensor_guid(TrainingSymbolicComputationGraphFromCgConversion const &, + tensor_guid_t); + +SymbolicTrainingTensorGroupWithAttrs + get_training_tensor_group_with_attrs_for_tensor_guid( + TrainingSymbolicComputationGraphFromCgConversion const &, + tensor_guid_t); + +symbolic_layer_guid_t + get_symbolic_layer_guid_for_layer_guid(TrainingSymbolicComputationGraphFromCgConversion const &, + layer_guid_t); + +symbolic_tensor_guid_t + get_symbolic_tensor_guid_for_tensor_guid(TrainingSymbolicComputationGraphFromCgConversion const &, + tensor_guid_t); + +tensor_guid_t + get_tensor_guid_for_symbolic_tensor_guid(TrainingSymbolicComputationGraphFromCgConversion const &, + symbolic_tensor_guid_t); + +layer_guid_t + get_layer_guid_for_symbolic_layer_guid(TrainingSymbolicComputationGraphFromCgConversion const &, + symbolic_layer_guid_t); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_cg_conversion.struct.toml b/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_cg_conversion.struct.toml new file mode 100644 index 0000000000..f51b04414d --- /dev/null +++ b/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_cg_conversion.struct.toml @@ -0,0 +1,25 @@ +namespace = "FlexFlow" +name = "TrainingSymbolicComputationGraphFromCgConversion" +features = [] + +includes = [ + "task-spec/training_symbolic_computation_graph.dtg.h", + "utils/bidict/bidict.h", + "pcg/tensor_guid_t.dtg.h", + "task-spec/symbolic_training_tensor_guid_t.dtg.h", + "task-spec/symbolic_tensor_guid_t.dtg.h", + "task-spec/symbolic_layer_guid_t.dtg.h", + "pcg/layer_guid_t.dtg.h", +] + +[[fields]] +name = "training_symbolic_computation_graph" +type = "::FlexFlow::TrainingSymbolicComputationGraph" + +[[fields]] +name = "tensor_mapping" +type = "::FlexFlow::bidict<::FlexFlow::tensor_guid_t, ::FlexFlow::symbolic_tensor_guid_t>" + +[[fields]] +name = "layer_mapping" +type = "::FlexFlow::bidict<::FlexFlow::layer_guid_t, ::FlexFlow::symbolic_layer_guid_t>" diff --git a/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_pcg_conversion.h b/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_pcg_conversion.h new file mode 
100644 index 0000000000..11a297189d --- /dev/null +++ b/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_pcg_conversion.h @@ -0,0 +1,26 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_SYMBOLIC_COMPUTATION_GRAPH_FROM_PCG_CONVERSION_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_SYMBOLIC_COMPUTATION_GRAPH_FROM_PCG_CONVERSION_H + +#include "pcg/optimizer_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" +#include "task-spec/symbolic_forward_tensor_source.h" +#include "task-spec/symbolic_gradient_tensor_source.h" +#include "task-spec/symbolic_loss_tensor_source.h" +#include "task-spec/symbolic_optimizer_tensor_source.h" +#include "task-spec/training_symbolic_computation_graph_from_pcg_conversion.dtg.h" + + +namespace FlexFlow { + +TrainingSymbolicComputationGraphFromPcgConversion generate_training_computation_graph_from_pcg( + ParallelComputationGraph const &computation_graph, + OptimizerAttrs const &optimizer_attrs, + parallel_tensor_guid_t const &logit_tensor, + SymbolicForwardTensorSource &forward_tensor_source, + SymbolicGradientTensorSource &gradient_tensor_source, + SymbolicOptimizerTensorSource &optimizer_tensor_source, + SymbolicLossTensorSource &loss_tensor_source); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_pcg_conversion.struct.toml b/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_pcg_conversion.struct.toml new file mode 100644 index 0000000000..0d479ff0ed --- /dev/null +++ b/lib/task-spec/include/task-spec/training_symbolic_computation_graph_from_pcg_conversion.struct.toml @@ -0,0 +1,25 @@ +namespace = "FlexFlow" +name = "TrainingSymbolicComputationGraphFromPcgConversion" +features = [] + +includes = [ + "task-spec/training_symbolic_computation_graph.dtg.h", + "utils/bidict/bidict.h", + "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h", + "task-spec/symbolic_training_tensor_guid_t.dtg.h", + "task-spec/symbolic_tensor_guid_t.dtg.h", + "task-spec/symbolic_layer_guid_t.dtg.h", + "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h", +] + +[[fields]] +name = "training_symbolic_computation_graph" +type = "::FlexFlow::TrainingSymbolicComputationGraph" + +[[fields]] +name = "tensor_mapping" +type = "::FlexFlow::bidict<::FlexFlow::parallel_tensor_guid_t, ::FlexFlow::symbolic_tensor_guid_t>" + +[[fields]] +name = "layer_mapping" +type = "::FlexFlow::bidict<::FlexFlow::parallel_layer_guid_t, ::FlexFlow::symbolic_layer_guid_t>" diff --git a/lib/task-spec/include/task-spec/training_tensor_group.h b/lib/task-spec/include/task-spec/training_tensor_group.h deleted file mode 100644 index 40269ceab0..0000000000 --- a/lib/task-spec/include/task-spec/training_tensor_group.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_TRAINING_TENSOR_GROUP_H -#define _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_TRAINING_TENSOR_GROUP_H - -#include "pcg/optimizer_attrs.dtg.h" -#include "pcg/tensor_attrs.dtg.h" -#include "pcg/tensor_guid_t.dtg.h" -#include "task-spec/forward_tensor_source.h" -#include "task-spec/gradient_tensor_source.h" -#include "task-spec/optimizer_tensor_source.h" -#include "task-spec/training_tensor_group.dtg.h" -#include "task-spec/training_tensor_guid_t.dtg.h" - -namespace FlexFlow { - -TrainingTensorGroup make_training_tensor_group_for_tensor_guid_t( - tensor_guid_t tensor_guid, - TensorAttrs const &tensor_attrs, - 
OptimizerAttrs const &optimizer_attrs, - ForwardTensorSource &forward_tensor_source, - GradientTensorSource &gradient_tensor_source, - OptimizerTensorSource &optimizer_tensor_source); - -std::unordered_set - get_all_training_tensors_in_tensor_group(TrainingTensorGroup const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/training_tensor_group.struct.toml b/lib/task-spec/include/task-spec/training_tensor_group.struct.toml deleted file mode 100644 index eadaac08ad..0000000000 --- a/lib/task-spec/include/task-spec/training_tensor_group.struct.toml +++ /dev/null @@ -1,31 +0,0 @@ -namespace = "FlexFlow" -name = "TrainingTensorGroup" -features = [ - "eq", - "ord", - "fmt", - "hash", -] - -includes = [ - "task-spec/forward_tensor_guid_t.dtg.h", - "task-spec/gradient_tensor_guid_t.dtg.h", - "task-spec/optimizer_tensor_guid_t.dtg.h", -] - -src_includes = [ - "utils/hash/vector.h", - "utils/fmt/vector.h", -] - -[[fields]] -name = "forward_tensor" -type = "::FlexFlow::forward_tensor_guid_t" - -[[fields]] -name = "gradient_tensor" -type = "::FlexFlow::gradient_tensor_guid_t" - -[[fields]] -name = "optimizer_tensors" -type = "std::vector<::FlexFlow::optimizer_tensor_guid_t>" diff --git a/lib/task-spec/include/task-spec/training_tensor_group_with_attrs.h b/lib/task-spec/include/task-spec/training_tensor_group_with_attrs.h deleted file mode 100644 index 2560228b1c..0000000000 --- a/lib/task-spec/include/task-spec/training_tensor_group_with_attrs.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_TENSOR_GROUP_WITH_ATTRS_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_TENSOR_GROUP_WITH_ATTRS_H - -#include "task-spec/training_tensor_group.dtg.h" -#include "task-spec/training_tensor_group_with_attrs.dtg.h" - -namespace FlexFlow { - -TrainingTensorGroupWithAttrs - make_training_tensor_group_with_attrs_from_group_and_attrs( - TrainingTensorGroup const &group, TensorAttrs const &attrs); - -TrainingTensorGroup - tensor_group_without_attrs(TrainingTensorGroupWithAttrs const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/training_tensor_group_with_attrs.struct.toml b/lib/task-spec/include/task-spec/training_tensor_group_with_attrs.struct.toml deleted file mode 100644 index 5816214fb3..0000000000 --- a/lib/task-spec/include/task-spec/training_tensor_group_with_attrs.struct.toml +++ /dev/null @@ -1,37 +0,0 @@ -namespace = "FlexFlow" -name = "TrainingTensorGroupWithAttrs" -features = [ - "eq", - "ord", - "fmt", - "hash", -] - -includes = [ - "pcg/tensor_attrs.dtg.h", - "task-spec/forward_tensor_guid_t.dtg.h", - "task-spec/gradient_tensor_guid_t.dtg.h", - "task-spec/optimizer_tensor_guid_t.dtg.h", -] - -src_includes = [ - "utils/hash/vector.h", - "utils/fmt/vector.h", -] - -[[fields]] -name = "tensor_attrs" -type = "::FlexFlow::TensorAttrs" - -[[fields]] -name = "forward_tensor" -type = "::FlexFlow::forward_tensor_guid_t" - -[[fields]] -name = "gradient_tensor" -type = "::FlexFlow::gradient_tensor_guid_t" - -[[fields]] -name = "optimizer_tensors" -type = "std::vector<::FlexFlow::optimizer_tensor_guid_t>" - diff --git a/lib/task-spec/include/task-spec/training_tensor_guid_t.variant.toml b/lib/task-spec/include/task-spec/training_tensor_guid_t.variant.toml deleted file mode 100644 index d2520dacbf..0000000000 --- a/lib/task-spec/include/task-spec/training_tensor_guid_t.variant.toml +++ /dev/null @@ -1,31 +0,0 @@ -namespace = "FlexFlow" -name = "training_tensor_guid_t" -features = [ - "eq", 
- "ord", - "hash", - "fmt", -] - -includes = [ - "task-spec/forward_tensor_guid_t.dtg.h", - "task-spec/optimizer_tensor_guid_t.dtg.h", - "task-spec/gradient_tensor_guid_t.dtg.h", - "task-spec/loss_tensor_guid_t.dtg.h" -] - -[[values]] -type = "::FlexFlow::forward_tensor_guid_t" -key = "forward_tensor" - -[[values]] -type = "::FlexFlow::gradient_tensor_guid_t" -key = "gradient_tensor" - -[[values]] -type = "::FlexFlow::optimizer_tensor_guid_t" -key = "optimizer_tensor" - -[[values]] -type = "::FlexFlow::loss_tensor_guid_t" -key = "loss_tensor" diff --git a/lib/task-spec/include/task-spec/training_tensor_slot_binding.struct.toml b/lib/task-spec/include/task-spec/training_tensor_slot_binding.struct.toml new file mode 100644 index 0000000000..618248b544 --- /dev/null +++ b/lib/task-spec/include/task-spec/training_tensor_slot_binding.struct.toml @@ -0,0 +1,21 @@ +namespace = "FlexFlow" +name = "TrainingTensorSlotBinding" +features = [ + "eq", + "ord", + "hash", + "fmt", +] + +includes = [ + "task-spec/training_tensor_slot_id_t.dtg.h", + "task-spec/symbolic_training_tensor_guid_t.dtg.h", +] + +[[fields]] +name = "slot" +type = "::FlexFlow::training_tensor_slot_id_t" + +[[fields]] +name = "bound" +type = "::FlexFlow::symbolic_training_tensor_guid_t" diff --git a/lib/task-spec/include/task-spec/training_tensor_slot_id_t.h b/lib/task-spec/include/task-spec/training_tensor_slot_id_t.h new file mode 100644 index 0000000000..03b58e88bc --- /dev/null +++ b/lib/task-spec/include/task-spec/training_tensor_slot_id_t.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_TENSOR_SLOT_ID_T_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TRAINING_TENSOR_SLOT_ID_T_H + +#include "task-spec/fwb_tensor_slot_id_t.dtg.h" +#include "task-spec/training_tensor_slot_id_t.dtg.h" + +namespace FlexFlow { + +training_tensor_slot_id_t + training_tensor_slot_from_fwb_slot(fwb_tensor_slot_id_t); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/tensor_sub_slot_id_t.struct.toml b/lib/task-spec/include/task-spec/training_tensor_slot_id_t.struct.toml similarity index 64% rename from lib/task-spec/include/task-spec/tensor_sub_slot_id_t.struct.toml rename to lib/task-spec/include/task-spec/training_tensor_slot_id_t.struct.toml index a830725a27..45d5330f5f 100644 --- a/lib/task-spec/include/task-spec/tensor_sub_slot_id_t.struct.toml +++ b/lib/task-spec/include/task-spec/training_tensor_slot_id_t.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "tensor_sub_slot_id_t" +name = "training_tensor_slot_id_t" features = [ "eq", "ord", @@ -8,7 +8,7 @@ features = [ ] includes = [ - "task-spec/tensor_type.dtg.h", + "task-spec/training_tensor_type.dtg.h", "task-spec/slot_id_t.dtg.h", ] @@ -18,4 +18,4 @@ type = "::FlexFlow::slot_id_t" [[fields]] name = "tensor_type" -type = "::FlexFlow::TensorType" +type = "::FlexFlow::TrainingTensorType" diff --git a/lib/task-spec/include/task-spec/tensor_type.enum.toml b/lib/task-spec/include/task-spec/training_tensor_type.enum.toml similarity index 87% rename from lib/task-spec/include/task-spec/tensor_type.enum.toml rename to lib/task-spec/include/task-spec/training_tensor_type.enum.toml index b1ae8fa667..d68bdd4216 100644 --- a/lib/task-spec/include/task-spec/tensor_type.enum.toml +++ b/lib/task-spec/include/task-spec/training_tensor_type.enum.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "TensorType" +name = "TrainingTensorType" features = [ "hash", "fmt", diff --git a/lib/task-spec/src/task-spec/arg_ref.cc 
b/lib/task-spec/src/task-spec/arg_ref.cc new file mode 100644 index 0000000000..2221fe5932 --- /dev/null +++ b/lib/task-spec/src/task-spec/arg_ref.cc @@ -0,0 +1,11 @@ +#include "task-spec/arg_ref.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using LABEL_TYPE = value_type<0>; +using T = value_type<1>; + +template struct ArgRef<LABEL_TYPE, T>; + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/arg_ref_spec.cc b/lib/task-spec/src/task-spec/arg_ref_spec.cc new file mode 100644 index 0000000000..4f53ae0711 --- /dev/null +++ b/lib/task-spec/src/task-spec/arg_ref_spec.cc @@ -0,0 +1,10 @@ +#include "task-spec/arg_ref_spec.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using LABEL_TYPE = value_type<0>; + +template struct ArgRefSpec<LABEL_TYPE>; + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/device_specific.cc b/lib/task-spec/src/task-spec/device_specific.cc new file mode 100644 index 0000000000..153b044851 --- /dev/null +++ b/lib/task-spec/src/task-spec/device_specific.cc @@ -0,0 +1,10 @@ +#include "task-spec/device_specific.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using T = value_type<0>; + +template struct DeviceSpecific<T>; + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/forward_tensor_source.cc b/lib/task-spec/src/task-spec/forward_tensor_source.cc deleted file mode 100644 index 3d82452377..0000000000 --- a/lib/task-spec/src/task-spec/forward_tensor_source.cc +++ /dev/null @@ -1,18 +0,0 @@ -#include "task-spec/forward_tensor_source.h" - -namespace FlexFlow { - -int ForwardTensorSource::next_available_forward_tensor_id = 0; - -ForwardTensorSource::ForwardTensorSource() {} - -forward_tensor_guid_t ForwardTensorSource::new_forward_tensor() { - return forward_tensor_guid_t{ - ForwardTensorSource::next_available_forward_tensor_id++}; -} - -void ForwardTensorSource::reset() { - ForwardTensorSource::next_available_forward_tensor_id = 0; -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/fwb_op_task_type.cc b/lib/task-spec/src/task-spec/fwb_op_task_type.cc new file mode 100644 index 0000000000..17b607d179 --- /dev/null +++ b/lib/task-spec/src/task-spec/fwb_op_task_type.cc @@ -0,0 +1,19 @@ +#include "task-spec/fwb_op_task_type.h" + +namespace FlexFlow { + +std::optional<OpTaskType> + op_task_type_from_fwb_op_task_type(FwbOpTaskType fwb) { + + switch (fwb) { + case FwbOpTaskType::FWD: + return OpTaskType::FWD; + case FwbOpTaskType::BWD: + return OpTaskType::BWD; + default: + return std::nullopt; + }; +} + + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/gradient_tensor_source.cc b/lib/task-spec/src/task-spec/gradient_tensor_source.cc deleted file mode 100644 index 8bc5034634..0000000000 --- a/lib/task-spec/src/task-spec/gradient_tensor_source.cc +++ /dev/null @@ -1,18 +0,0 @@ -#include "task-spec/gradient_tensor_source.h" - -namespace FlexFlow { - -int GradientTensorSource::next_available_gradient_tensor_id = 0; - -GradientTensorSource::GradientTensorSource() {} - -gradient_tensor_guid_t GradientTensorSource::new_gradient_tensor() { - return gradient_tensor_guid_t{ - GradientTensorSource::next_available_gradient_tensor_id++}; -} - -void GradientTensorSource::reset() { - GradientTensorSource::next_available_gradient_tensor_id = 0; -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/loss_functions.cc b/lib/task-spec/src/task-spec/loss_functions.cc index 698ca941d3..d1cdd9c035 100644 --- a/lib/task-spec/src/task-spec/loss_functions.cc +++
b/lib/task-spec/src/task-spec/loss_functions.cc @@ -23,11 +23,11 @@ namespace FlexFlow { enum Slots { LOGIT, LABEL, LOGIT_GRAD, ATTRS, PROFILING, KERNEL_DEVICE_TYPE }; -TaskSignature get_loss_bwd_signature() { - TaskSignature sig = make_empty_task_signature(); - add_slot(sig, LOGIT, TensorType::FORWARD); - add_slot(sig, LABEL, TensorType::LOSS); - add_slot(sig, LOGIT_GRAD, TensorType::GRADIENT); +RuntimeTaskSignature get_loss_bwd_signature() { + RuntimeTaskSignature sig = make_empty_runtime_task_signature(); + add_slot(sig, LOGIT, TrainingTensorType::FORWARD); + add_slot(sig, LABEL, TrainingTensorType::LOSS); + add_slot(sig, LOGIT_GRAD, TrainingTensorType::GRADIENT); add_arg_slot(sig, ATTRS); add_arg_slot(sig, PROFILING); @@ -35,11 +35,11 @@ TaskSignature get_loss_bwd_signature() { return sig; } -TaskInvocation backward(LossAttrs const &attrs, - forward_tensor_guid_t logit, - gradient_tensor_guid_t logit_grad, - loss_tensor_guid_t label) { - TaskBinding b; +RuntimeTaskInvocation loss_attrs_backward(LossAttrs const &attrs, + symbolic_forward_tensor_guid_t logit, + symbolic_gradient_tensor_guid_t logit_grad, + symbolic_loss_tensor_guid_t label) { + RuntimeTaskBinding b; b.bind(LOGIT, logit); b.bind_loss(LABEL, label); b.bind_grad(LOGIT_GRAD, logit_grad); @@ -48,7 +48,7 @@ TaskInvocation backward(LossAttrs const &attrs, b.bind_arg(PROFILING, profiling_settings()); b.bind_arg(KERNEL_DEVICE_TYPE, kernel_device_type()); - return TaskInvocation{task_id_t::LOSS_BWD_TASK_ID, b}; + return RuntimeTaskInvocation{task_id_t::LOSS_BWD_TASK_ID, b}; } static void backward_task_impl(TaskArgumentAccessor const &acc) { @@ -75,7 +75,7 @@ static void backward_task_impl(TaskArgumentAccessor const &acc) { if (loss_type == LossFunction::SPARSE_CATEGORICAL_CROSSENTROPY) { // label shape is [batch dim, 1] auto scce_attrs = attrs.get(); - size_t ndim = get_num_dims(logit.shape.dims).unwrap_nonnegative(); + size_t ndim = get_num_dims(logit.shape.dims).int_from_num_tensor_dims(); int num_classes = dim_at_idx(logit.shape.dims, legion_dim_t{0_n}).int_from_positive_int(); ASSERT(logit_grad.shape == logit.shape); diff --git a/lib/task-spec/src/task-spec/loss_tensor_source.cc b/lib/task-spec/src/task-spec/loss_tensor_source.cc deleted file mode 100644 index 13b97fd604..0000000000 --- a/lib/task-spec/src/task-spec/loss_tensor_source.cc +++ /dev/null @@ -1,13 +0,0 @@ -#include "task-spec/loss_tensor_source.h" - -namespace FlexFlow { - -nonnegative_int LossTensorSource::next_available_loss_tensor_id = 0_n; - -LossTensorSource::LossTensorSource() {} - -loss_tensor_guid_t LossTensorSource::new_loss_tensor() { - return loss_tensor_guid_t{LossTensorSource::next_available_loss_tensor_id++}; -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/lower_op_task_invocation_to_runtime_task_invocation.cc b/lib/task-spec/src/task-spec/lower_op_task_invocation_to_runtime_task_invocation.cc new file mode 100644 index 0000000000..7a2cd55839 --- /dev/null +++ b/lib/task-spec/src/task-spec/lower_op_task_invocation_to_runtime_task_invocation.cc @@ -0,0 +1,186 @@ +#include "task-spec/lower_op_task_invocation_to_runtime_task_invocation.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "pcg/computation_graph.h" +#include "task-spec/fwb_tensor_slot_id_t.dtg.h" +#include "task-spec/symbolic_layer_tensor_shape_signature.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.h" +#include "task-spec/training_tensor_slot_id_t.h" 
+#include "utils/containers/map_values.h" +#include "utils/containers/transform.h" +#include "utils/overload.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.h" + +namespace FlexFlow { + +RuntimeTaskInvocation + lower_op_task_invocation_to_task_invocation( + OpTaskInvocation const &op_task_invocation, + symbolic_layer_guid_t symbolic_layer_guid, + SymbolicLayerTrainingTensorGroupSignatureWithShapes const &layer_signature) { + + std::unordered_map + tensor_bindings = + transform(op_task_invocation.binding.get_tensor_bindings(), + [&](fwb_tensor_slot_id_t const &fwb_slot_id, + OpTensorSpec const &op_tensor_spec) { + FwbTensorSlotBinding fwb_binding = FwbTensorSlotBinding{ + fwb_slot_id, + op_tensor_spec, + }; + + TrainingTensorSlotBinding training_binding = + lower_fwb_tensor_binding_to_training_tensor_binding( + drop_shapes_from_signature(layer_signature), + fwb_binding); + + return std::pair{ + training_binding.slot, + training_binding.bound, + }; + }); + + std::unordered_map arg_bindings = map_values( + op_task_invocation.binding.get_arg_bindings(), + [&](OpArgSpec const &op_arg_spec) -> RuntimeArgSpec { + return lower_op_arg_spec_to_runtime_arg_spec(op_arg_spec, + symbolic_layer_guid, + get_shape_signature(layer_signature)); + }); + + return RuntimeTaskInvocation{ + op_task_invocation.task_id, + RuntimeTaskBinding{ + tensor_bindings, + arg_bindings, + }, + }; +} + +TrainingTensorSlotBinding + lower_fwb_tensor_binding_to_training_tensor_binding( + SymbolicLayerTrainingTensorGroupSignature const &signature, + FwbTensorSlotBinding const &fwb_slot_binding) { + fwb_tensor_slot_id_t fwb_slot_id = fwb_slot_binding.slot; + OpTensorSpec op_tensor_spec = fwb_slot_binding.bound; + + SymbolicTrainingTensorGroup group = get_training_tensor_group_for_role_and_index( + signature, op_tensor_spec.role, op_tensor_spec.idx); + + training_tensor_slot_id_t training_tensor_slot = + training_tensor_slot_from_fwb_slot(fwb_slot_id); + + symbolic_training_tensor_guid_t training_tensor = [&]() -> symbolic_training_tensor_guid_t { + switch (fwb_slot_id.is_grad) { + case IsGrad::NO: + return symbolic_training_tensor_guid_t{ + group.forward_tensor, + }; + case IsGrad::YES: + return symbolic_training_tensor_guid_t{ + group.gradient_tensor, + }; + default: + PANIC("Invalid value for IsGrad {}", fwb_slot_id.is_grad); + } + }(); + + return TrainingTensorSlotBinding{ + training_tensor_slot, + training_tensor, + }; +} + +RuntimeArgSpec lower_op_arg_spec_to_runtime_arg_spec( + OpArgSpec const &op_arg_spec, + symbolic_layer_guid_t symbolic_layer_guid, + SymbolicLayerTensorShapeSignature const &op_shape_signature) { + return op_arg_spec.visit(overload{ + [](ConcreteArgSpec const &concrete_arg_spec) -> RuntimeArgSpec { + return RuntimeArgSpec{concrete_arg_spec}; + }, + [](RuntimeArgRefSpec const &runtime_arg_ref_spec) -> RuntimeArgSpec { + return RuntimeArgSpec{runtime_arg_ref_spec}; + }, + [&](OpArgRefSpec const &op_arg_ref_spec) -> RuntimeArgSpec { + return + lower_op_arg_ref_spec_to_runtime_arg_spec(op_arg_ref_spec, + symbolic_layer_guid, + op_shape_signature); + }, + }); +} + +RuntimeArgSpec lower_op_arg_ref_spec_to_runtime_arg_spec( + OpArgRefSpec const &op_arg_ref_spec, + symbolic_layer_guid_t symbolic_layer_guid, + SymbolicLayerTensorShapeSignature const &op_signature) { + + OpArgRefType op_arg_ref_type = op_arg_ref_spec.get_ref_type(); + return op_arg_ref_type.visit(overload{ + [&](PerDeviceOpStateRefType const &) -> RuntimeArgSpec { + return RuntimeArgSpec{ + 
RuntimeArgRefSpec::create(per_device_op_state_for_layer(symbolic_layer_guid)), + }; + }, + [&](ParallelTensorShapeRefType const &ref_type) -> RuntimeArgSpec { + TensorShape tensor_shape = tensor_shape_for_role_and_index( + /*signature=*/op_signature, + /*tensor_role=*/ref_type.tensor_role, + /*index=*/ref_type.idx); + ParallelTensorShape shape = lift_to_parallel(tensor_shape); + return RuntimeArgSpec{ + ConcreteArgSpec::create(shape), + }; + }, + }); +} + +ConcreteArgSpec + lower_runtime_arg_ref_spec_to_concrete_arg_spec(RuntimeArgRefSpec const &runtime_arg_ref_spec, + RuntimeArgConfig const &runtime_arg_config, + DeviceSpecific const &handle, + std::function const &get_op_state_for_layer) { + RuntimeArgRefType ref_type = runtime_arg_ref_spec.get_ref_type(); + + return ref_type.visit(overload { + [&](ArgumentlessRuntimeArgRefType argumentless_ref_type) + -> ConcreteArgSpec + { + return lower_argumentless_arg_ref_to_concrete_arg_spec( + argumentless_ref_type, + runtime_arg_config, + handle); + }, + [&](PerDeviceOpStateRuntimeArgRefType op_state_ref_type) + -> ConcreteArgSpec + { + DeviceSpecificPerDeviceOpState op_state = get_op_state_for_layer(op_state_ref_type.layer); + + return ConcreteArgSpec::create(op_state); + } + }); +} + +ConcreteArgSpec lower_argumentless_arg_ref_to_concrete_arg_spec( + ArgumentlessRuntimeArgRefType ref_type, + RuntimeArgConfig const &runtime_arg_config, + DeviceSpecific const &handle) { + + switch (ref_type) { + case ArgumentlessRuntimeArgRefType::FF_HANDLE: + return ConcreteArgSpec::create(handle); + case ArgumentlessRuntimeArgRefType::PROFILING_SETTINGS: + return ConcreteArgSpec::create(runtime_arg_config.profiling_settings); + case ArgumentlessRuntimeArgRefType::FF_ITERATION_CONFIG: + PANIC("FF_ITERATION_CONFIG is currently not handled. 
Please create an " + "issue or contact the FlexFlow train developers if you need this " + "feature."); + case ArgumentlessRuntimeArgRefType::KERNEL_DEVICE_TYPE: + return ConcreteArgSpec::create(runtime_arg_config.kernel_device_type); + default: + PANIC("Unhandled RuntimeArgRefType", ref_type); + } +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/op_arg_ref_spec.cc b/lib/task-spec/src/task-spec/op_arg_ref_spec.cc new file mode 100644 index 0000000000..3b02ad9cc9 --- /dev/null +++ b/lib/task-spec/src/task-spec/op_arg_ref_spec.cc @@ -0,0 +1 @@ +#include "task-spec/op_arg_ref_spec.h" diff --git a/lib/task-spec/src/task-spec/op_ordered_slot_signature.cc b/lib/task-spec/src/task-spec/op_ordered_slot_signature.cc new file mode 100644 index 0000000000..fc9387ce76 --- /dev/null +++ b/lib/task-spec/src/task-spec/op_ordered_slot_signature.cc @@ -0,0 +1,44 @@ +#include "task-spec/op_ordered_slot_signature.h" +#include "utils/containers/filtermap_values.h" +#include "utils/containers/repeat.h" + +namespace FlexFlow { + +OpOrderedSlotSignature get_op_ordered_slot_signature_for_binding(OpTaskBinding const &binding, + nonnegative_int num_inputs, + nonnegative_int num_weights, + nonnegative_int num_outputs) { + auto get_ordered_slots_for_role = [&](TensorRole tensor_role) + -> std::unordered_map + { + return filtermap_values(binding.get_tensor_bindings(), + [&](OpTensorSpec const &op_tensor_spec) -> std::optional { + if (op_tensor_spec.role == tensor_role) { + return op_tensor_spec.idx; + } else { + return std::nullopt; + } + }); + }; + + auto to_set_vector = [](nonnegative_int num, std::unordered_map const &m) + -> std::vector> + { + std::vector> set_vector + = repeat(num, []() { return std::unordered_set{}; }); + for (auto const &[slot, idx] : m) { + set_vector.at(idx.unwrap_nonnegative()).insert(slot); + } + return set_vector; + }; + + return OpOrderedSlotSignature{ + /*input_slots=*/to_set_vector(num_inputs, get_ordered_slots_for_role(TensorRole::INPUT)), + /*weight_slots=*/to_set_vector(num_weights, get_ordered_slots_for_role(TensorRole::WEIGHT)), + /*output_slots=*/to_set_vector(num_outputs, get_ordered_slots_for_role(TensorRole::OUTPUT)), + }; +} + + + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/op_task_invocation.cc b/lib/task-spec/src/task-spec/op_task_invocation.cc index a55995920a..f3239ce13d 100644 --- a/lib/task-spec/src/task-spec/op_task_invocation.cc +++ b/lib/task-spec/src/task-spec/op_task_invocation.cc @@ -20,7 +20,7 @@ void OpTaskBinding::bind(int slot, OpTensorSpec const &tensor_spec) { } void OpTaskBinding::bind(slot_id_t slot, OpTensorSpec const &tensor_spec) { - this->tensor_bindings.insert({SlotGradId{slot, IsGrad::NO}, tensor_spec}); + this->tensor_bindings.insert({fwb_tensor_slot_id_t{slot, IsGrad::NO}, tensor_spec}); } void OpTaskBinding::bind_grad(int slot, OpTensorSpec const &tensor_spec) { @@ -28,7 +28,7 @@ void OpTaskBinding::bind_grad(int slot, OpTensorSpec const &tensor_spec) { } void OpTaskBinding::bind_grad(slot_id_t slot, OpTensorSpec const &tensor_spec) { - this->tensor_bindings.insert({SlotGradId{slot, IsGrad::YES}, tensor_spec}); + this->tensor_bindings.insert({fwb_tensor_slot_id_t{slot, IsGrad::YES}, tensor_spec}); } void OpTaskBinding::insert_arg_spec(slot_id_t name, OpArgSpec const &arg_spec) { @@ -44,13 +44,13 @@ bool OpTaskBinding::operator!=(OpTaskBinding const &other) const { return this->tie() != other.tie(); } -std::tuple const &, +std::tuple const &, std::unordered_map const &> OpTaskBinding::tie() const { return 
std::tie(this->tensor_bindings, this->arg_bindings); } -std::unordered_map const & +std::unordered_map const & OpTaskBinding::get_tensor_bindings() const { return this->tensor_bindings; } @@ -83,7 +83,7 @@ bool is_tensor_invocation_valid(OpTaskSignature const &sig, OpTaskInvocation const &inv) { // TODO: fix for variadic inputs (need to implement .bind() for variadic // first) - for (std::pair const &tensor_binding : + for (std::pair const &tensor_binding : inv.binding.get_tensor_bindings()) { OpTensorSlotSpec op_tensor_slot_spec = OpTensorSlotSpec{tensor_binding.first.slot_id, diff --git a/lib/task-spec/src/task-spec/op_task_signature.cc b/lib/task-spec/src/task-spec/op_task_signature.cc index 94ac16d092..8296bac3b9 100644 --- a/lib/task-spec/src/task-spec/op_task_signature.cc +++ b/lib/task-spec/src/task-spec/op_task_signature.cc @@ -2,6 +2,9 @@ #include "utils/fmt/optional.h" #include "utils/fmt/unordered_map.h" #include "utils/fmt/unordered_set.h" +#include "utils/hash/tuple.h" +#include "utils/hash/unordered_map.h" +#include "utils/hash/unordered_set.h" namespace FlexFlow { @@ -148,6 +151,25 @@ std::unordered_map return this->task_arg_types; } +bool OpTaskSignature::operator==(OpTaskSignature const &other) const { + return this->tie() == other.tie(); +} + +bool OpTaskSignature::operator!=(OpTaskSignature const &other) const { + return this->tie() != other.tie(); +} + +std::tuple const &, + std::unordered_map const &, + std::unordered_set const &> + OpTaskSignature::tie() const { + return std::tie(this->type, + this->return_value, + this->task_arg_types, + this->op_tensor_slots); +} + std::string format_as(OpTaskSignature const &x) { std::ostringstream oss; oss << "::operator()( + ::FlexFlow::OpTaskSignature const &x) const { + return get_std_hash(x.tie()); +} + +} // namespace std diff --git a/lib/task-spec/src/task-spec/op_task_to_task_invocation.cc b/lib/task-spec/src/task-spec/op_task_to_task_invocation.cc deleted file mode 100644 index b33edc9a76..0000000000 --- a/lib/task-spec/src/task-spec/op_task_to_task_invocation.cc +++ /dev/null @@ -1,162 +0,0 @@ -#include "task-spec/op_task_to_task_invocation.h" -#include "op-attrs/parallel_tensor_shape.h" -#include "pcg/cg_operator_tensor_shape_signature.h" -#include "pcg/computation_graph.h" -#include "task-spec/slot_grad_id.dtg.h" -#include "task-spec/training_layer_plus_context.h" -#include "task-spec/training_layer_tensor_group_signature.h" -#include "utils/containers/map_values.h" -#include "utils/containers/transform.h" -#include "utils/overload.h" - -namespace FlexFlow { - -TaskInvocation - lower_to_task_invocation(OpTaskInvocation const &op_task_invocation, - TrainingLayerPlusContext const &training_layer, - std::optional const - &device_specific_device_states) { - - std::unordered_map - tensor_bindings = - transform(op_task_invocation.binding.get_tensor_bindings(), - [&](SlotGradId const &slot_grad_id, - OpTensorSpec const &op_tensor_spec) { - return lower_tensor_binding( - get_tensor_group_signature(training_layer), - slot_grad_id, - op_tensor_spec); - }); - - std::unordered_map arg_bindings = map_values( - op_task_invocation.binding.get_arg_bindings(), - [&](OpArgSpec const &op_arg_spec) { - return lower_to_task_arg_spec(op_arg_spec, - get_cg_op_shape_signature(training_layer), - training_layer.layer_guid, - device_specific_device_states); - }); - - return TaskInvocation{ - op_task_invocation.task_id, - TaskBinding{ - tensor_bindings, - arg_bindings, - }, - }; -} - -std::pair - 
lower_tensor_binding(TrainingLayerTensorGroupSignature const &signature, - SlotGradId const &slot_grad_id, - OpTensorSpec const &op_tensor_spec) { - auto [tensor_to_bind, gradient_tensor_guid_to_bind] = [&] { - TrainingTensorGroup group = get_training_tensor_group_for_role_and_index( - signature, op_tensor_spec.role, op_tensor_spec.idx); - - return std::pair{ - group.forward_tensor, - group.gradient_tensor, - }; - }(); - - if (slot_grad_id.is_grad == IsGrad::NO) { - return std::pair{ - tensor_sub_slot_id_t{ - slot_grad_id.slot_id, - TensorType::FORWARD, - }, - training_tensor_guid_t{ - tensor_to_bind, - }, - }; - } else if (slot_grad_id.is_grad == IsGrad::YES) { - return std::pair{ - tensor_sub_slot_id_t{ - slot_grad_id.slot_id, - TensorType::GRADIENT, - }, - training_tensor_guid_t{ - gradient_tensor_guid_to_bind, - }, - }; - } else { - PANIC("Invalid value for IsGrad {}", slot_grad_id.is_grad); - } -} - -TaskArgSpec lower_to_task_arg_spec( - OpArgSpec const &op_arg_spec, - CGOperatorTensorShapeSignature const &op_shape_signature, - layer_guid_t const &layer_guid, - std::optional const - &device_specific_device_states) { - return op_arg_spec.visit(overload{ - [](ConcreteArgSpec const &concrete_arg_spec) { - return TaskArgSpec{concrete_arg_spec}; - }, - [](RuntimeArgRefSpec const &runtime_arg_ref_spec) { - return TaskArgSpec{runtime_arg_ref_spec}; - }, - [&](OpArgRefSpec const &op_arg_ref_spec) { - return TaskArgSpec{ - lower_to_concrete_arg_spec(op_arg_ref_spec, - op_shape_signature, - layer_guid, - device_specific_device_states), - }; - }, - }); -} - -ConcreteArgSpec lower_to_concrete_arg_spec( - OpArgRefSpec const &op_arg_ref_spec, - CGOperatorTensorShapeSignature const &op_signature, - layer_guid_t const &op_guid, - std::optional const &device_states) { - - OpArgRefType op_arg_ref_type = op_arg_ref_spec.get_ref_type(); - return op_arg_ref_type.visit(overload{ - [&](PerDeviceOpStateRefType const &) { - PerDeviceOpState per_device_op_state = - get_device_state_from_device_specific(device_states.value(), 0); - - return per_device_op_state.visit(overload{ - [&](auto const &x) { - ASSERT(matches(op_arg_ref_spec.get_type_index())); - return ConcreteArgSpec::create(x); - }, - }); - }, - [&](ParallelTensorShapeRefType const &ref_type) { - TensorShape tensor_shape = tensor_shape_for_role_and_index( - /*signature=*/op_signature, - /*tensor_role=*/ref_type.tensor_role, - /*index=*/ref_type.idx); - ParallelTensorShape shape = lift_to_parallel(tensor_shape); - return ConcreteArgSpec::create(shape); - }, - }); -} - -ConcreteArgSpec - lower_to_concrete_arg_spec(RuntimeArgRefSpec const &runtime_arg_ref_spec, - RuntimeArgConfig const &runtime_arg_config) { - switch (runtime_arg_ref_spec.get_ref_type()) { - case RuntimeArgRefType::FF_HANDLE: - return ConcreteArgSpec::create(*(runtime_arg_config.ff_handle.get(0))); - case RuntimeArgRefType::PROFILING_SETTINGS: - return ConcreteArgSpec::create(runtime_arg_config.profiling_settings); - case RuntimeArgRefType::FF_ITERATION_CONFIG: - PANIC("FF_ITERATION_CONFIG is currently not handled. 
Please create an " - "issue or contact the FlexFlow train developers if you need this " - "feature."); - case RuntimeArgRefType::KERNEL_DEVICE_TYPE: - return ConcreteArgSpec::create(runtime_arg_config.kernel_device_type); - default: - PANIC(fmt::format("Unhandled RuntimeArgRefType {}", - runtime_arg_ref_spec.get_ref_type())); - } -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/ops/attention.cc b/lib/task-spec/src/task-spec/ops/attention.cc index ea2282792a..b56ab6ce93 100644 --- a/lib/task-spec/src/task-spec/ops/attention.cc +++ b/lib/task-spec/src/task-spec/ops/attention.cc @@ -97,7 +97,7 @@ OpTaskInvocation backward(MultiHeadAttentionAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { auto const &attrs = acc.get_argument(ATTRS); Allocator allocator = acc.get_allocator(); @@ -154,9 +154,8 @@ static DeviceSpecificDeviceStates /*kvSeqLength=*/kvSeqLength.int_from_positive_int(), /*add_bias_kv=*/attrs.add_bias_kv); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/batch_matmul.cc b/lib/task-spec/src/task-spec/ops/batch_matmul.cc index f8d6955b41..2638df6b73 100644 --- a/lib/task-spec/src/task-spec/ops/batch_matmul.cc +++ b/lib/task-spec/src/task-spec/ops/batch_matmul.cc @@ -77,48 +77,17 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { DeviceType kernel_device_type = acc.get_argument(KERNEL_DEVICE_TYPE); - positive_int m = dim_at_idx(b_input.shape.dims, legion_dim_t{0_n}); - ASSERT(m == dim_at_idx(output.shape.dims, legion_dim_t{0_n})); - positive_int n = dim_at_idx(a_input.shape.dims, legion_dim_t{1_n}); - ASSERT(n == dim_at_idx(output.shape.dims, legion_dim_t{1_n})); - positive_int k = dim_at_idx(a_input.shape.dims, legion_dim_t{0_n}); - ASSERT(k == dim_at_idx(b_input.shape.dims, legion_dim_t{1_n})); - - ASSERT(get_num_elements(a_input.shape.dims) == - get_num_elements(b_input.shape.dims)); - ASSERT(get_num_elements(a_input.shape.dims) == - get_num_elements(output.shape.dims)); - - positive_int batch = 1_p; - for (nonnegative_int i : - nonnegative_range(2_n, get_num_dims(a_input.shape.dims))) { - positive_int dim_size = dim_at_idx(a_input.shape.dims, legion_dim_t{i}); - ASSERT(dim_size == dim_at_idx(b_input.shape.dims, legion_dim_t{i})); - ASSERT(dim_size == dim_at_idx(output.shape.dims, legion_dim_t{i})); - batch *= dim_size; - } - - auto get_raw_seq_len = [](std::optional seq_len) -> int { - return transform(seq_len, - [](nonnegative_int x) { return x.unwrap_nonnegative(); }) - .value_or(-1); - }; - return profile(forward_kernel, profiling, kernel_device_type, "[BatchMatmul] forward_time = {:.2lf}ms\n", handle, - output.get_float_ptr(), - a_input.get_float_ptr(), - b_input.get_float_ptr(), - m.int_from_positive_int(), - n.int_from_positive_int(), - k.int_from_positive_int(), - batch.int_from_positive_int(), - get_raw_seq_len(attrs.a_seq_length_dim), - get_raw_seq_len(attrs.b_seq_length_dim), - iter_config.seq_length); + output, + a_input, + b_input, + iter_config.seq_length, + attrs.a_seq_length_dim, + attrs.b_seq_length_dim); } static std::optional @@ -143,42 +112,17 @@ static std::optional auto b_input_grad = acc.get_tensor_grad(B_INPUT); ASSERT(b_input.shape == b_input_grad.shape); - // check dins - positive_int m = dim_at_idx(b_input.shape.dims, legion_dim_t{0_n}); - 
ASSERT(m == dim_at_idx(output.shape.dims, legion_dim_t{0_n})); - positive_int n = dim_at_idx(a_input.shape.dims, legion_dim_t{1_n}); - ASSERT(n == dim_at_idx(output.shape.dims, legion_dim_t{1_n})); - positive_int k = dim_at_idx(a_input.shape.dims, legion_dim_t{0_n}); - ASSERT(k == dim_at_idx(b_input.shape.dims, legion_dim_t{1_n})); - ASSERT(get_num_elements(a_input.shape.dims) == - get_num_elements(b_input.shape.dims)); - ASSERT(get_num_elements(a_input.shape.dims) == - get_num_elements(output.shape.dims)); - - positive_int batch = 1_p; - for (nonnegative_int i : - nonnegative_range(2_n, get_num_dims(a_input.shape.dims))) { - positive_int dim_size = dim_at_idx(a_input.shape.dims, legion_dim_t{i}); - ASSERT(dim_size == dim_at_idx(b_input.shape.dims, legion_dim_t{i})); - ASSERT(dim_size == dim_at_idx(output.shape.dims, legion_dim_t{i})); - batch *= dim_size; - } - return profile(backward_kernel, profiling, kernel_device_type, "[BatchMatmul] backward_time = {:.2lf}ms\n", handle, - output.get_float_ptr(), - output_grad.get_float_ptr(), - a_input.get_float_ptr(), - a_input_grad.get_float_ptr(), - b_input.get_float_ptr(), - b_input_grad.get_float_ptr(), - m.int_from_positive_int(), - n.int_from_positive_int(), - k.int_from_positive_int(), - batch.int_from_positive_int()); + output, + output_grad, + a_input, + a_input_grad, + b_input, + b_input_grad); } TaskImplFunction get_batch_matmul_fwd_task_impl() { diff --git a/lib/task-spec/src/task-spec/ops/batch_norm.cc b/lib/task-spec/src/task-spec/ops/batch_norm.cc index 0599eec3f5..9f2e8a086f 100644 --- a/lib/task-spec/src/task-spec/ops/batch_norm.cc +++ b/lib/task-spec/src/task-spec/ops/batch_norm.cc @@ -80,7 +80,7 @@ OpTaskInvocation backward(BatchNormAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { Allocator allocator = acc.get_allocator(); device_handle_t handle = acc.get_argument(HANDLE); @@ -109,9 +109,8 @@ static DeviceSpecificDeviceStates /*output_w=*/output_w.int_from_positive_int(), /*relu=*/attrs.relu); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/conv_2d.cc b/lib/task-spec/src/task-spec/ops/conv_2d.cc index d7110eabfa..2b6ce1bc97 100644 --- a/lib/task-spec/src/task-spec/ops/conv_2d.cc +++ b/lib/task-spec/src/task-spec/ops/conv_2d.cc @@ -63,7 +63,7 @@ OpTaskInvocation backward(Conv2DAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { device_handle_t handle = acc.get_argument(HANDLE); @@ -91,9 +91,8 @@ static DeviceSpecificDeviceStates /*filter_ptr=*/filter.get_float_ptr(), /*filter_grad_ptr=*/filter_grad.get_float_ptr()); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/dropout.cc b/lib/task-spec/src/task-spec/ops/dropout.cc index a36506984e..30e53436f9 100644 --- a/lib/task-spec/src/task-spec/ops/dropout.cc +++ b/lib/task-spec/src/task-spec/ops/dropout.cc @@ -60,7 +60,7 @@ OpTaskInvocation backward(DropoutAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { auto output = acc.get_tensor(OUTPUT); 
Allocator allocator = acc.get_allocator(); @@ -77,9 +77,8 @@ static DeviceSpecificDeviceStates output.shape, allocator); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/element_binary.cc b/lib/task-spec/src/task-spec/ops/element_binary.cc index a5f9f012fe..1575b3ea7f 100644 --- a/lib/task-spec/src/task-spec/ops/element_binary.cc +++ b/lib/task-spec/src/task-spec/ops/element_binary.cc @@ -66,7 +66,7 @@ OpTaskInvocation backward(ElementBinaryAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { auto input_lhs = acc.get_tensor(LHS_INPUT); auto input_rhs = acc.get_tensor(RHS_INPUT); @@ -87,9 +87,8 @@ static DeviceSpecificDeviceStates input_rhs.shape, output.shape); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/element_unary.cc b/lib/task-spec/src/task-spec/ops/element_unary.cc index f8df53b578..31bc605f94 100644 --- a/lib/task-spec/src/task-spec/ops/element_unary.cc +++ b/lib/task-spec/src/task-spec/ops/element_unary.cc @@ -66,7 +66,7 @@ OpTaskInvocation backward(ElementUnaryAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { auto attrs = acc.get_argument(ATTRS); @@ -84,9 +84,8 @@ static DeviceSpecificDeviceStates get_piece_shape(output_shape), attrs); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/embedding.cc b/lib/task-spec/src/task-spec/ops/embedding.cc index 4ba32c8483..1a534d25c2 100644 --- a/lib/task-spec/src/task-spec/ops/embedding.cc +++ b/lib/task-spec/src/task-spec/ops/embedding.cc @@ -55,8 +55,8 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { input.shape.data_type, output.shape.data_type, attrs.aggr, - get_num_dims(input.shape.dims).unwrap_nonnegative(), - get_num_dims(output.shape.dims).unwrap_nonnegative(), + get_num_dims(input.shape.dims), + get_num_dims(output.shape.dims), dim_at_idx(input.shape.dims, legion_dim_t{1_n}).int_from_positive_int()); } @@ -82,8 +82,8 @@ static std::optional output.shape.data_type, input.shape.data_type, attrs.aggr, - get_num_dims(input.shape.dims).unwrap_nonnegative(), - get_num_dims(output.shape.dims).unwrap_nonnegative(), + get_num_dims(input.shape.dims), + get_num_dims(output.shape.dims), dim_at_idx(input.shape.dims, ff_dim_t{0_n}).int_from_positive_int()); } diff --git a/lib/task-spec/src/task-spec/ops/gather.cc b/lib/task-spec/src/task-spec/ops/gather.cc index 7f8aacf9d6..8bd44a7470 100644 --- a/lib/task-spec/src/task-spec/ops/gather.cc +++ b/lib/task-spec/src/task-spec/ops/gather.cc @@ -79,7 +79,7 @@ OpTaskInvocation backward(GatherAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { auto input = acc.get_tensor(INPUT); auto index = acc.get_tensor(INDEX); @@ -103,9 +103,8 @@ static DeviceSpecificDeviceStates std::optional per_device_state = init_kernel(kernel_device_type, handle, attrs.dim); - return 
DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/layer_norm.cc b/lib/task-spec/src/task-spec/ops/layer_norm.cc index b37e63c2d1..16c05d0d25 100644 --- a/lib/task-spec/src/task-spec/ops/layer_norm.cc +++ b/lib/task-spec/src/task-spec/ops/layer_norm.cc @@ -133,7 +133,7 @@ static std::optional beta_grad); } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { auto const &attrs = acc.get_argument(ATTRS); DeviceType kernel_device_type = @@ -161,9 +161,8 @@ static DeviceSpecificDeviceStates effective_num_elements.int_from_positive_int(), attrs.eps); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/linear.cc b/lib/task-spec/src/task-spec/ops/linear.cc index 9ce02bc7fd..bbf76e8636 100644 --- a/lib/task-spec/src/task-spec/ops/linear.cc +++ b/lib/task-spec/src/task-spec/ops/linear.cc @@ -69,7 +69,7 @@ OpTaskInvocation backward(LinearAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { auto const &attrs = acc.get_argument(ATTRS); device_handle_t handle = acc.get_argument(HANDLE); @@ -94,13 +94,12 @@ static DeviceSpecificDeviceStates batch_size.int_from_positive_int(), attrs.out_channels.int_from_positive_int()); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } -static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { +static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { auto input = acc.get_tensor(INPUT); auto weight = acc.get_tensor(WEIGHT); auto output = acc.get_tensor(OUTPUT); @@ -131,7 +130,7 @@ static std::optional forward_task_impl(TaskArgumentAccessor const &acc) { return result; } -static std::optional +static std::optional backward_task_impl(TaskArgumentAccessor const &acc) { auto input = acc.get_tensor(INPUT); auto weight = acc.get_tensor(WEIGHT); diff --git a/lib/task-spec/src/task-spec/ops/pool_2d.cc b/lib/task-spec/src/task-spec/ops/pool_2d.cc index 20707acb2d..c0cc7f9ef2 100644 --- a/lib/task-spec/src/task-spec/ops/pool_2d.cc +++ b/lib/task-spec/src/task-spec/ops/pool_2d.cc @@ -47,7 +47,7 @@ static nonnegative_int calculate_padding(nonnegative_int output_size, }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { auto const &attrs = acc.get_argument(ATTRS); device_handle_t handle = acc.get_argument(HANDLE); @@ -86,9 +86,8 @@ static DeviceSpecificDeviceStates attrs.stride_w.int_from_positive_int(), attrs.pool_type); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/reduce.cc b/lib/task-spec/src/task-spec/ops/reduce.cc index d8818393ec..c8b31502a8 100644 --- a/lib/task-spec/src/task-spec/ops/reduce.cc +++ b/lib/task-spec/src/task-spec/ops/reduce.cc @@ -36,7 +36,7 @@ OpTaskInvocation init(ReduceAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static 
DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { device_handle_t handle = acc.get_argument(HANDLE); DeviceType kernel_device_type = @@ -58,9 +58,8 @@ static DeviceSpecificDeviceStates input.shape, output.shape); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/ops/softmax.cc b/lib/task-spec/src/task-spec/ops/softmax.cc index 89ea42299f..5c003888ba 100644 --- a/lib/task-spec/src/task-spec/ops/softmax.cc +++ b/lib/task-spec/src/task-spec/ops/softmax.cc @@ -72,7 +72,7 @@ OpTaskInvocation backward(SoftmaxAttrs const &attrs) { }; } -static DeviceSpecificDeviceStates +static DeviceSpecificPerDeviceOpState init_task_impl(TaskArgumentAccessor const &acc) { device_handle_t handle = acc.get_argument(HANDLE); DeviceType kernel_device_type = @@ -95,9 +95,8 @@ static DeviceSpecificDeviceStates output_h.int_from_positive_int(), output_w.int_from_positive_int()); - return DeviceSpecificDeviceStates{ - DeviceSpecific>::create( - per_device_state), + return DeviceSpecificPerDeviceOpState{ + acc.make_device_specific(per_device_state), }; } diff --git a/lib/task-spec/src/task-spec/optimizer.cc b/lib/task-spec/src/task-spec/optimizer.cc index c8fa23c2af..9cfd47375d 100644 --- a/lib/task-spec/src/task-spec/optimizer.cc +++ b/lib/task-spec/src/task-spec/optimizer.cc @@ -3,6 +3,7 @@ #include "task-spec/profiling.h" #include "utils/containers/get_only.h" #include "utils/overload.h" +#include "utils/units/milliseconds_t.h" namespace FlexFlow { @@ -18,11 +19,11 @@ enum Slots { KERNEL_DEVICE_TYPE, }; -TaskSignature get_sgd_update_signature() { - TaskSignature sig = make_empty_task_signature(); - add_slot(sig, WEIGHT, TensorType::FORWARD); - add_slot(sig, WEIGHT_GRAD, TensorType::GRADIENT); - add_slot(sig, SGD_V, TensorType::OPTIMIZER); +RuntimeTaskSignature get_sgd_update_signature() { + RuntimeTaskSignature sig = make_empty_runtime_task_signature(); + add_slot(sig, WEIGHT, TrainingTensorType::FORWARD); + add_slot(sig, WEIGHT_GRAD, TrainingTensorType::GRADIENT); + add_slot(sig, SGD_V, TrainingTensorType::OPTIMIZER); add_arg_slot(sig, ATTRS); add_arg_slot(sig, PROFILING); @@ -36,11 +37,11 @@ TaskSignature get_sgd_update_signature() { return sig; } -TaskInvocation sgd_update(SGDOptimizerAttrs const &attrs, - forward_tensor_guid_t const &weight, - gradient_tensor_guid_t const &weight_grad, - optimizer_tensor_guid_t const &sgd_v) { - TaskBinding b; +RuntimeTaskInvocation sgd_update(SGDOptimizerAttrs const &attrs, + symbolic_forward_tensor_guid_t const &weight, + symbolic_gradient_tensor_guid_t const &weight_grad, + symbolic_optimizer_tensor_guid_t const &sgd_v) { + RuntimeTaskBinding b; b.bind(WEIGHT, weight); b.bind_grad(WEIGHT_GRAD, weight_grad); @@ -52,7 +53,7 @@ TaskInvocation sgd_update(SGDOptimizerAttrs const &attrs, b.bind_arg(KERNEL_DEVICE_TYPE, kernel_device_type()); b.bind_arg(HANDLE, ff_handle()); - return TaskInvocation{task_id_t::SGD_UPD_NCCL_TASK_ID, + return RuntimeTaskInvocation{task_id_t::SGD_UPD_NCCL_TASK_ID, b}; // how to deal with removal of ParamSync? 
// if (CHOSEN_SYNC_TYPE == ParamSync::NCCL) { @@ -136,12 +137,12 @@ TaskImplFunction get_sgd_update_task_impl() { return TaskImplFunction{GenericTaskImplFunction{sgd_update_task_impl}}; } -TaskSignature get_adam_update_signature() { - TaskSignature sig = make_empty_task_signature(); - add_slot(sig, WEIGHT, TensorType::FORWARD); - add_slot(sig, WEIGHT_GRAD, TensorType::GRADIENT); - add_slot(sig, ADAM_V, TensorType::OPTIMIZER); - add_slot(sig, ADAM_M, TensorType::OPTIMIZER); +RuntimeTaskSignature get_adam_update_signature() { + RuntimeTaskSignature sig = make_empty_runtime_task_signature(); + add_slot(sig, WEIGHT, TrainingTensorType::FORWARD); + add_slot(sig, WEIGHT_GRAD, TrainingTensorType::GRADIENT); + add_slot(sig, ADAM_V, TrainingTensorType::OPTIMIZER); + add_slot(sig, ADAM_M, TrainingTensorType::OPTIMIZER); add_arg_slot(sig, ATTRS); add_arg_slot(sig, PROFILING); @@ -154,12 +155,12 @@ TaskSignature get_adam_update_signature() { return sig; } -TaskInvocation adam_update(AdamOptimizerAttrs const &attrs, - forward_tensor_guid_t const &weight, - gradient_tensor_guid_t const &weight_grad, - optimizer_tensor_guid_t const &adam_v, - optimizer_tensor_guid_t const &adam_m) { - TaskBinding b; +RuntimeTaskInvocation adam_update(AdamOptimizerAttrs const &attrs, + symbolic_forward_tensor_guid_t const &weight, + symbolic_gradient_tensor_guid_t const &weight_grad, + symbolic_optimizer_tensor_guid_t const &adam_v, + symbolic_optimizer_tensor_guid_t const &adam_m) { + RuntimeTaskBinding b; b.bind(WEIGHT, weight); b.bind_grad(WEIGHT_GRAD, weight_grad); b.bind_optimizer(ADAM_M, adam_m); @@ -168,7 +169,7 @@ TaskInvocation adam_update(AdamOptimizerAttrs const &attrs, b.bind_arg(PROFILING, profiling_settings()); b.bind_arg(KERNEL_DEVICE_TYPE, kernel_device_type()); b.bind_arg(HANDLE, ff_handle()); - return TaskInvocation{task_id_t::ADAM_UPD_NCCL_TASK_ID, + return RuntimeTaskInvocation{task_id_t::ADAM_UPD_NCCL_TASK_ID, b}; // how to deal with removal of ParamSync? 
// if (CHOSEN_SYNC_TYPE == ParamSync::NCCL) { @@ -256,18 +257,18 @@ TaskImplFunction get_adam_update_task_impl() { return TaskImplFunction{GenericTaskImplFunction{adam_update_task_impl}}; } -TaskSignature get_update_signature(OptimizerAttrs const &attrs) { - return attrs.visit(overload{ +RuntimeTaskSignature get_update_signature(OptimizerAttrs const &attrs) { + return attrs.visit(overload{ [&](SGDOptimizerAttrs const &) { return get_sgd_update_signature(); }, [&](AdamOptimizerAttrs const &) { return get_adam_update_signature(); }}); } -TaskInvocation get_update_invocation( +RuntimeTaskInvocation get_update_invocation( OptimizerAttrs const &attrs, - forward_tensor_guid_t const &weight, - gradient_tensor_guid_t const &weight_grad, - std::vector const &grad_buffer_tensors) { - return attrs.visit( + symbolic_forward_tensor_guid_t const &weight, + symbolic_gradient_tensor_guid_t const &weight_grad, + std::vector const &grad_buffer_tensors) { + return attrs.visit( overload{[&](SGDOptimizerAttrs const &s) { return sgd_update( s, weight, weight_grad, get_only(grad_buffer_tensors)); diff --git a/lib/task-spec/src/task-spec/optimizer_tensor_source.cc b/lib/task-spec/src/task-spec/optimizer_tensor_source.cc deleted file mode 100644 index ad7bf9f489..0000000000 --- a/lib/task-spec/src/task-spec/optimizer_tensor_source.cc +++ /dev/null @@ -1,18 +0,0 @@ -#include "task-spec/optimizer_tensor_source.h" - -namespace FlexFlow { - -int OptimizerTensorSource::next_available_optimizer_tensor_id = 0; - -OptimizerTensorSource::OptimizerTensorSource() {} - -optimizer_tensor_guid_t OptimizerTensorSource::new_optimizer_tensor() { - return optimizer_tensor_guid_t{ - OptimizerTensorSource::next_available_optimizer_tensor_id++}; -} - -void OptimizerTensorSource::reset() { - OptimizerTensorSource::next_available_optimizer_tensor_id = 0; -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/per_device_op_state.cc b/lib/task-spec/src/task-spec/per_device_op_state.cc index a959f4a8c9..3928d44199 100644 --- a/lib/task-spec/src/task-spec/per_device_op_state.cc +++ b/lib/task-spec/src/task-spec/per_device_op_state.cc @@ -4,7 +4,7 @@ namespace FlexFlow { PerDeviceOpState get_device_state_from_device_specific( - DeviceSpecificDeviceStates const &device_specific, size_t device_idx) { + DeviceSpecificPerDeviceOpState const &device_specific, device_id_t device_idx) { return device_specific.visit( [&](auto const &x) { return PerDeviceOpState{*(x.get(device_idx))}; }); } diff --git a/lib/task-spec/src/task-spec/runtime_arg_config.cc b/lib/task-spec/src/task-spec/runtime_arg_config.cc index 9f3dc61545..e4ad88741e 100644 --- a/lib/task-spec/src/task-spec/runtime_arg_config.cc +++ b/lib/task-spec/src/task-spec/runtime_arg_config.cc @@ -1,13 +1,16 @@ #include "task-spec/runtime_arg_config.h" #include "kernels/device_handle_t.h" +#include "task-spec/lower_op_task_invocation_to_runtime_task_invocation.h" +#include "utils/containers/map_values.h" +#include "utils/overload.h" namespace FlexFlow { RuntimeArgConfig - cpu_make_runtime_arg_config(EnableProfiling enable_profiling, + cpu_make_runtime_arg_config(device_id_t device_id, + EnableProfiling enable_profiling, ProfilingSettings profiling_settings) { return RuntimeArgConfig{ - DeviceSpecific::create(cpu_make_device_handle_t()), enable_profiling, profiling_settings, DeviceType::CPU, @@ -15,12 +18,11 @@ RuntimeArgConfig } RuntimeArgConfig - gpu_make_runtime_arg_config(PerDeviceFFHandle const &ff_handle, + gpu_make_runtime_arg_config(device_id_t device_id, + 
PerDeviceFFHandle const &ff_handle, EnableProfiling enable_profiling, ProfilingSettings profiling_settings) { return RuntimeArgConfig{ - DeviceSpecific::create( - gpu_make_device_handle_t(ff_handle)), enable_profiling, profiling_settings, DeviceType::GPU, diff --git a/lib/task-spec/src/task-spec/runtime_arg_ref.cc b/lib/task-spec/src/task-spec/runtime_arg_ref.cc index 3aa1b7f907..d01c3fd5b6 100644 --- a/lib/task-spec/src/task-spec/runtime_arg_ref.cc +++ b/lib/task-spec/src/task-spec/runtime_arg_ref.cc @@ -1,23 +1,40 @@ #include "task-spec/runtime_arg_ref.h" #include "kernels/device_handle_t.dtg.h" #include "task-spec/device_specific.h" +#include "utils/archetypes/value_type.h" namespace FlexFlow { RuntimeArgRef profiling_settings() { - return {RuntimeArgRefType::PROFILING_SETTINGS}; + return {RuntimeArgRefType{ + ArgumentlessRuntimeArgRefType::PROFILING_SETTINGS + }}; } RuntimeArgRef> ff_handle() { - return {RuntimeArgRefType::FF_HANDLE}; + return {RuntimeArgRefType{ + ArgumentlessRuntimeArgRefType::FF_HANDLE + }}; } RuntimeArgRef iteration_config() { - return {RuntimeArgRefType::FF_ITERATION_CONFIG}; + return {RuntimeArgRefType{ + ArgumentlessRuntimeArgRefType::FF_ITERATION_CONFIG, + }}; } RuntimeArgRef kernel_device_type() { - return {RuntimeArgRefType::KERNEL_DEVICE_TYPE}; + return {RuntimeArgRefType{ + ArgumentlessRuntimeArgRefType::KERNEL_DEVICE_TYPE + }}; +} + +RuntimeArgRef per_device_op_state_for_layer(symbolic_layer_guid_t layer) { + return {RuntimeArgRefType{ + PerDeviceOpStateRuntimeArgRefType{ + layer, + }, + }}; } } // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/runtime_arg_ref_spec.cc b/lib/task-spec/src/task-spec/runtime_arg_ref_spec.cc new file mode 100644 index 0000000000..9245ccdb2d --- /dev/null +++ b/lib/task-spec/src/task-spec/runtime_arg_ref_spec.cc @@ -0,0 +1 @@ +#include "task-spec/runtime_arg_ref_spec.h" diff --git a/lib/task-spec/src/task-spec/task_arg_spec.cc b/lib/task-spec/src/task-spec/runtime_arg_spec.cc similarity index 63% rename from lib/task-spec/src/task-spec/task_arg_spec.cc rename to lib/task-spec/src/task-spec/runtime_arg_spec.cc index 36fa2f71fd..df40a290c8 100644 --- a/lib/task-spec/src/task-spec/task_arg_spec.cc +++ b/lib/task-spec/src/task-spec/runtime_arg_spec.cc @@ -1,9 +1,9 @@ -#include "task-spec/task_arg_spec.h" +#include "task-spec/runtime_arg_spec.h" #include "utils/overload.h" namespace FlexFlow { -std::type_index get_type_index(TaskArgSpec const &task_arg_spec) { +std::type_index get_type_index(RuntimeArgSpec const &task_arg_spec) { return task_arg_spec.visit( overload{[](auto const &e) { return e.get_type_index(); }}); } diff --git a/lib/task-spec/src/task-spec/runtime_task_binding.cc b/lib/task-spec/src/task-spec/runtime_task_binding.cc new file mode 100644 index 0000000000..668cfd7711 --- /dev/null +++ b/lib/task-spec/src/task-spec/runtime_task_binding.cc @@ -0,0 +1,109 @@ +#include "task-spec/runtime_task_binding.h" +#include "pcg/tensor_guid_t.dtg.h" +#include "utils/containers/contains_key.h" +#include "utils/fmt/unordered_map.h" +#include "utils/hash/tuple.h" +#include "utils/hash/unordered_map.h" + +namespace FlexFlow { + +RuntimeTaskBinding::RuntimeTaskBinding() : tensor_bindings(), arg_bindings() {} + +RuntimeTaskBinding::RuntimeTaskBinding( + std::unordered_map const + &tensor_bindings, + std::unordered_map const &arg_bindings) + : tensor_bindings(tensor_bindings), arg_bindings(arg_bindings) {} + +void RuntimeTaskBinding::bind(int name, symbolic_forward_tensor_guid_t const &binding) { + 
this->bind(slot_id_t{name}, binding); +} + +void RuntimeTaskBinding::bind(slot_id_t name, symbolic_forward_tensor_guid_t const &binding) { + this->tensor_bindings.insert({training_tensor_slot_id_t{name, TrainingTensorType::FORWARD}, + symbolic_training_tensor_guid_t{binding}}); +} + +void RuntimeTaskBinding::bind_grad(int name, symbolic_gradient_tensor_guid_t const &binding) { + this->bind_grad(slot_id_t{name}, binding); +} + +void RuntimeTaskBinding::bind_grad(slot_id_t name, + symbolic_gradient_tensor_guid_t const &binding) { + this->tensor_bindings.insert( + {training_tensor_slot_id_t{name, TrainingTensorType::GRADIENT}, + symbolic_training_tensor_guid_t{binding}}); +} + +void RuntimeTaskBinding::bind_optimizer(int name, + symbolic_optimizer_tensor_guid_t const &binding) { + this->bind_optimizer(slot_id_t{name}, binding); +} + +void RuntimeTaskBinding::bind_optimizer(slot_id_t name, + symbolic_optimizer_tensor_guid_t const &binding) { + this->tensor_bindings.insert( + {training_tensor_slot_id_t{name, TrainingTensorType::OPTIMIZER}, + symbolic_training_tensor_guid_t{binding}}); +} + +void RuntimeTaskBinding::bind_loss(int name, symbolic_loss_tensor_guid_t const &binding) { + this->bind_loss(slot_id_t{name}, binding); +} + +void RuntimeTaskBinding::bind_loss(slot_id_t name, symbolic_loss_tensor_guid_t const &binding) { + this->tensor_bindings.insert({training_tensor_slot_id_t{name, TrainingTensorType::LOSS}, + symbolic_training_tensor_guid_t{binding}}); +} + +void RuntimeTaskBinding::insert_arg_spec(slot_id_t name, RuntimeArgSpec const &arg_spec) { + assert(!contains_key(this->arg_bindings, name)); + this->arg_bindings.insert({name, arg_spec}); +} + +bool RuntimeTaskBinding::operator==(RuntimeTaskBinding const &other) const { + return this->tie() == other.tie(); +} + +bool RuntimeTaskBinding::operator!=(RuntimeTaskBinding const &other) const { + return this->tie() != other.tie(); +} + +std::tuple< + std::unordered_map<training_tensor_slot_id_t, symbolic_training_tensor_guid_t> const &, + std::unordered_map<slot_id_t, RuntimeArgSpec> const &> + RuntimeTaskBinding::tie() const { + return std::tie(this->tensor_bindings, this->arg_bindings); +} + +std::unordered_map<training_tensor_slot_id_t, symbolic_training_tensor_guid_t> const & + RuntimeTaskBinding::get_tensor_bindings() const { + return this->tensor_bindings; +} + +std::unordered_map<slot_id_t, RuntimeArgSpec> const & + RuntimeTaskBinding::get_arg_bindings() const { + return this->arg_bindings; +} + +std::string format_as(RuntimeTaskBinding const &x) { + return fmt::format( + "<RuntimeTaskBinding tensor_bindings={} arg_bindings={}>", + x.get_tensor_bindings(), + x.get_arg_bindings()); +} + +std::ostream &operator<<(std::ostream &s, RuntimeTaskBinding const &x) { + return (s << fmt::to_string(x)); +} + +} // namespace FlexFlow + +namespace std { + +size_t hash<::FlexFlow::RuntimeTaskBinding>::operator()( + ::FlexFlow::RuntimeTaskBinding const &s) const { + return ::FlexFlow::get_std_hash(s.tie()); +} + +} // namespace std diff --git a/lib/task-spec/src/task-spec/task_invocation.cc b/lib/task-spec/src/task-spec/runtime_task_invocation.cc similarity index 69% rename from lib/task-spec/src/task-spec/task_invocation.cc rename to lib/task-spec/src/task-spec/runtime_task_invocation.cc index 0677ff6e60..590eb9e090 100644 --- a/lib/task-spec/src/task-spec/task_invocation.cc +++ b/lib/task-spec/src/task-spec/runtime_task_invocation.cc @@ -1,13 +1,13 @@ -#include "task-spec/task_invocation.h" -#include "task-spec/task_arg_spec.h" +#include "task-spec/runtime_task_invocation.h" +#include "task-spec/runtime_arg_spec.h" #include "utils/containers/keys.h" namespace FlexFlow { -bool is_invocation_valid(TaskSignature const &sig, TaskInvocation const &inv) { - TaskBinding 
binding = inv.binding; +bool is_invocation_valid(RuntimeTaskSignature const &sig, RuntimeTaskInvocation const &inv) { + RuntimeTaskBinding binding = inv.binding; - for (std::pair const &arg_binding : + for (std::pair const &arg_binding : binding.get_arg_bindings()) { if (sig.task_arg_types.count(arg_binding.first)) { if (get_type_index(arg_binding.second) != @@ -19,7 +19,7 @@ bool is_invocation_valid(TaskSignature const &sig, TaskInvocation const &inv) { } } - for (std::pair const + for (std::pair const &tensor_binding : binding.get_tensor_bindings()) { slot_id_t tensor_slot_id = tensor_binding.first.slot_id; if (sig.tensor_guid_slots.count(tensor_slot_id)) { diff --git a/lib/task-spec/src/task-spec/task_signature.cc b/lib/task-spec/src/task-spec/runtime_task_signature.cc similarity index 54% rename from lib/task-spec/src/task-spec/task_signature.cc rename to lib/task-spec/src/task-spec/runtime_task_signature.cc index 3ac038e8c5..f1667151ab 100644 --- a/lib/task-spec/src/task-spec/task_signature.cc +++ b/lib/task-spec/src/task-spec/runtime_task_signature.cc @@ -1,21 +1,21 @@ -#include "task-spec/task_signature.h" +#include "task-spec/runtime_task_signature.h" namespace FlexFlow { -TaskSignature make_empty_task_signature() { - return TaskSignature(std::nullopt, {}, {}); +RuntimeTaskSignature make_empty_runtime_task_signature() { + return RuntimeTaskSignature(std::nullopt, {}, {}); } -void add_slot(TaskSignature &task_signature, +void add_slot(RuntimeTaskSignature &task_signature, int name, - TensorType tensor_type, + TrainingTensorType tensor_type, SlotType slot_type) { add_slot(task_signature, slot_id_t{name}, tensor_type, slot_type); } -void add_slot(TaskSignature &task_signature, +void add_slot(RuntimeTaskSignature &task_signature, slot_id_t name, - TensorType tensor_type, + TrainingTensorType tensor_type, SlotType slot_type) { TensorTypeSlotSpec tensor_guid_slot_spec = TensorTypeSlotSpec{name, tensor_type, slot_type}; diff --git a/lib/task-spec/src/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.cc b/lib/task-spec/src/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.cc new file mode 100644 index 0000000000..d36c2f1785 --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.cc @@ -0,0 +1,27 @@ +#include "task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.h" + +namespace FlexFlow { + +SymbolicLayerTrainingTensorGroupSignatureWithShapes + get_signature_with_shapes(SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &attrs_and_signature) { + + return SymbolicLayerTrainingTensorGroupSignatureWithShapes{ + /*input_tensor_groups=*/attrs_and_signature.input_tensor_groups, + /*weight_tensor_groups=*/attrs_and_signature.weight_tensor_groups, + /*output_tensor_groups=*/attrs_and_signature.output_tensor_groups, + }; +} + +SymbolicCgOpAttrsAndTrainingSignatureWithShapes + make_training_cg_op_attrs_and_signature_with_shapes( + ComputationGraphOpAttrs const &op_attrs, + SymbolicLayerTrainingTensorGroupSignatureWithShapes const &signature) { + return SymbolicCgOpAttrsAndTrainingSignatureWithShapes{ + /*op_attrs=*/op_attrs, + /*input_tensor_groups=*/signature.input_tensor_groups, + /*weight_tensor_groups=*/signature.weight_tensor_groups, + /*output_tensor_groups=*/signature.output_tensor_groups, + }; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_forward_tensor_source.cc b/lib/task-spec/src/task-spec/symbolic_forward_tensor_source.cc new file mode 100644 index 
0000000000..4a2f778b0c --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_forward_tensor_source.cc @@ -0,0 +1,18 @@ +#include "task-spec/symbolic_forward_tensor_source.h" + +namespace FlexFlow { + +int SymbolicForwardTensorSource::next_available_symbolic_forward_tensor_id = 0; + +SymbolicForwardTensorSource::SymbolicForwardTensorSource() {} + +symbolic_forward_tensor_guid_t SymbolicForwardTensorSource::new_symbolic_forward_tensor() { + return symbolic_forward_tensor_guid_t{ + SymbolicForwardTensorSource::next_available_symbolic_forward_tensor_id++}; +} + +void SymbolicForwardTensorSource::reset() { + SymbolicForwardTensorSource::next_available_symbolic_forward_tensor_id = 0; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_gradient_tensor_source.cc b/lib/task-spec/src/task-spec/symbolic_gradient_tensor_source.cc new file mode 100644 index 0000000000..e36b3809ca --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_gradient_tensor_source.cc @@ -0,0 +1,18 @@ +#include "task-spec/symbolic_gradient_tensor_source.h" + +namespace FlexFlow { + +int SymbolicGradientTensorSource::next_available_symbolic_gradient_tensor_id = 0; + +SymbolicGradientTensorSource::SymbolicGradientTensorSource() {} + +symbolic_gradient_tensor_guid_t SymbolicGradientTensorSource::new_symbolic_gradient_tensor() { + return symbolic_gradient_tensor_guid_t{ + SymbolicGradientTensorSource::next_available_symbolic_gradient_tensor_id++}; +} + +void SymbolicGradientTensorSource::reset() { + SymbolicGradientTensorSource::next_available_symbolic_gradient_tensor_id = 0; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_layer_tensor_shape_signature.cc b/lib/task-spec/src/task-spec/symbolic_layer_tensor_shape_signature.cc new file mode 100644 index 0000000000..f0458c9a48 --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_layer_tensor_shape_signature.cc @@ -0,0 +1,30 @@ +#include "task-spec/symbolic_layer_tensor_shape_signature.h" +#include "utils/containers/at_idx.h" +#include + +namespace FlexFlow { + +std::vector + tensor_shapes_for_role(SymbolicLayerTensorShapeSignature const &signature, + TensorRole tensor_role) { + switch (tensor_role) { + case TensorRole::INPUT: + return signature.input_shapes; + case TensorRole::WEIGHT: + return signature.weight_shapes; + case TensorRole::OUTPUT: + return signature.output_shapes; + default: + PANIC("Unhandled TensorRole", tensor_role); + } +} + +TensorShape + tensor_shape_for_role_and_index(SymbolicLayerTensorShapeSignature const &signature, + TensorRole tensor_role, + nonnegative_int tensor_idx) { + return at_idx(tensor_shapes_for_role(signature, tensor_role), tensor_idx); +} + + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_layer_training_tensor_group_signature.cc b/lib/task-spec/src/task-spec/symbolic_layer_training_tensor_group_signature.cc new file mode 100644 index 0000000000..4282a065b2 --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_layer_training_tensor_group_signature.cc @@ -0,0 +1,47 @@ +#include "task-spec/symbolic_layer_training_tensor_group_signature.h" +#include "task-spec/symbolic_training_tensor_group.h" +#include +#include "utils/containers/transform.h" + +namespace FlexFlow { + +std::vector get_training_tensor_groups_for_role( + SymbolicLayerTrainingTensorGroupSignature const &signature, + TensorRole tensor_role) { + + switch (tensor_role) { + case TensorRole::INPUT: + return signature.input_tensor_groups; + case TensorRole::WEIGHT: + return 
signature.weight_tensor_groups; + case TensorRole::OUTPUT: + return signature.output_tensor_groups; + default: + PANIC("Unhandled TensorRole {}", tensor_role); + } +} + +SymbolicTrainingTensorGroup get_training_tensor_group_for_role_and_index( + SymbolicLayerTrainingTensorGroupSignature const &signature, + TensorRole tensor_role, + nonnegative_int index) { + + return get_training_tensor_groups_for_role(signature, tensor_role) + .at(index.unwrap_nonnegative()); +} + +std::vector + get_training_tensors_for_role_and_type(SymbolicLayerTrainingTensorGroupSignature const &signature, + TensorRole tensor_role, + FwbTensorType tensor_type) { + std::vector + groups = get_training_tensor_groups_for_role(signature, tensor_role); + + return transform(groups, + [&](SymbolicTrainingTensorGroup const &g) -> symbolic_training_tensor_guid_t { + return get_training_tensor_for_type(g, tensor_type); + }); +} + + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.cc b/lib/task-spec/src/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.cc new file mode 100644 index 0000000000..f68d268f3d --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.cc @@ -0,0 +1,43 @@ +#include "task-spec/symbolic_layer_training_tensor_group_signature_with_shapes.h" +#include "task-spec/symbolic_training_tensor_group_with_shape.h" +#include "utils/containers/transform.h" + +namespace FlexFlow { + + +SymbolicLayerTrainingTensorGroupSignature + drop_shapes_from_signature(SymbolicLayerTrainingTensorGroupSignatureWithShapes const &s) { + + auto drop_shapes = [](std::vector const &groups) { + return transform(groups, + [](SymbolicTrainingTensorGroupWithShape const &g) { + return drop_shape_from_group(g); + }); + }; + + return SymbolicLayerTrainingTensorGroupSignature{ + /*input_tensor_groups=*/drop_shapes(s.input_tensor_groups), + /*weight_tensor_groups=*/drop_shapes(s.weight_tensor_groups), + /*output_tensor_groups=*/drop_shapes(s.output_tensor_groups), + }; +} + +SymbolicLayerTensorShapeSignature + get_shape_signature(SymbolicLayerTrainingTensorGroupSignatureWithShapes const &s) { + + auto get_shapes = [](std::vector const &groups) { + return transform(groups, + [](SymbolicTrainingTensorGroupWithShape const &g) { + return g.tensor_shape; + }); + }; + + return SymbolicLayerTensorShapeSignature{ + /*input_shapes=*/get_shapes(s.input_tensor_groups), + /*weight_shapes=*/get_shapes(s.weight_tensor_groups), + /*output_shapes=*/get_shapes(s.output_tensor_groups), + }; +} + + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_loss_tensor_source.cc b/lib/task-spec/src/task-spec/symbolic_loss_tensor_source.cc new file mode 100644 index 0000000000..eea4c329ca --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_loss_tensor_source.cc @@ -0,0 +1,13 @@ +#include "task-spec/symbolic_loss_tensor_source.h" + +namespace FlexFlow { + +nonnegative_int SymbolicLossTensorSource::next_available_symbolic_loss_tensor_id = 0_n; + +SymbolicLossTensorSource::SymbolicLossTensorSource() {} + +symbolic_loss_tensor_guid_t SymbolicLossTensorSource::new_symbolic_loss_tensor() { + return symbolic_loss_tensor_guid_t{SymbolicLossTensorSource::next_available_symbolic_loss_tensor_id++}; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_optimizer_tensor_source.cc b/lib/task-spec/src/task-spec/symbolic_optimizer_tensor_source.cc new file mode 100644 index 0000000000..07e8f9cd83 
--- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_optimizer_tensor_source.cc @@ -0,0 +1,18 @@ +#include "task-spec/symbolic_optimizer_tensor_source.h" + +namespace FlexFlow { + +int SymbolicOptimizerTensorSource::next_available_symbolic_optimizer_tensor_id = 0; + +SymbolicOptimizerTensorSource::SymbolicOptimizerTensorSource() {} + +symbolic_optimizer_tensor_guid_t SymbolicOptimizerTensorSource::new_symbolic_optimizer_tensor() { + return symbolic_optimizer_tensor_guid_t{ + SymbolicOptimizerTensorSource::next_available_symbolic_optimizer_tensor_id++}; +} + +void SymbolicOptimizerTensorSource::reset() { + SymbolicOptimizerTensorSource::next_available_symbolic_optimizer_tensor_id = 0; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_training_layer_attrs_plus_context.cc b/lib/task-spec/src/task-spec/symbolic_training_layer_attrs_plus_context.cc new file mode 100644 index 0000000000..74e17bda50 --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_training_layer_attrs_plus_context.cc @@ -0,0 +1,87 @@ +#include "task-spec/symbolic_training_layer_attrs_plus_context.h" +#include "task-spec/symbolic_layer_training_tensor_group_signature.dtg.h" +#include "utils/containers/transform.h" +#include + +namespace FlexFlow { + +std::vector + get_training_tensor_groups_for_role( + SymbolicTrainingLayerAttrsPlusContext const &training_layer_plus_context, + TensorRole tensor_role) { + + switch (tensor_role) { + case TensorRole::INPUT: + return training_layer_plus_context.input_tensor_groups; + case TensorRole::WEIGHT: + return training_layer_plus_context.weight_tensor_groups; + case TensorRole::OUTPUT: + return training_layer_plus_context.output_tensor_groups; + default: + PANIC("Unhandled TensorRole {}", tensor_role); + } +} + +SymbolicTrainingTensorGroup + get_training_tensor_group_for_role_and_index( + SymbolicTrainingLayerAttrsPlusContext const &training_layer_plus_context, + TensorRole tensor_role, + nonnegative_int index) { + + return get_training_tensor_groups_for_role( + training_layer_plus_context, tensor_role) + .at(index.unwrap_nonnegative()); +} + +std::vector + get_input_tensors(SymbolicTrainingLayerAttrsPlusContext const &l) { + return transform( + l.input_tensor_groups, + [](SymbolicTrainingTensorGroup const &g) { return g.forward_tensor; }); +} + +std::vector + get_input_grad_tensors(SymbolicTrainingLayerAttrsPlusContext const &l) { + return transform( + l.input_tensor_groups, + [](SymbolicTrainingTensorGroup const &g) { return g.gradient_tensor; }); +} + +std::vector + get_weight_tensors(SymbolicTrainingLayerAttrsPlusContext const &l) { + return transform( + l.weight_tensor_groups, + [](SymbolicTrainingTensorGroup const &g) { return g.forward_tensor; }); +} + +std::vector + get_weight_grad_tensors(SymbolicTrainingLayerAttrsPlusContext const &l) { + return transform( + l.weight_tensor_groups, + [](SymbolicTrainingTensorGroup const &g) { return g.gradient_tensor; }); +} + +std::vector + get_output_tensors(SymbolicTrainingLayerAttrsPlusContext const &l) { + return transform( + l.output_tensor_groups, + [](SymbolicTrainingTensorGroup const &g) { return g.forward_tensor; }); +} + +std::vector + get_output_grad_tensors(SymbolicTrainingLayerAttrsPlusContext const &l) { + return transform( + l.output_tensor_groups, + [](SymbolicTrainingTensorGroup const &g) { return g.gradient_tensor; }); +} + +SymbolicLayerTrainingTensorGroupSignature + get_tensor_group_signature(SymbolicTrainingLayerAttrsPlusContext const &l) { + return 
SymbolicLayerTrainingTensorGroupSignature{ + /*input_tensor_groups=*/l.input_tensor_groups, + /*weight_tensor_groups=*/l.weight_tensor_groups, + /*output_tensor_groups=*/l.output_tensor_groups, + }; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_training_tensor_group.cc b/lib/task-spec/src/task-spec/symbolic_training_tensor_group.cc new file mode 100644 index 0000000000..93e8201276 --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_training_tensor_group.cc @@ -0,0 +1,59 @@ +#include "task-spec/symbolic_training_tensor_group.h" +#include "pcg/optimizer_attrs.h" +#include "utils/containers/repeat.h" +#include "utils/containers/set_union.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" + +namespace FlexFlow { + +SymbolicTrainingTensorGroup make_symbolic_training_tensor_group_for_tensor_guid_t( + CreateGrad create_grad, + OptimizerAttrs const &optimizer_attrs, + SymbolicForwardTensorSource &forward_tensor_source, + SymbolicGradientTensorSource &gradient_tensor_source, + SymbolicOptimizerTensorSource &optimizer_tensor_source) { + + nonnegative_int num_optimizer_tensors = [&]() { + if (create_grad == CreateGrad::YES) { + return get_num_optimizer_tensors(optimizer_attrs); + } else { + return 0_n; + } + }(); + + return SymbolicTrainingTensorGroup{ + /*forward_tensor=*/forward_tensor_source.new_symbolic_forward_tensor(), + /*gradient_tensor=*/gradient_tensor_source.new_symbolic_gradient_tensor(), + /*optimizer_tensors=*/ + repeat(num_optimizer_tensors, + [&]() { return optimizer_tensor_source.new_symbolic_optimizer_tensor(); }), + }; +} + +symbolic_training_tensor_guid_t + get_training_tensor_for_type(SymbolicTrainingTensorGroup const &group, FwbTensorType tensor_type) { + switch (tensor_type) { + case FwbTensorType::FORWARD: + return symbolic_training_tensor_guid_t{group.forward_tensor}; + case FwbTensorType::GRADIENT: + return symbolic_training_tensor_guid_t{group.gradient_tensor}; + default: + PANIC("Unhandled FwbTensorType", tensor_type); + } +} + +std::unordered_set + get_all_training_tensors_in_tensor_group(SymbolicTrainingTensorGroup const &group) { + return set_union( + std::unordered_set{ + symbolic_training_tensor_guid_t{group.forward_tensor}, + symbolic_training_tensor_guid_t{group.gradient_tensor}, + }, + transform(unordered_set_of(group.optimizer_tensors), + [](symbolic_optimizer_tensor_guid_t optimizer_tensor) { + return symbolic_training_tensor_guid_t{optimizer_tensor}; + })); +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_training_tensor_group_with_attrs.cc b/lib/task-spec/src/task-spec/symbolic_training_tensor_group_with_attrs.cc new file mode 100644 index 0000000000..8505673cf7 --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_training_tensor_group_with_attrs.cc @@ -0,0 +1,19 @@ +#include "task-spec/symbolic_training_tensor_group_with_attrs.h" + +namespace FlexFlow { + +SymbolicTrainingTensorGroupWithAttrs + make_symbolic_training_tensor_group_with_attrs_from_group_and_attrs( + SymbolicTrainingTensorGroup const &tensor_group, + TensorShape const &tensor_shape) { + + return SymbolicTrainingTensorGroupWithAttrs{ + /*tensor_shape=*/tensor_shape, + /*forward_tensor=*/tensor_group.forward_tensor, + /*gradient_tensor=*/tensor_group.gradient_tensor, + /*optimizer_tensors=*/tensor_group.optimizer_tensors, + }; +} + + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/symbolic_training_tensor_group_with_shape.cc 
b/lib/task-spec/src/task-spec/symbolic_training_tensor_group_with_shape.cc new file mode 100644 index 0000000000..1993f86a58 --- /dev/null +++ b/lib/task-spec/src/task-spec/symbolic_training_tensor_group_with_shape.cc @@ -0,0 +1,12 @@ +#include "task-spec/symbolic_training_tensor_group_with_shape.h" + +namespace FlexFlow { + +SymbolicTrainingTensorGroup + drop_shape_from_group(SymbolicTrainingTensorGroupWithShape const &g) { + + return g.training_tensor_group; +} + + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/task_signature_impl.cc b/lib/task-spec/src/task-spec/task_signature_impl.cc index 8da38b5840..d2cb90449a 100644 --- a/lib/task-spec/src/task-spec/task_signature_impl.cc +++ b/lib/task-spec/src/task-spec/task_signature_impl.cc @@ -237,7 +237,7 @@ std::vector get_task_ids(ComputationGraphOpAttrs const &op) { }); } -OpTaskInvocation +std::optional<OpTaskInvocation> get_init_op_task_invocation(ComputationGraphOpAttrs const &op) { return op.visit(overload{ [](BatchNormAttrs const &attrs) { return init(attrs); }, @@ -258,7 +258,7 @@ OpTaskInvocation +std::optional<OpTaskInvocation> get_forward_op_task_invocation(ComputationGraphOpAttrs const &op) { return op.visit(overload{ [](BatchMatmulAttrs const &attrs) { return forward(attrs); }, @@ -289,7 +289,7 @@ OpTaskInvocation +std::optional<OpTaskInvocation> get_backward_op_task_invocation(ComputationGraphOpAttrs const &op) { return op.visit(overload{ [](BatchMatmulAttrs const &attrs) { return backward(attrs); }, @@ -320,4 +320,19 @@ OpTaskInvocation }); } +std::optional<OpTaskInvocation> get_op_task_invocation( + ComputationGraphOpAttrs const &op_attrs, + OpTaskType task_type) { + switch (task_type) { + case OpTaskType::INIT: + return get_init_op_task_invocation(op_attrs); + case OpTaskType::FWD: + return get_forward_op_task_invocation(op_attrs); + case OpTaskType::BWD: + return get_backward_op_task_invocation(op_attrs); + default: + PANIC("Unhandled OpTaskType", task_type); + }; +} + } // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_computation_graph.cc b/lib/task-spec/src/task-spec/training_computation_graph.cc deleted file mode 100644 index f50930d684..0000000000 --- a/lib/task-spec/src/task-spec/training_computation_graph.cc +++ /dev/null @@ -1,183 +0,0 @@ -#include "task-spec/training_computation_graph.h" -#include "task-spec/loss_tensor_source.h" -#include "task-spec/training_tensor_group.h" -#include "task-spec/training_tensor_group_with_attrs.h" -#include "utils/containers/contains.h" -#include "utils/containers/filter_values.h" -#include "utils/containers/flatmap.h" -#include "utils/containers/generate_map.h" -#include "utils/containers/get_only.h" -#include "utils/containers/keys.h" -#include "utils/containers/set_of.h" -#include "utils/containers/transform.h" -#include "utils/overload.h" - -namespace FlexFlow { - -TrainingComputationGraph generate_training_computation_graph( - ComputationGraph const &computation_graph, - OptimizerAttrs const &optimizer_attrs, - tensor_guid_t const &logit_tensor, - ForwardTensorSource &forward_tensor_source, - GradientTensorSource &gradient_tensor_source, - OptimizerTensorSource &optimizer_tensor_source, - LossTensorSource &loss_tensor_source) { - - loss_tensor_guid_t label_tensor = loss_tensor_source.new_loss_tensor(); - - return TrainingComputationGraph{ - /*computation_graph=*/computation_graph, - /*training_tensor_group_for_tensor=*/ - transform( - get_all_tensor_attrs(computation_graph), - [&](tensor_guid_t tensor_guid, TensorAttrs const &tensor_attrs) { - return std::pair{ 
- tensor_guid, - make_training_tensor_group_for_tensor_guid_t( - /*tensor_guid=*/tensor_guid, - /*tensor_attrs=*/tensor_attrs, - /*optimizer_attrs=*/optimizer_attrs, - /*forward_tensor_source=*/forward_tensor_source, - /*gradient_tensor_source=*/gradient_tensor_source, - /*optimizer_tensor_source=*/optimizer_tensor_source), - }; - }), - /*logit_tensor=*/logit_tensor, - /*label_tensor=*/label_tensor, - }; -} - -TrainingTensorGroup get_training_tensor_group_for_tensor_guid( - TrainingComputationGraph const &training_cg, tensor_guid_t tensor_guid) { - - return training_cg.training_tensor_group_for_tensor.at(tensor_guid); -} - -TrainingTensorGroupWithAttrs - get_training_tensor_group_with_attrs_for_tensor_guid( - TrainingComputationGraph const &training_cg, - tensor_guid_t tensor_guid) { - return make_training_tensor_group_with_attrs_from_group_and_attrs( - /*group=*/get_training_tensor_group_for_tensor_guid(training_cg, - tensor_guid), - /*attrs=*/get_tensor_attrs(training_cg.computation_graph, tensor_guid)); -} - -forward_tensor_guid_t get_forward_tensor_guid_for_tensor_guid( - TrainingComputationGraph const &training_cg, tensor_guid_t t) { - return training_cg.training_tensor_group_for_tensor.at(t).forward_tensor; -} - -gradient_tensor_guid_t get_gradient_tensor_guid_for_tensor_guid( - TrainingComputationGraph const &training_cg, tensor_guid_t t) { - return training_cg.training_tensor_group_for_tensor.at(t).gradient_tensor; -} - -std::vector get_optimizer_tensor_guids_for_tensor_guid( - TrainingComputationGraph const &training_cg, tensor_guid_t t) { - return training_cg.training_tensor_group_for_tensor.at(t).optimizer_tensors; -} - -tensor_guid_t get_tensor_guid_for_forward_tensor_guid( - TrainingComputationGraph const &training_cg, forward_tensor_guid_t t) { - return get_only(keys(filter_values( - training_cg.training_tensor_group_for_tensor, - [&](TrainingTensorGroup const &g) { return g.forward_tensor == t; }))); -} - -tensor_guid_t get_tensor_guid_for_gradient_tensor_guid( - TrainingComputationGraph const &training_cg, gradient_tensor_guid_t t) { - return get_only(keys(filter_values( - training_cg.training_tensor_group_for_tensor, - [&](TrainingTensorGroup const &g) { return g.gradient_tensor == t; }))); -} - -tensor_guid_t get_tensor_guid_for_optimizer_tensor_guid( - TrainingComputationGraph const &training_cg, optimizer_tensor_guid_t t) { - return get_only( - keys(filter_values(training_cg.training_tensor_group_for_tensor, - [&](TrainingTensorGroup const &g) { - return contains(g.optimizer_tensors, t); - }))); -} - -tensor_guid_t get_tensor_guid_for_training_tensor_guid( - TrainingComputationGraph const &training_cg, training_tensor_guid_t t) { - return t.visit(overload{ - [&](forward_tensor_guid_t forward_tensor) { - return get_tensor_guid_for_forward_tensor_guid(training_cg, - forward_tensor); - }, - [&](gradient_tensor_guid_t gradient_tensor) { - return get_tensor_guid_for_gradient_tensor_guid(training_cg, - gradient_tensor); - }, - [&](optimizer_tensor_guid_t optimizer_tensor) { - return get_tensor_guid_for_optimizer_tensor_guid(training_cg, - optimizer_tensor); - }, - [&](loss_tensor_guid_t loss_tensor) -> tensor_guid_t { - PANIC("no tensor_guid_t can exist for a loss_tensor_guid_t"); - }, - }); -} - -std::unordered_set - get_all_training_tensors_in_training_computation_graph( - TrainingComputationGraph const &training_cg) { - std::unordered_set result = flatmap( - unordered_set_of(keys(training_cg.training_tensor_group_for_tensor)), - [&](tensor_guid_t t) { - return 
get_all_training_tensors_in_tensor_group( - training_cg.training_tensor_group_for_tensor.at(t)); - }); - - result.insert(training_tensor_guid_t{training_cg.label_tensor}); - return result; -} - -TrainingLayerPlusContext - get_training_layer_plus_context(TrainingComputationGraph const &training_cg, - layer_guid_t layer_guid) { - auto get_tensor_group_with_attrs = - [&](tensor_guid_t t) -> TrainingTensorGroupWithAttrs { - return get_training_tensor_group_with_attrs_for_tensor_guid(training_cg, t); - }; - - return TrainingLayerPlusContext{ - /*layer_guid=*/layer_guid, - /*layer_attrs=*/ - get_layer_attrs(training_cg.computation_graph, layer_guid), - /*input_tensor_groups=*/ - transform(get_incoming_inputs(training_cg.computation_graph, layer_guid), - get_tensor_group_with_attrs), - /*weight_tensor_groups=*/ - transform(get_incoming_weights(training_cg.computation_graph, layer_guid), - get_tensor_group_with_attrs), - /*output_tensor_groups=*/ - transform(get_outgoing_tensors(training_cg.computation_graph, layer_guid), - get_tensor_group_with_attrs), - }; -} - -std::unordered_map - get_all_training_tensor_shapes( - TrainingComputationGraph const &training_cg) { - return generate_map( - get_all_training_tensors_in_training_computation_graph(training_cg), - [&](training_tensor_guid_t t) { - if (t.is_loss_tensor()) { - ASSERT(t == training_tensor_guid_t{training_cg.label_tensor}); - return get_tensor_attrs(training_cg.computation_graph, - training_cg.logit_tensor) - .shape; - } - - return get_tensor_attrs( - training_cg.computation_graph, - get_tensor_guid_for_training_tensor_guid(training_cg, t)) - .shape; - }); -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_layer_plus_context.cc b/lib/task-spec/src/task-spec/training_layer_plus_context.cc deleted file mode 100644 index 9adbc6b2a1..0000000000 --- a/lib/task-spec/src/task-spec/training_layer_plus_context.cc +++ /dev/null @@ -1,122 +0,0 @@ -#include "task-spec/training_layer_plus_context.h" -#include "task-spec/training_tensor_group_with_attrs.h" -#include "utils/containers/transform.h" - -namespace FlexFlow { - -std::vector - get_training_tensor_groups_with_attrs_for_role( - TrainingLayerPlusContext const &training_layer_plus_context, - TensorRole tensor_role) { - - switch (tensor_role) { - case TensorRole::INPUT: - return training_layer_plus_context.input_tensor_groups; - case TensorRole::WEIGHT: - return training_layer_plus_context.weight_tensor_groups; - case TensorRole::OUTPUT: - return training_layer_plus_context.output_tensor_groups; - default: - PANIC("Unhandled TensorRole {}", tensor_role); - } -} - -TrainingTensorGroupWithAttrs - get_training_tensor_group_with_attrs_for_role_and_index( - TrainingLayerPlusContext const &training_layer_plus_context, - TensorRole tensor_role, - nonnegative_int index) { - - return get_training_tensor_groups_with_attrs_for_role( - training_layer_plus_context, tensor_role) - .at(index.unwrap_nonnegative()); -} - -std::vector - get_input_tensors(TrainingLayerPlusContext const &l) { - return transform( - l.input_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { return g.forward_tensor; }); -} - -std::vector - get_input_grad_tensors(TrainingLayerPlusContext const &l) { - return transform( - l.input_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { return g.gradient_tensor; }); -} - -std::vector - get_input_tensor_shapes(TrainingLayerPlusContext const &l) { - return transform(l.input_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { - return 
g.tensor_attrs.shape; - }); -} - -std::vector - get_weight_tensors(TrainingLayerPlusContext const &l) { - return transform( - l.weight_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { return g.forward_tensor; }); -} - -std::vector - get_weight_grad_tensors(TrainingLayerPlusContext const &l) { - return transform( - l.weight_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { return g.gradient_tensor; }); -} - -std::vector - get_weight_tensor_shapes(TrainingLayerPlusContext const &l) { - return transform(l.weight_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { - return g.tensor_attrs.shape; - }); -} - -std::vector - get_output_tensors(TrainingLayerPlusContext const &l) { - return transform( - l.output_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { return g.forward_tensor; }); -} - -std::vector - get_output_grad_tensors(TrainingLayerPlusContext const &l) { - return transform( - l.output_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { return g.gradient_tensor; }); -} - -std::vector - get_output_tensor_shapes(TrainingLayerPlusContext const &l) { - return transform(l.output_tensor_groups, - [](TrainingTensorGroupWithAttrs const &g) { - return g.tensor_attrs.shape; - }); -} - -TrainingLayerTensorGroupSignature - get_tensor_group_signature(TrainingLayerPlusContext const &l) { - return TrainingLayerTensorGroupSignature{ - /*input_tensor_groups=*/transform(l.input_tensor_groups, - tensor_group_without_attrs), - /*weight_tensor_groups=*/ - transform(l.weight_tensor_groups, tensor_group_without_attrs), - /*output_tensor_groups=*/ - transform(l.output_tensor_groups, tensor_group_without_attrs), - }; -} - -CGOperatorTensorShapeSignature - get_cg_op_shape_signature(TrainingLayerPlusContext const &l) { - return CGOperatorTensorShapeSignature{ - /*input_shapes=*/get_input_tensor_shapes(l), - /*weight_shapes=*/get_weight_tensor_shapes(l), - /*output_shapes=*/get_output_tensor_shapes(l), - }; -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_layer_tensor_group_signature.cc b/lib/task-spec/src/task-spec/training_layer_tensor_group_signature.cc deleted file mode 100644 index db8b8015ec..0000000000 --- a/lib/task-spec/src/task-spec/training_layer_tensor_group_signature.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include "task-spec/training_layer_tensor_group_signature.h" -#include - -namespace FlexFlow { - -std::vector get_training_tensor_groups_for_role( - TrainingLayerTensorGroupSignature const &signature, - TensorRole tensor_role) { - - switch (tensor_role) { - case TensorRole::INPUT: - return signature.input_tensor_groups; - case TensorRole::WEIGHT: - return signature.weight_tensor_groups; - case TensorRole::OUTPUT: - return signature.output_tensor_groups; - default: - PANIC("Unhandled TensorRole {}", tensor_role); - } -} - -TrainingTensorGroup get_training_tensor_group_for_role_and_index( - TrainingLayerTensorGroupSignature const &signature, - TensorRole tensor_role, - nonnegative_int index) { - - return get_training_tensor_groups_for_role(signature, tensor_role) - .at(index.unwrap_nonnegative()); -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_symbolic_computation_graph.cc b/lib/task-spec/src/task-spec/training_symbolic_computation_graph.cc new file mode 100644 index 0000000000..4a1cf2e328 --- /dev/null +++ b/lib/task-spec/src/task-spec/training_symbolic_computation_graph.cc @@ -0,0 +1,221 @@ +#include "task-spec/training_symbolic_computation_graph.h" +#include "pcg/computation_graph.h" 
+#include "pcg/parallel_computation_graph/parallel_layer_attrs.h" +#include "task-spec/lower_op_task_invocation_to_runtime_task_invocation.h" +#include "task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.h" +#include "task-spec/symbolic_loss_tensor_source.h" +#include "task-spec/symbolic_training_tensor_group.h" +#include "task-spec/task_signature_impl.h" +#include "task-spec/symbolic_cg_op_attrs_and_training_signature_with_shapes.h" +#include "utils/bidict/generate_bidict.h" +#include "utils/containers/contains.h" +#include "utils/containers/filter_values.h" +#include "utils/containers/flatmap.h" +#include "utils/containers/generate_map.h" +#include "utils/containers/get_only.h" +#include "utils/containers/keys.h" +#include "utils/containers/set_of.h" +#include "utils/containers/transform.h" +#include "utils/overload.h" +#include "task-spec/loss_functions.h" +#include "task-spec/optimizer.h" + +namespace FlexFlow { + +TensorShape get_symbolic_tensor_shape(TrainingSymbolicComputationGraph const &g, + symbolic_tensor_guid_t t) { + return g.symbolic_computation_graph.at(t.raw_graph_output); +} + +PCGOperatorAttrs get_op_attrs_for_symbolic_layer_guid(TrainingSymbolicComputationGraph const &, + symbolic_layer_guid_t) { + NOT_IMPLEMENTED(); +} + +SymbolicLayerTrainingTensorGroupSignatureWithShapes + get_signature_with_shapes_for_symbolic_layer_guid(TrainingSymbolicComputationGraph const &, + symbolic_layer_guid_t) { + NOT_IMPLEMENTED(); +} + +symbolic_forward_tensor_guid_t get_forward_symbolic_tensor_guid_for_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &g, symbolic_tensor_guid_t t) { + return g.symbolic_training_tensor_group_for_tensor.at(t).forward_tensor; +} + +symbolic_gradient_tensor_guid_t get_gradient_symbolic_tensor_guid_for_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, tensor_guid_t) { + NOT_IMPLEMENTED(); +} + +std::vector get_optimizer_symbolic_tensor_guids_for_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, tensor_guid_t) { + NOT_IMPLEMENTED(); +} + +symbolic_tensor_guid_t get_symbolic_tensor_guid_for_forward_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_forward_tensor_guid_t) { + NOT_IMPLEMENTED(); +} + +symbolic_tensor_guid_t get_symbolic_tensor_guid_for_gradient_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_gradient_tensor_guid_t) { + NOT_IMPLEMENTED(); +} + +symbolic_tensor_guid_t get_symbolic_tensor_guid_for_optimizer_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_optimizer_tensor_guid_t) { + NOT_IMPLEMENTED(); +} + +symbolic_tensor_guid_t get_symbolic_tensor_guid_for_training_symbolic_tensor_guid( + TrainingSymbolicComputationGraph const &, symbolic_training_tensor_guid_t) { + NOT_IMPLEMENTED(); +} + +std::unordered_set + get_all_symbolic_training_tensors_in_training_computation_graph( + TrainingSymbolicComputationGraph const &) { + NOT_IMPLEMENTED(); +} + +SymbolicTrainingLayerAttrsPlusContext + get_symbolic_training_layer_attrs_plus_context(TrainingSymbolicComputationGraph const &, + symbolic_layer_guid_t) { + NOT_IMPLEMENTED(); +} + +std::unordered_map + get_all_symbolic_training_tensor_shapes(TrainingSymbolicComputationGraph const &) { + NOT_IMPLEMENTED(); +} + +static ComputationGraphOpAttrs get_cg_op_attrs_for_symbolic_layer_guid(TrainingSymbolicComputationGraph const &g, + symbolic_layer_guid_t l) { + PCGOperatorAttrs op_attrs = get_op_attrs_for_symbolic_layer_guid(g, l); + std::optional + cg_op_attrs = 
compgraph_op_attrs_from_pcg_op_attrs(op_attrs); + + ASSERT(cg_op_attrs.has_value()); + + return cg_op_attrs.value(); +} + +SymbolicCgOpAttrsAndTrainingSignatureWithShapes + get_attrs_and_signature_for_layer(TrainingSymbolicComputationGraph const &g, + symbolic_layer_guid_t l) { + + ComputationGraphOpAttrs cg_op_attrs = get_cg_op_attrs_for_symbolic_layer_guid(g, l); + + SymbolicLayerTrainingTensorGroupSignatureWithShapes layer_signature + = get_signature_with_shapes_for_symbolic_layer_guid(g, l); + + return make_symbolic_cg_op_attrs_and_signature_with_shapes(cg_op_attrs, layer_signature); +} + +std::optional + get_init_runtime_task_invocation_for_layer(symbolic_layer_guid_t l, + SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &attrs_and_signature) { + + ComputationGraphOpAttrs cg_op_attrs = attrs_and_signature.op_attrs; + + SymbolicLayerTrainingTensorGroupSignatureWithShapes layer_signature + = get_signature_with_shapes(attrs_and_signature); + + OpTaskInvocation op_task_invocation = ({ + std::optional maybe_invocation = get_init_op_task_invocation(cg_op_attrs); + if (!maybe_invocation.has_value()) { + return std::nullopt; + } + maybe_invocation.value(); + }); + + return lower_op_task_invocation_to_runtime_task_invocation( + /*op_task_invocation=*/op_task_invocation, + /*symbolic_layer_guid=*/l, + /*layer_signature=*/layer_signature); +} + +std::optional + get_forward_runtime_task_invocation_for_layer(symbolic_layer_guid_t l, + SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &attrs_and_signature) { + + ComputationGraphOpAttrs cg_op_attrs = attrs_and_signature.op_attrs; + + SymbolicLayerTrainingTensorGroupSignatureWithShapes layer_signature + = get_signature_with_shapes(attrs_and_signature); + + OpTaskInvocation op_task_invocation = ({ + std::optional maybe_invocation = get_forward_op_task_invocation(cg_op_attrs); + if (!maybe_invocation.has_value()) { + return std::nullopt; + } + maybe_invocation.value(); + }); + + return lower_op_task_invocation_to_runtime_task_invocation( + /*op_task_invocation=*/op_task_invocation, + /*symbolic_layer_guid=*/l, + /*layer_signature=*/layer_signature); +} + +std::optional + get_backward_task_invocation_for_layer(symbolic_layer_guid_t l, + SymbolicCgOpAttrsAndTrainingSignatureWithShapes const &attrs_and_signature) { + + ComputationGraphOpAttrs cg_op_attrs = attrs_and_signature.op_attrs; + + SymbolicLayerTrainingTensorGroupSignatureWithShapes layer_signature + = get_signature_with_shapes(attrs_and_signature); + + + OpTaskInvocation op_task_invocation = ({ + std::optional maybe_invocation = get_backward_op_task_invocation(cg_op_attrs); + if (!maybe_invocation.has_value()) { + return std::nullopt; + } + maybe_invocation.value(); + }); + + return lower_op_task_invocation_to_runtime_task_invocation( + /*op_task_invocation=*/op_task_invocation, + /*symbolic_layer_guid=*/l, + /*layer_signature=*/layer_signature); +} + +RuntimeTaskInvocation + get_compute_loss_runtime_task_invocation(LossAttrs const &loss_attrs, + symbolic_forward_tensor_guid_t loss_fwd_tensor, + symbolic_gradient_tensor_guid_t loss_grad_tensor, + symbolic_loss_tensor_guid_t label_tensor) { + + RuntimeTaskInvocation loss_invocation = loss_attrs_backward( + loss_attrs, + loss_fwd_tensor, + loss_grad_tensor, + label_tensor); + + return loss_invocation; +} + +std::optional + get_update_runtime_task_invocation_for_layer(SymbolicTrainingLayerAttrsPlusContext const &training_layer, + OptimizerAttrs const &optimizer_attrs) { + if (training_layer.layer_attrs.op_attrs.has()) { + 
SymbolicTrainingTensorGroup weight_tensor_group = + get_only(training_layer.output_tensor_groups); + + RuntimeTaskInvocation invocation = + optimizer_attrs_get_update_invocation( + optimizer_attrs, + /*weight=*/weight_tensor_group.forward_tensor, + /*weight_grad=*/weight_tensor_group.gradient_tensor, + /*grad_buffer_tensors=*/weight_tensor_group.optimizer_tensors); + + return invocation; + } else { + return std::nullopt; + } +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_symbolic_computation_graph_from_cg_conversion.cc b/lib/task-spec/src/task-spec/training_symbolic_computation_graph_from_cg_conversion.cc new file mode 100644 index 0000000000..393b25d09b --- /dev/null +++ b/lib/task-spec/src/task-spec/training_symbolic_computation_graph_from_cg_conversion.cc @@ -0,0 +1,122 @@ +#include "task-spec/training_symbolic_computation_graph_from_cg_conversion.h" +#include "pcg/computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_layer_attrs.h" +#include "task-spec/symbolic_training_tensor_group.h" +#include "task-spec/symbolic_training_tensor_group_with_attrs.h" +#include "task-spec/training_symbolic_computation_graph.h" +#include "utils/bidict/generate_bidict.h" +#include "utils/graph/labelled_dataflow_graph/algorithms/rewrite_node_labels.h" +#include "utils/graph/labelled_dataflow_graph/algorithms/rewrite_value_labels.h" + +namespace FlexFlow { + +TrainingSymbolicComputationGraphFromCgConversion generate_training_computation_graph_from_cg( + ComputationGraph const &computation_graph, + LossAttrs const &loss_attrs, + OptimizerAttrs const &optimizer_attrs, + tensor_guid_t const &logit_tensor, + SymbolicForwardTensorSource &forward_tensor_source, + SymbolicGradientTensorSource &gradient_tensor_source, + SymbolicOptimizerTensorSource &optimizer_tensor_source, + SymbolicLossTensorSource &loss_tensor_source) { + + symbolic_loss_tensor_guid_t label_tensor = loss_tensor_source.new_symbolic_loss_tensor(); + + LabelledDataflowGraphView + with_nodes_relabelled = + rewrite_node_labels(computation_graph.raw_graph, + [](Node const &, LayerAttrs const &layer_attrs) -> ParallelLayerAttrs { + return parallel_layer_attrs_from_layer_attrs(layer_attrs); + }); + + LabelledDataflowGraphView + symbolic_computation_graph = + rewrite_value_labels( + with_nodes_relabelled, + [](OpenDataflowValue const &, TensorAttrs const &tensor_attrs) -> TensorShape { + return tensor_attrs.shape; + }); + + bidict + tensor_mapping = generate_bidict(get_all_tensors(computation_graph), + [](tensor_guid_t t) { + return symbolic_tensor_guid_t{t.raw_graph_output}; + }); + + bidict + layer_mapping = generate_bidict(get_layers(computation_graph), + [](layer_guid_t l) { + return symbolic_layer_guid_t{l.raw_node}; + }); + + TrainingSymbolicComputationGraph training_symbolic_cg = TrainingSymbolicComputationGraph{ + /*symbolic_computation_graph=*/symbolic_computation_graph, + /*symbolic_training_tensor_group_for_tensor=*/ + transform( + get_all_tensor_attrs(computation_graph), + [&](tensor_guid_t tensor_guid, TensorAttrs const &tensor_attrs) + -> std::pair + { + return std::pair{ + tensor_mapping.at_l(tensor_guid), + make_symbolic_training_tensor_group( + /*create_grad=*/tensor_attrs.create_grad, + /*optimizer_attrs=*/optimizer_attrs, + /*forward_tensor_source=*/forward_tensor_source, + /*gradient_tensor_source=*/gradient_tensor_source, + /*optimizer_tensor_source=*/optimizer_tensor_source), + }; + }), + /*loss_attrs=*/loss_attrs, + /*optimizer_attrs=*/optimizer_attrs, + 
/*logit_tensor=*/tensor_mapping.at_l(logit_tensor), + /*label_tensor=*/label_tensor, + }; + + return TrainingSymbolicComputationGraphFromCgConversion{ + /*training_symbolic_computation_graph=*/training_symbolic_cg, + /*tensor_mapping=*/tensor_mapping, + /*layer_mapping=*/layer_mapping, + }; +} + + +SymbolicTrainingTensorGroup + get_training_tensor_group_for_tensor_guid(TrainingSymbolicComputationGraphFromCgConversion const &conversion, + tensor_guid_t t) { + symbolic_tensor_guid_t symbolic_tensor_guid = conversion.tensor_mapping.at_l(t); + return conversion.training_symbolic_computation_graph.symbolic_training_tensor_group_for_tensor.at(symbolic_tensor_guid); +} + +SymbolicTrainingTensorGroupWithAttrs + get_training_tensor_group_with_attrs_for_tensor_guid( + TrainingSymbolicComputationGraphFromCgConversion const &conversion, + tensor_guid_t t) { + + symbolic_tensor_guid_t symbolic_tensor_guid = conversion.tensor_mapping.at_l(t); + + return make_symbolic_training_tensor_group_with_attrs_from_group_and_attrs( + /*group=*/get_training_tensor_group_for_tensor_guid(conversion, + t), + /*attrs=*/get_symbolic_tensor_shape(conversion.training_symbolic_computation_graph, symbolic_tensor_guid)); +} + +symbolic_layer_guid_t + get_symbolic_layer_guid_for_layer_guid(TrainingSymbolicComputationGraphFromCgConversion const &conversion, + layer_guid_t l) { + return conversion.layer_mapping.at_l(l); +} + +symbolic_tensor_guid_t + get_symbolic_tensor_guid_for_tensor_guid(TrainingSymbolicComputationGraphFromCgConversion const &conversion, + tensor_guid_t t) { + return conversion.tensor_mapping.at_l(t); +} + +layer_guid_t + get_layer_guid_for_symbolic_layer_guid(TrainingSymbolicComputationGraphFromCgConversion const &conversion, + symbolic_layer_guid_t l) { + return conversion.layer_mapping.at_r(l); +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_symbolic_computation_graph_from_pcg_conversion.cc b/lib/task-spec/src/task-spec/training_symbolic_computation_graph_from_pcg_conversion.cc new file mode 100644 index 0000000000..2027f8aa15 --- /dev/null +++ b/lib/task-spec/src/task-spec/training_symbolic_computation_graph_from_pcg_conversion.cc @@ -0,0 +1,16 @@ +#include "task-spec/training_symbolic_computation_graph_from_pcg_conversion.h" + +namespace FlexFlow { + +TrainingSymbolicComputationGraphFromPcgConversion generate_training_computation_graph_from_pcg( + ParallelComputationGraph const &computation_graph, + OptimizerAttrs const &optimizer_attrs, + parallel_tensor_guid_t const &logit_tensor, + SymbolicForwardTensorSource &forward_tensor_source, + SymbolicGradientTensorSource &gradient_tensor_source, + SymbolicOptimizerTensorSource &optimizer_tensor_source, + SymbolicLossTensorSource &loss_tensor_source) { + NOT_IMPLEMENTED(); +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_tensor_group.cc b/lib/task-spec/src/task-spec/training_tensor_group.cc deleted file mode 100644 index 0f6710b80f..0000000000 --- a/lib/task-spec/src/task-spec/training_tensor_group.cc +++ /dev/null @@ -1,48 +0,0 @@ -#include "task-spec/training_tensor_group.h" -#include "pcg/optimizer_attrs.h" -#include "utils/containers/repeat.h" -#include "utils/containers/set_union.h" -#include "utils/containers/transform.h" -#include "utils/containers/unordered_set_of.h" - -namespace FlexFlow { - -TrainingTensorGroup make_training_tensor_group_for_tensor_guid_t( - tensor_guid_t tensor_guid, - TensorAttrs const &tensor_attrs, - OptimizerAttrs const &optimizer_attrs, - ForwardTensorSource 
&forward_tensor_source, - GradientTensorSource &gradient_tensor_source, - OptimizerTensorSource &optimizer_tensor_source) { - - nonnegative_int num_optimizer_tensors = [&]() { - if (tensor_attrs.create_grad == CreateGrad::YES) { - return get_num_optimizer_tensors(optimizer_attrs); - } else { - return 0_n; - } - }(); - - return TrainingTensorGroup{ - /*forward_tensor=*/forward_tensor_source.new_forward_tensor(), - /*gradient_tensor=*/gradient_tensor_source.new_gradient_tensor(), - /*optimizer_tensors=*/ - repeat(num_optimizer_tensors, - [&]() { return optimizer_tensor_source.new_optimizer_tensor(); }), - }; -} - -std::unordered_set - get_all_training_tensors_in_tensor_group(TrainingTensorGroup const &group) { - return set_union( - std::unordered_set{ - training_tensor_guid_t{group.forward_tensor}, - training_tensor_guid_t{group.gradient_tensor}, - }, - transform(unordered_set_of(group.optimizer_tensors), - [](optimizer_tensor_guid_t optimizer_tensor) { - return training_tensor_guid_t{optimizer_tensor}; - })); -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_tensor_group_with_attrs.cc b/lib/task-spec/src/task-spec/training_tensor_group_with_attrs.cc deleted file mode 100644 index 6014b46446..0000000000 --- a/lib/task-spec/src/task-spec/training_tensor_group_with_attrs.cc +++ /dev/null @@ -1,26 +0,0 @@ -#include "task-spec/training_tensor_group_with_attrs.h" - -namespace FlexFlow { - -TrainingTensorGroupWithAttrs - make_training_tensor_group_with_attrs_from_group_and_attrs( - TrainingTensorGroup const &group, TensorAttrs const &attrs) { - - return TrainingTensorGroupWithAttrs{ - /*tensor_attrs=*/attrs, - /*forward_tensor=*/group.forward_tensor, - /*gradient_tensor=*/group.gradient_tensor, - /*optimizer_tensors=*/group.optimizer_tensors, - }; -} - -TrainingTensorGroup - tensor_group_without_attrs(TrainingTensorGroupWithAttrs const &with_attrs) { - return TrainingTensorGroup{ - /*forward_tensor=*/with_attrs.forward_tensor, - /*gradient_tensor=*/with_attrs.gradient_tensor, - /*optimizer_tensors=*/with_attrs.optimizer_tensors, - }; -} - -} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/training_tensor_slot_id_t.cc b/lib/task-spec/src/task-spec/training_tensor_slot_id_t.cc new file mode 100644 index 0000000000..c734198eb4 --- /dev/null +++ b/lib/task-spec/src/task-spec/training_tensor_slot_id_t.cc @@ -0,0 +1,24 @@ +#include "task-spec/training_tensor_slot_id_t.h" +#include + +namespace FlexFlow { + +training_tensor_slot_id_t + training_tensor_slot_from_fwb_slot(fwb_tensor_slot_id_t fwb_slot_id) { + + if (fwb_slot_id.is_grad == IsGrad::NO) { + return training_tensor_slot_id_t{ + fwb_slot_id.slot_id, + TrainingTensorType::FORWARD, + }; + } else if (fwb_slot_id.is_grad == IsGrad::YES) { + return training_tensor_slot_id_t{ + fwb_slot_id.slot_id, + TrainingTensorType::GRADIENT, + }; + } else { + PANIC("Invalid value for IsGrad {}", fwb_slot_id.is_grad); + } +} + +} // namespace FlexFlow diff --git a/lib/task-spec/test/src/task-spec/arg_ref.cc b/lib/task-spec/test/src/task-spec/arg_ref_spec.cc similarity index 95% rename from lib/task-spec/test/src/task-spec/arg_ref.cc rename to lib/task-spec/test/src/task-spec/arg_ref_spec.cc index 5c331a1d71..7dae8ee9cb 100644 --- a/lib/task-spec/test/src/task-spec/arg_ref.cc +++ b/lib/task-spec/test/src/task-spec/arg_ref_spec.cc @@ -1,4 +1,4 @@ -#include "task-spec/arg_ref.h" +#include "task-spec/arg_ref_spec.h" #include #include diff --git 
a/lib/task-spec/test/src/task-spec/lower_op_task_invocation_to_runtime_task_invocation.cc b/lib/task-spec/test/src/task-spec/lower_op_task_invocation_to_runtime_task_invocation.cc new file mode 100644 index 0000000000..e56e3ace93 --- /dev/null +++ b/lib/task-spec/test/src/task-spec/lower_op_task_invocation_to_runtime_task_invocation.cc @@ -0,0 +1,10 @@ +#include +#include "task-spec/lower_op_task_invocation_to_runtime_task_invocation.h" + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("lower_op_task_to_runtime_task_invocation") { + CHECK_MESSAGE(false, "TODO: lower_op_task_to_runtime_task_invocation"); + } +} diff --git a/lib/task-spec/test/src/task-spec/op_ordered_slot_signature.cc b/lib/task-spec/test/src/task-spec/op_ordered_slot_signature.cc new file mode 100644 index 0000000000..0a6d5ca701 --- /dev/null +++ b/lib/task-spec/test/src/task-spec/op_ordered_slot_signature.cc @@ -0,0 +1,10 @@ +#include +#include "task-spec/op_ordered_slot_signature.h" + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_op_ordered_slot_signature_for_binding") { + CHECK_MESSAGE(false, "TODO: get_op_ordered_slot_signature_for_binding"); + } +} diff --git a/lib/task-spec/test/src/task-spec/task_signature_impl.cc b/lib/task-spec/test/src/task-spec/task_signature_impl.cc new file mode 100644 index 0000000000..42065d956f --- /dev/null +++ b/lib/task-spec/test/src/task-spec/task_signature_impl.cc @@ -0,0 +1,10 @@ +#include +#include "task-spec/task_signature_impl.h" + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_forward_op_task_invocation") { + CHECK_MESSAGE(false, "TODO: get_forward_op_task_invocation"); + } +} diff --git a/lib/utils/CMakeLists.txt b/lib/utils/CMakeLists.txt index 6d8f22bc29..e2f7c433d6 100644 --- a/lib/utils/CMakeLists.txt +++ b/lib/utils/CMakeLists.txt @@ -9,7 +9,6 @@ ff_add_library( src/ DEPS expected - visit_struct fmt json cuda diff --git a/lib/utils/include/utils/archetypes/jsonable_value_type.h b/lib/utils/include/utils/archetypes/jsonable_value_type.h new file mode 100644 index 0000000000..92982b461b --- /dev/null +++ b/lib/utils/include/utils/archetypes/jsonable_value_type.h @@ -0,0 +1,78 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ARCHETYPES_JSONABLE_VALUE_TYPE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ARCHETYPES_JSONABLE_VALUE_TYPE_H + +#include +#include +#include +#include +#include +#include + +namespace FlexFlow { + +template +struct jsonable_value_type { + jsonable_value_type() = delete; + + jsonable_value_type(jsonable_value_type const &) { + PANIC(); + } + jsonable_value_type &operator=(jsonable_value_type const &) { + PANIC(); + } + + jsonable_value_type(jsonable_value_type &&) { + PANIC(); + } + jsonable_value_type &operator=(jsonable_value_type &&) { + PANIC(); + } + + bool operator==(jsonable_value_type const &) const { + PANIC(); + } + bool operator!=(jsonable_value_type const &) const { + PANIC(); + } +}; + +template +std::string format_as(jsonable_value_type const &) { + PANIC(); +} + +template +std::ostream &operator<<(std::ostream &s, jsonable_value_type const &x) { + PANIC(); +} + +} // namespace FlexFlow + +namespace nlohmann { + +template +struct adl_serializer<::FlexFlow::jsonable_value_type> { + static ::FlexFlow::jsonable_value_type from_json(json const &) { + PANIC(); + } + + static void to_json(json &, ::FlexFlow::jsonable_value_type const &) { + PANIC(); + } +}; + +} // namespace nlohmann + +namespace std { + +template +struct hash<::FlexFlow::jsonable_value_type> { + size_t 
operator()(::FlexFlow::jsonable_value_type const &) const { + PANIC(); + }; +}; + +} // namespace std + + +#endif diff --git a/lib/utils/include/utils/archetypes/ordered_value_type.h b/lib/utils/include/utils/archetypes/ordered_value_type.h index b14f378667..3666d19d3f 100644 --- a/lib/utils/include/utils/archetypes/ordered_value_type.h +++ b/lib/utils/include/utils/archetypes/ordered_value_type.h @@ -37,6 +37,12 @@ struct ordered_value_type { bool operator>(ordered_value_type const &) const { PANIC(); } + bool operator<=(ordered_value_type const &) const { + PANIC(); + } + bool operator>=(ordered_value_type const &) const { + PANIC(); + } }; template diff --git a/lib/utils/include/utils/archetypes/rapidcheckable_value_type.h b/lib/utils/include/utils/archetypes/rapidcheckable_value_type.h new file mode 100644 index 0000000000..80510464e8 --- /dev/null +++ b/lib/utils/include/utils/archetypes/rapidcheckable_value_type.h @@ -0,0 +1,76 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ARCHETYPES_RAPIDCHECKABLE_VALUE_TYPE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ARCHETYPES_RAPIDCHECKABLE_VALUE_TYPE_H + +#include +#include +#include +#include +#include +#include + +namespace FlexFlow { + +template +struct rapidcheckable_value_type { + rapidcheckable_value_type() = delete; + + rapidcheckable_value_type(rapidcheckable_value_type const &) { + PANIC(); + } + rapidcheckable_value_type &operator=(rapidcheckable_value_type const &) { + PANIC(); + } + + rapidcheckable_value_type(rapidcheckable_value_type &&) { + PANIC(); + } + rapidcheckable_value_type &operator=(rapidcheckable_value_type &&) { + PANIC(); + } + + bool operator==(rapidcheckable_value_type const &) const { + PANIC(); + } + bool operator!=(rapidcheckable_value_type const &) const { + PANIC(); + } + bool operator<(rapidcheckable_value_type const &) const { + PANIC(); + } +}; + +template +std::string format_as(rapidcheckable_value_type const &) { + PANIC(); +} + +template +std::ostream &operator<<(std::ostream &s, rapidcheckable_value_type const &x) { + PANIC(); +} + +} // namespace FlexFlow + +namespace rc { + +template +struct Arbitrary<::FlexFlow::rapidcheckable_value_type> { + static Gen<::FlexFlow::rapidcheckable_value_type> arbitrary() { + PANIC(); + } +}; + +} // namespacer rc + +namespace std { + +template +struct hash<::FlexFlow::rapidcheckable_value_type> { + size_t operator()(::FlexFlow::rapidcheckable_value_type const &) const { + PANIC(); + }; +}; + +} // namespace std + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/bidict_from_enumerating.h b/lib/utils/include/utils/bidict/algorithms/bidict_from_enumerating.h index 83afc32e0c..495cfcc667 100644 --- a/lib/utils/include/utils/bidict/algorithms/bidict_from_enumerating.h +++ b/lib/utils/include/utils/bidict/algorithms/bidict_from_enumerating.h @@ -2,11 +2,27 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_BIDICT_FROM_ENUMERATING_H #include "utils/bidict/bidict.h" +#include "utils/containers/contains_duplicates.h" #include "utils/nonnegative_int/nonnegative_int.h" +#include #include namespace FlexFlow { +template +bidict bidict_from_enumerating(std::vector const &s) { + ASSERT(!contains_duplicates(s)); + + bidict result; + nonnegative_int idx = 0_n; + for (T const &t : s) { + result.equate(idx, t); + idx++; + } + + return result; +} + template bidict bidict_from_enumerating(std::unordered_set const &s) { diff --git a/lib/utils/include/utils/bidict/algorithms/bidict_from_keys_and_values.h 
b/lib/utils/include/utils/bidict/algorithms/bidict_from_keys_and_values.h index 47af03591a..ddf1c92c75 100644 --- a/lib/utils/include/utils/bidict/algorithms/bidict_from_keys_and_values.h +++ b/lib/utils/include/utils/bidict/algorithms/bidict_from_keys_and_values.h @@ -4,21 +4,14 @@ #include "utils/bidict/algorithms/bidict_from_pairs.h" #include "utils/bidict/bidict.h" #include "utils/containers/zip.h" -#include "utils/exception.h" +#include namespace FlexFlow { template bidict bidict_from_keys_and_values(std::vector const &ls, std::vector const &rs) { - size_t l_size = ls.size(); - size_t r_size = rs.size(); - if (l_size != r_size) { - throw mk_runtime_error(fmt::format( - "recieved keys (of size {}) not matching values (of size {})", - l_size, - r_size)); - } + ASSERT(ls.size() == rs.size()); return bidict_from_pairs(zip(ls, rs)); } diff --git a/lib/utils/include/utils/bidict/algorithms/exhaustive_relational_join.h b/lib/utils/include/utils/bidict/algorithms/exhaustive_relational_join.h new file mode 100644 index 0000000000..2b3e6b89d8 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/exhaustive_relational_join.h @@ -0,0 +1,27 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_EXHAUSTIVE_RELATIONAL_JOIN_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_EXHAUSTIVE_RELATIONAL_JOIN_H + +#include "utils/bidict/bidict.h" +#include "utils/exception.h" +#include "utils/bidict/algorithms/left_entries.h" +#include "utils/bidict/algorithms/right_entries.h" + +namespace FlexFlow { + +template +bidict exhaustive_relational_join(bidict const &fst, + bidict const &snd) { + ASSERT(right_entries(fst) == left_entries(snd)); + + bidict result; + + for (auto const &[v1, v2] : fst) { + result.equate({v1, snd.at_l(v2)}); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/filter_keys.h b/lib/utils/include/utils/bidict/algorithms/filter_keys.h new file mode 100644 index 0000000000..2734dfaeb5 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/filter_keys.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_FILTER_KEYS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_FILTER_KEYS_H + +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +template +bidict filter_keys(bidict const &m, F &&f) { + bidict result; + for (auto const &kv : m) { + if (f(kv.first)) { + result.equate(kv); + } + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/filter_values.h b/lib/utils/include/utils/bidict/algorithms/filter_values.h new file mode 100644 index 0000000000..5817578e79 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/filter_values.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_FILTER_VALUES_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_FILTER_VALUES_H + +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +template +bidict filter_values(bidict const &m, F &&f) { + bidict result; + for (auto const &kv : m) { + if (f(kv.second)) { + result.equate(kv); + } + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/filtrans_keys.h b/lib/utils/include/utils/bidict/algorithms/filtrans_keys.h new file mode 100644 index 0000000000..df6495b400 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/filtrans_keys.h @@ -0,0 +1,25 @@ +#ifndef 
_FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_FILTRANS_KEYS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_FILTRANS_KEYS_H + +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +template ::value_type> +bidict filtrans_keys(bidict const &m, F &&f) { + bidict result; + for (auto const &[k, v] : m) { + std::optional new_k = f(k); + if (new_k.has_value()) { + result.equate(new_k.value(), v); + } + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/filtrans_values.h b/lib/utils/include/utils/bidict/algorithms/filtrans_values.h new file mode 100644 index 0000000000..11180938b8 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/filtrans_values.h @@ -0,0 +1,25 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_FILTRANS_VALUES_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_FILTRANS_VALUES_H + +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +template ::value_type> +bidict filtrans_values(bidict const &m, F &&f) { + bidict result; + for (auto const &[k, v] : m) { + std::optional new_v = f(v); + if (new_v.has_value()) { + result.equate(k, new_v.value()); + } + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/transform.h b/lib/utils/include/utils/bidict/algorithms/transform.h new file mode 100644 index 0000000000..7fbdd07db7 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/transform.h @@ -0,0 +1,23 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_TRANSFORM_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_TRANSFORM_H + +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +template ::first_type, + typename V2 = typename std::invoke_result_t::second_type> +bidict transform(bidict const &m, F &&f) { + bidict result; + for (auto const &[k, v] : m) { + result.equate(f(k, v)); + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/transform_keys.h b/lib/utils/include/utils/bidict/algorithms/transform_keys.h new file mode 100644 index 0000000000..8ecb10c401 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/transform_keys.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_TRANSFORM_KEYS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_TRANSFORM_KEYS_H + +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +template > +bidict transform_keys(bidict const &m, F &&f) { + bidict result; + for (auto const &kv : m) { + result.equate(f(kv.first), kv.second); + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/transform_values.h b/lib/utils/include/utils/bidict/algorithms/transform_values.h new file mode 100644 index 0000000000..ef5b34ebe9 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/transform_values.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_TRANSFORM_VALUES_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_TRANSFORM_VALUES_H + +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +template > +bidict transform_values(bidict const &m, F &&f) { + bidict result; + for (auto const &kv : m) { + result.equate({kv.first, f(kv.second)}); + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/algorithms/unordered_set_of.h 
b/lib/utils/include/utils/bidict/algorithms/unordered_set_of.h new file mode 100644 index 0000000000..1465b39c09 --- /dev/null +++ b/lib/utils/include/utils/bidict/algorithms/unordered_set_of.h @@ -0,0 +1,23 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_UNORDERED_SET_OF_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_ALGORITHMS_UNORDERED_SET_OF_H + +#include "utils/hash/pair.h" +#include "utils/bidict/bidict.h" + +namespace FlexFlow { + +template +std::unordered_set> unordered_set_of(bidict const &c) { + std::unordered_set> result; + + for (auto const &lr : c) { + result.insert(lr); + } + + return result; +} + + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bidict/bidict.h b/lib/utils/include/utils/bidict/bidict.h index 8b19313002..26c300c1bb 100644 --- a/lib/utils/include/utils/bidict/bidict.h +++ b/lib/utils/include/utils/bidict/bidict.h @@ -1,11 +1,18 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_BIDICT_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_BIDICT_H +#include "utils/containers/keys.h" #include "utils/fmt/unordered_map.h" #include "utils/hash/unordered_map.h" #include #include #include +#include +#include "utils/json/check_is_json_serializable.h" +#include "utils/json/check_is_json_deserializable.h" +#include "utils/containers/map_from_keys_and_values.h" +#include +#include "utils/ord/unordered_map.h" namespace FlexFlow { @@ -85,6 +92,14 @@ struct bidict { return bwd_map.at(r); } + std::unordered_set left_values() const { + return keys(this->fwd_map); + } + + std::unordered_set right_values() const { + return keys(this->bwd_map); + } + std::size_t size() const { assert(fwd_map.size() == bwd_map.size()); return fwd_map.size(); @@ -195,6 +210,12 @@ struct bidict { std::unordered_map bwd_map; }; +template +std::enable_if_t && is_lt_comparable_v, bool> + operator<(bidict const &lhs, bidict const &rhs) { + return lhs.as_unordered_map() < rhs.as_unordered_map(); +} + template std::unordered_map format_as(bidict const &b) { return b; @@ -208,96 +229,56 @@ std::ostream &operator<<(std::ostream &s, bidict const &b) { return s << fmt::to_string(b); } -template ()(std::declval()))> -bidict map_keys(bidict const &m, F const &f) { - bidict result; - for (auto const &kv : m) { - result.equate(f(kv.first), kv.second); - } - return result; -} +} // namespace FlexFlow -template ()(std::declval()))> -bidict map_values(bidict const &m, F const &f) { - bidict result; - for (auto const &kv : m) { - result.equate({kv.first, f(kv.second)}); - } - return result; -} +namespace nlohmann { -template -bidict filter_keys(bidict const &m, F const &f) { - bidict result; - for (auto const &kv : m) { - if (f(kv.first)) { - result.equate(kv); - } - } - return result; -} +template +struct adl_serializer<::FlexFlow::bidict> { + static ::FlexFlow::bidict from_json(json const &j) { + CHECK_IS_JSON_DESERIALIZABLE(L); + CHECK_IS_JSON_DESERIALIZABLE(R); -template -bidict filter_values(bidict const &m, F const &f) { - bidict result; - for (auto const &kv : m) { - if (f(kv.second)) { - result.equate(kv); - } - } - return result; -} + std::unordered_map m = j; -template ::value_type> -bidict filtermap_keys(bidict const &m, F const &f) { - bidict result; - for (auto const &[k, v] : m) { - std::optional new_k = f(k); - if (new_k.has_value()) { - result.equate(new_k.value(), v); - } + ::FlexFlow::bidict b{m.cbegin(), m.cend()}; + + return b; } - return result; -} + static void to_json(json &j, ::FlexFlow::bidict const &b) { + CHECK_IS_JSON_SERIALIZABLE(L); + 
CHECK_IS_JSON_SERIALIZABLE(R); -template ::value_type> -bidict filtermap_values(bidict const &m, F const &f) { - bidict result; - for (auto const &[k, v] : m) { - std::optional new_v = f(v); - if (new_v.has_value()) { - result.equate(k, new_v.value()); - } + j = b.as_unordered_map(); } - return result; -} +}; + +} // namespace nlohmann -template ::first_type, - typename V2 = typename std::invoke_result_t::second_type> -bidict transform(bidict const &m, F const &f) { - bidict result; - for (auto const &[k, v] : m) { - result.equate(f(k, v)); +namespace rc { + +template +struct Arbitrary<::FlexFlow::bidict> { + static Gen<::FlexFlow::bidict> arbitrary() { + return gen::map( + gen::withSize( + [](int size) -> Gen> { + return gen::apply( + [](std::vector const &keys, std::vector const &values) + -> std::unordered_map + { + return ::FlexFlow::map_from_keys_and_values(keys, values); + }, + gen::unique>(size, gen::arbitrary()), + gen::unique>(size, gen::arbitrary())); + }), + [](std::unordered_map const &m) { + return ::FlexFlow::bidict{m.cbegin(), m.cend()}; + }); } - return result; -} +}; -} // namespace FlexFlow +} namespace std { diff --git a/lib/utils/include/utils/bijection/bijection.h b/lib/utils/include/utils/bijection/bijection.h new file mode 100644 index 0000000000..a8fdac22ff --- /dev/null +++ b/lib/utils/include/utils/bijection/bijection.h @@ -0,0 +1,18 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIJECTION_BIJECTION_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIJECTION_BIJECTION_H + +#include "utils/bijection/bijection.dtg.h" + +namespace FlexFlow { + +template +Bijection flip_bijection(Bijection const &b) { + return Bijection{ + /*l_to_r=*/b.r_to_l, + /*r_to_l=*/b.l_to_r, + }; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/bijection/bijection.struct.toml b/lib/utils/include/utils/bijection/bijection.struct.toml new file mode 100644 index 0000000000..fb7702318c --- /dev/null +++ b/lib/utils/include/utils/bijection/bijection.struct.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "Bijection" +features = [] + +template_params = [ "L", "R" ] + +includes = [ + "" +] + +[[fields]] +name = "l_to_r" +type = "std::function" + +[[fields]] +name = "r_to_l" +type = "std::function" diff --git a/lib/utils/include/utils/bijection/to.struct.toml b/lib/utils/include/utils/bijection/to.struct.toml new file mode 100644 index 0000000000..76d47ae488 --- /dev/null +++ b/lib/utils/include/utils/bijection/to.struct.toml @@ -0,0 +1,13 @@ +namespace = "FlexFlow" +name = "To" +features = [] + +template_params = [ "L", "R" ] + +includes = [ + "" +] + +[[fields]] +name = "func" +type = "std::function" diff --git a/lib/utils/include/utils/cli/cli_flag_key.struct.toml b/lib/utils/include/utils/cli/cli_flag_key.struct.toml index 9c02fddc3e..ad3e9ac193 100644 --- a/lib/utils/include/utils/cli/cli_flag_key.struct.toml +++ b/lib/utils/include/utils/cli/cli_flag_key.struct.toml @@ -2,6 +2,7 @@ namespace = "FlexFlow" name = "CLIFlagKey" features = [ "eq", + "ord", "hash", "fmt", ] diff --git a/lib/utils/include/utils/containers/all_of.h b/lib/utils/include/utils/containers/all_of.h index fb44aeaed8..ef5aac1c41 100644 --- a/lib/utils/include/utils/containers/all_of.h +++ b/lib/utils/include/utils/containers/all_of.h @@ -1,10 +1,14 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ALL_OF_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ALL_OF_H +#include +#include +#include + namespace FlexFlow { template -bool all_of(C const &c, F const &f) { +bool all_of(C const &c, F 
&&f) { for (auto const &v : c) { if (!f(v)) { return false; @@ -13,6 +17,30 @@ bool all_of(C const &c, F const &f) { return true; } +template +bool all_of(std::unordered_map const &m, F &&f) { + for (auto const &[k, v] : m) { + if (!f(k, v)) { + return false; + } + } + + return true; +} + +template +bool all_of(std::map const &m, F &&f) { + for (auto const &[k, v] : m) { + if (!f(k, v)) { + return false; + } + } + + return true; +} + +bool all_of(std::vector const &); + } // namespace FlexFlow #endif diff --git a/lib/utils/include/utils/containers/at_idx.h b/lib/utils/include/utils/containers/at_idx.h index fdc13a0231..99c7333a3a 100644 --- a/lib/utils/include/utils/containers/at_idx.h +++ b/lib/utils/include/utils/containers/at_idx.h @@ -4,16 +4,15 @@ #include "utils/nonnegative_int/nonnegative_int.h" #include #include +#include namespace FlexFlow { template -std::optional at_idx(std::vector const &v, nonnegative_int idx) { - if (idx >= v.size()) { - return std::nullopt; - } else { - return v.at(idx.unwrap_nonnegative()); - } +E at_idx(std::vector const &v, nonnegative_int idx) { + ASSERT(idx < v.size()); + + return v.at(idx.unwrap_nonnegative()); } } // namespace FlexFlow diff --git a/lib/utils/include/utils/containers/binary_cartesian_product.h b/lib/utils/include/utils/containers/binary_cartesian_product.h new file mode 100644 index 0000000000..211a90e2e0 --- /dev/null +++ b/lib/utils/include/utils/containers/binary_cartesian_product.h @@ -0,0 +1,26 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_CARTESIAN_PRODUCT_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_CARTESIAN_PRODUCT_H + +#include +#include "utils/hash/pair.h" + +namespace FlexFlow { + +template +std::unordered_set> + binary_cartesian_product(std::unordered_set const &lhs, + std::unordered_set const &rhs) { + std::unordered_set> result; + + for (A const &a : lhs) { + for (B const &b : rhs) { + result.insert({a, b}); + } + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/binary_merge_disjoint_maps.h b/lib/utils/include/utils/containers/binary_merge_disjoint_maps.h new file mode 100644 index 0000000000..8182888f8b --- /dev/null +++ b/lib/utils/include/utils/containers/binary_merge_disjoint_maps.h @@ -0,0 +1,28 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_MERGE_DISJOINT_MAPS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_MERGE_DISJOINT_MAPS_H + +#include "utils/containers/binary_merge_maps_with.h" +#include + +namespace FlexFlow { + +template +std::unordered_map binary_merge_disjoint_maps( + std::unordered_map const &lhs, + std::unordered_map const &rhs) { + + std::unordered_set lhs_keys = keys(lhs); + std::unordered_set rhs_keys = keys(rhs); + + std::unordered_set shared_keys = intersection(lhs_keys, rhs_keys); + ASSERT(shared_keys.empty()); + + return binary_merge_maps_with( + lhs, rhs, [](V const &, V const &) -> V { + PANIC(); + }); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/binary_merge_maps_with.h b/lib/utils/include/utils/containers/binary_merge_maps_with.h new file mode 100644 index 0000000000..5d65c1859f --- /dev/null +++ b/lib/utils/include/utils/containers/binary_merge_maps_with.h @@ -0,0 +1,45 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_MERGE_MAPS_WITH_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_MERGE_MAPS_WITH_H + +#include +#include "utils/containers/keys.h" +#include "utils/containers/intersection.h" 
+#include "utils/containers/set_minus.h" +#include "utils/containers/restrict_keys.h" +#include "utils/containers/generate_map.h" +#include "utils/containers/merge_maps_with_right_dominating.h" + +namespace FlexFlow { + +template +std::unordered_map binary_merge_maps_with( + std::unordered_map const &lhs, + std::unordered_map const &rhs, + F &&f) { + + std::unordered_set l_keys = keys(lhs); + std::unordered_set r_keys = keys(rhs); + + std::unordered_set l_only_keys = set_minus(l_keys, r_keys); + std::unordered_set r_only_keys = set_minus(r_keys, l_keys); + std::unordered_set both_keys = intersection(r_keys, l_keys); + + std::unordered_map l_only = restrict_keys(lhs, l_only_keys); + std::unordered_map r_only = restrict_keys(rhs, r_only_keys); + + std::unordered_map merged = + generate_map(both_keys, + [&](K const &k) { + return f(lhs.at(k), rhs.at(k)); + }); + + return merge_maps_with_right_dominating(std::vector{ + l_only, + r_only, + merged, + }); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/binary_merge_maps_with_left_dominating.h b/lib/utils/include/utils/containers/binary_merge_maps_with_left_dominating.h new file mode 100644 index 0000000000..1ad9ad4e9b --- /dev/null +++ b/lib/utils/include/utils/containers/binary_merge_maps_with_left_dominating.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_MERGE_MAPS_WITH_LEFT_DOMINATING_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_MERGE_MAPS_WITH_LEFT_DOMINATING_H + +#include "utils/containers/merge_in_map.h" + +namespace FlexFlow { + +template +std::unordered_map + binary_merge_maps_with_left_dominating(std::unordered_map const &lhs, + std::unordered_map const &rhs) { + std::unordered_map result; + merge_in_map(rhs, result); + merge_in_map(lhs, result); + return result; +} + + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/binary_merge_maps_with_right_dominating.h b/lib/utils/include/utils/containers/binary_merge_maps_with_right_dominating.h new file mode 100644 index 0000000000..637705cc50 --- /dev/null +++ b/lib/utils/include/utils/containers/binary_merge_maps_with_right_dominating.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_MERGE_MAPS_WITH_RIGHT_DOMINATING_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_BINARY_MERGE_MAPS_WITH_RIGHT_DOMINATING_H + +#include "utils/containers/merge_in_map.h" + +namespace FlexFlow { + +template +std::unordered_map + binary_merge_maps_with_right_dominating(std::unordered_map const &lhs, + std::unordered_map const &rhs) { + std::unordered_map result; + merge_in_map(lhs, result); + merge_in_map(rhs, result); + return result; +} + + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/contains_duplicates.h b/lib/utils/include/utils/containers/contains_duplicates.h new file mode 100644 index 0000000000..8203c5e882 --- /dev/null +++ b/lib/utils/include/utils/containers/contains_duplicates.h @@ -0,0 +1,27 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_CONTAINS_DUPLICATES_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_CONTAINS_DUPLICATES_H + +#include "utils/containers/unordered_set_of.h" +#include +#include + +namespace FlexFlow { + +template +bool contains_duplicates(std::vector const &s) { + return unordered_set_of(s).size() != s.size(); +} + +template +bool contains_duplicates(std::unordered_multiset const &s) { + return unordered_set_of(s).size() != s.size(); +} + +template +bool 
contains_duplicates(std::multiset const &s) { + return unordered_set_of(s).size() != s.size(); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/count.h b/lib/utils/include/utils/containers/count.h index bae4ba104c..7e01390131 100644 --- a/lib/utils/include/utils/containers/count.h +++ b/lib/utils/include/utils/containers/count.h @@ -3,12 +3,13 @@ #include #include +#include "utils/nonnegative_int/nonnegative_int.h" namespace FlexFlow { template -int count(C const &c, F const &f) { - int result = 0; +nonnegative_int count(C const &c, F const &f) { + nonnegative_int result = 0_n; for (auto const &v : c) { if (f(v)) { result++; @@ -17,8 +18,6 @@ int count(C const &c, F const &f) { return result; } -std::vector count(size_t n); - } // namespace FlexFlow #endif diff --git a/lib/utils/include/utils/containers/extend.h b/lib/utils/include/utils/containers/extend.h index fa4e2d24a8..8ce215c96c 100644 --- a/lib/utils/include/utils/containers/extend.h +++ b/lib/utils/include/utils/containers/extend.h @@ -2,8 +2,8 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_EXTEND_H #include "utils/containers/extend_vector.h" -#include #include +#include namespace FlexFlow { @@ -12,24 +12,26 @@ void extend(std::vector &lhs, C const &rhs) { extend_vector(lhs, rhs); } -template -void extend(std::vector &lhs, std::optional const &rhs) { - if (rhs.has_value()) { - extend(lhs, std::vector{rhs.value()}); - } +template +void extend(std::unordered_set &lhs, C const &rhs) { + lhs.reserve(lhs.size() + std::distance(rhs.begin(), rhs.end())); + lhs.insert(rhs.cbegin(), rhs.cend()); } template -void extend(std::unordered_set &lhs, C const &rhs) { +void extend(std::unordered_multiset &lhs, C const &rhs) { lhs.reserve(lhs.size() + std::distance(rhs.begin(), rhs.end())); lhs.insert(rhs.cbegin(), rhs.cend()); } -template -void extend(std::unordered_set &lhs, std::optional const &rhs) { - if (rhs.has_value()) { - extend(lhs, std::vector{rhs.value()}); - } +template +void extend(std::set &lhs, C const &rhs) { + lhs.insert(rhs.cbegin(), rhs.cend()); +} + +template +void extend(std::multiset &lhs, C const &rhs) { + lhs.insert(rhs.cbegin(), rhs.cend()); } } // namespace FlexFlow diff --git a/lib/utils/include/utils/containers/filter_idxs.h b/lib/utils/include/utils/containers/filter_idxs.h new file mode 100644 index 0000000000..0de9a05130 --- /dev/null +++ b/lib/utils/include/utils/containers/filter_idxs.h @@ -0,0 +1,28 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_FILTER_IDXS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_FILTER_IDXS_H + +#include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/nonnegative_int/range.h" +#include +#include + +namespace FlexFlow { + +template +std::vector filter_idxs(std::vector const &input, + std::function const &f) { + std::vector result; + + for (nonnegative_int idx : range(num_elements(input))) { + if (f(idx)) { + result.push_back(input.at(idx.unwrap_nonnegative())); + } + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/flatmap.h b/lib/utils/include/utils/containers/flatmap.h index eaa8d1dbef..541414ed11 100644 --- a/lib/utils/include/utils/containers/flatmap.h +++ b/lib/utils/include/utils/containers/flatmap.h @@ -3,7 +3,7 @@ #include "utils/containers/extend.h" #include "utils/containers/get_element_type.h" -#include "utils/containers/merge_maps.h" +#include "utils/containers/binary_merge_disjoint_maps.h" 
#include #include #include @@ -13,7 +13,7 @@ namespace FlexFlow { template ::value_type> -std::vector flatmap(std::vector const &v, F const &f) { +std::vector flatmap(std::vector const &v, F &&f) { std::vector result; for (auto const &elem : v) { extend(result, f(elem)); @@ -24,7 +24,7 @@ std::vector flatmap(std::vector const &v, F const &f) { template >> -std::unordered_set flatmap(std::unordered_set const &v, F const &f) { +std::unordered_set flatmap(std::unordered_set const &v, F &&f) { std::unordered_set result; for (auto const &elem : v) { extend(result, f(elem)); @@ -32,10 +32,11 @@ std::unordered_set flatmap(std::unordered_set const &v, F const &f) { return result; } -template -std::unordered_set flatmap_v2(std::unordered_set const &v, - std::unordered_set (*f)(In const &)) { - std::unordered_set result; +template >> +std::unordered_multiset flatmap(std::unordered_multiset const &v, F &&f) { + std::unordered_multiset result; for (auto const &elem : v) { extend(result, f(elem)); } @@ -45,7 +46,7 @@ std::unordered_set flatmap_v2(std::unordered_set const &v, template >> -std::set flatmap(std::set const &v, F const &f) { +std::set flatmap(std::set const &v, F &&f) { std::set result; for (auto const &elem : v) { extend(result, f(elem)); @@ -53,6 +54,17 @@ std::set flatmap(std::set const &v, F const &f) { return result; } +template >> +std::multiset flatmap(std::multiset const &v, F &&f) { + std::multiset result; + for (auto const &elem : v) { + extend(result, f(elem)); + } + return result; +} + template < typename InK, typename InV, @@ -64,14 +76,26 @@ std::unordered_map flatmap(std::unordered_map const &m, std::unordered_map result; for (auto const &[k, v] : m) { - result = merge_disjoint_maps(result, f(k, v)); + result = binary_merge_disjoint_maps(result, f(k, v)); } return result; } +template ::value_type> +std::optional flatmap(std::optional const &o, F &&f) { + if (o.has_value()) { + std::optional r = f(o.value()); + return r; + } else { + return std::nullopt; + } +} + template -std::string flatmap(std::string const &input, F const &f) { +std::string flatmap(std::string const &input, F &&f) { std::string result = ""; for (char c : input) { diff --git a/lib/utils/include/utils/containers/foldl.h b/lib/utils/include/utils/containers/foldl.h index 16851d7d9b..5b99b23a7c 100644 --- a/lib/utils/include/utils/containers/foldl.h +++ b/lib/utils/include/utils/containers/foldl.h @@ -25,7 +25,7 @@ namespace FlexFlow { * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:foldl */ template -T foldl(C const &c, T init, F func) { +T foldl(C const &c, T const &init, F func) { T result = init; for (auto const &elem : c) { result = func(result, elem); @@ -33,40 +33,6 @@ T foldl(C const &c, T init, F func) { return result; } -/** - * @brief - * Applies `func` to the elements of `c` from left to right, accumulating the - * result. The first element of `c` is used as the starting point for the - * accumulation. - * - * @example - * std::vector nums = {1, 2, 3, 4}; - * int result = foldl1(nums, [](int a, int b) { return a + b; }); - * result -> (((1+2)+3)+4) = 10 - * - * @note - * For more information, see - * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:foldl1 - * @throws std::runtime_error if the container is empty. 
- */ -template -E foldl1(C const &c, F func) { - if (c.empty()) { - throw mk_runtime_error( - fmt::format("foldl1 received empty container: {}", c)); - } - std::optional result = std::nullopt; - - for (E const &e : c) { - if (!result.has_value()) { - result = e; - } else { - result = func(result.value(), e); - } - } - return result.value(); -} - } // namespace FlexFlow #endif diff --git a/lib/utils/include/utils/containers/foldl1.h b/lib/utils/include/utils/containers/foldl1.h index f542f8cf00..2691252ee6 100644 --- a/lib/utils/include/utils/containers/foldl1.h +++ b/lib/utils/include/utils/containers/foldl1.h @@ -1,27 +1,41 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_FOLDL1_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_FOLDL1_H -#include "utils/exception.h" #include +#include namespace FlexFlow { -template -T foldl1(std::vector const &vec, F f) { - if (vec.empty()) { - throw mk_runtime_error(fmt::format( - "foldl1 expected non-empty vector, but receieved empty vector")); +/** + * @brief + * Applies `func` to the elements of `c` from left to right, accumulating the + * result. The first element of `c` is used as the starting point for the + * accumulation. + * + * @example + * std::vector nums = {1, 2, 3, 4}; + * int result = foldl1(nums, [](int a, int b) { return a + b; }); + * result -> (((1+2)+3)+4) = 10 + * + * @note + * For more information, see + * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:foldl1 + * @throws std::runtime_error if the container is empty. + */ +template +E foldl1(C const &c, F func) { + ASSERT(!c.empty(), + "foldl1 expected non-empty vector, but received empty vector"); + std::optional result = std::nullopt; + + for (E const &e : c) { + if (!result.has_value()) { + result = e; + } else { + result = func(result.value(), e); + } } - - auto it = vec.cbegin(); - T result = *it; - it++; - - for (; it != vec.cend(); it++) { - result = f(result, *it); - } - - return result; + return result.value(); } } // namespace FlexFlow diff --git a/lib/utils/include/utils/containers/foldr.h b/lib/utils/include/utils/containers/foldr.h new file mode 100644 index 0000000000..6eee3ea7f7 --- /dev/null +++ b/lib/utils/include/utils/containers/foldr.h @@ -0,0 +1,28 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_FOLDR_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_FOLDR_H + +#include "utils/exception.h" +#include + +namespace FlexFlow { + +template +T foldr1(std::vector const &vec, F f) { + if (vec.empty()) { + throw mk_runtime_error(fmt::format( + "foldr1 expected non-empty vector, but receieved empty vector")); + } + + auto it = vec.crbegin(); + T result = *it; + it++; + for (; it != vec.crend(); it++) { + result = f(result, *it); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/generate_vector.h b/lib/utils/include/utils/containers/generate_vector.h new file mode 100644 index 0000000000..71e8cc5acb --- /dev/null +++ b/lib/utils/include/utils/containers/generate_vector.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GENERATE_VECTOR_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GENERATE_VECTOR_H + +#include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/nonnegative_int/range.h" +#include + +namespace FlexFlow { + +template > +std::vector generate_vector(nonnegative_int length, F &&f) { + std::vector result; + for (nonnegative_int idx : range(length)) { + result.push_back(f(idx)); + } + return result; +} + +} // 
namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/group_by.h b/lib/utils/include/utils/containers/group_by.h index 6abffbfed0..0042edeea3 100644 --- a/lib/utils/include/utils/containers/group_by.h +++ b/lib/utils/include/utils/containers/group_by.h @@ -1,18 +1,40 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GROUP_BY_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GROUP_BY_H +#include #include #include #include +#include +#include "utils/one_to_many/one_to_many.h" namespace FlexFlow { template > -std::unordered_map> - group_by(std::unordered_set const &vs, F f) { - std::unordered_map> result; +OneToMany + group_by(std::unordered_set const &vs, F &&f) { + OneToMany result; for (V const &v : vs) { - result[f(v)].insert(v); + result.insert({f(v), v}); + } + return result; +} + +template > +OneToMany group_by(std::set const &vs, F &&f) { + OneToMany result; + for (V const &v : vs) { + result.insert({f(v), v}); + } + return result; +} + +template > +std::unordered_map> group_by(std::vector const &vs, + F &&f) { + std::unordered_map> result; + for (V const &v : vs) { + result[f(v)].push_back(v); } return result; } diff --git a/lib/utils/include/utils/containers/intersection.h b/lib/utils/include/utils/containers/intersection.h index 938ebd68c9..55e6c7a5f8 100644 --- a/lib/utils/include/utils/containers/intersection.h +++ b/lib/utils/include/utils/containers/intersection.h @@ -3,6 +3,7 @@ #include "utils/containers/contains.h" #include +#include #include namespace FlexFlow { @@ -19,6 +20,17 @@ std::unordered_set intersection(std::unordered_set const &l, return result; } +template +std::set intersection(std::set const &l, std::set const &r) { + std::set result; + for (T const &ll : l) { + if (contains(r, ll)) { + result.insert(ll); + } + } + return result; +} + template std::optional intersection(C const &c) { std::optional result; diff --git a/lib/utils/include/utils/containers/is_subseteq_of.h b/lib/utils/include/utils/containers/is_subseteq_of.h index 705c092962..e435aa24dd 100644 --- a/lib/utils/include/utils/containers/is_subseteq_of.h +++ b/lib/utils/include/utils/containers/is_subseteq_of.h @@ -2,6 +2,7 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_IS_SUBSETEQ_OF_H #include "utils/containers/contains.h" +#include #include namespace FlexFlow { @@ -21,6 +22,20 @@ bool is_subseteq_of(std::unordered_set const &sub, return true; } +template +bool is_subseteq_of(std::set const &l, std::set const &r) { + if (l.size() > r.size()) { + return false; + } + + for (auto const &ll : l) { + if (!contains(r, ll)) { + return false; + } + } + return true; +} + } // namespace FlexFlow #endif diff --git a/lib/utils/include/utils/containers/lift_optional_through_map.h b/lib/utils/include/utils/containers/lift_optional_through_map.h new file mode 100644 index 0000000000..76f293f239 --- /dev/null +++ b/lib/utils/include/utils/containers/lift_optional_through_map.h @@ -0,0 +1,41 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_LIFT_OPTIONAL_THROUGH_MAP_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_LIFT_OPTIONAL_THROUGH_MAP_H + +#include +#include +#include "utils/containers/values.h" +#include + +namespace FlexFlow { + +template +static std::optional> lift_optional_through_map(std::unordered_map> const &m) { + ASSERT(!m.empty()); + + std::unordered_multiset> values = values(m); + + bool has_all_values + = all_of(values, [](std::optional const &t) -> bool { + return t.has_value(); + }); + + bool has_no_values + = all_of(values, [](std::optional 
const &t) -> bool { + return !t.has_value(); + }); + + ASSERT(has_all_values || has_no_values); + if (has_no_values) { + return std::nullopt; + } else { + return map_values(has_all_values, + [](std::optional const &t) -> V { + return t.value(); + }); + } +} + + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/map_from_keys_and_values.h b/lib/utils/include/utils/containers/map_from_keys_and_values.h index 499965dc5e..8a9e36ff4e 100644 --- a/lib/utils/include/utils/containers/map_from_keys_and_values.h +++ b/lib/utils/include/utils/containers/map_from_keys_and_values.h @@ -2,7 +2,8 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_FROM_KEYS_AND_VALUES_H #include "utils/containers/zip.h" -#include "utils/exception.h" +#include +#include #include namespace FlexFlow { @@ -11,12 +12,8 @@ template std::unordered_map map_from_keys_and_values(std::vector const &keys, std::vector const &values) { - if (keys.size() != values.size()) { - throw mk_runtime_error(fmt::format( - "recieved keys (of size {}) not matching values (of size {})", - keys.size(), - values.size())); - } + ASSERT(keys.size() == values.size()); + std::unordered_map result; for (auto const &[k, v] : zip(keys, values)) { result.insert({k, v}); diff --git a/lib/utils/include/utils/containers/map_from_pairs.h b/lib/utils/include/utils/containers/map_from_pairs.h new file mode 100644 index 0000000000..748c24664e --- /dev/null +++ b/lib/utils/include/utils/containers/map_from_pairs.h @@ -0,0 +1,20 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_FROM_PAIRS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_FROM_PAIRS_H + +#include +#include + +namespace FlexFlow { + +template +std::unordered_map + map_from_pairs(std::unordered_set> const &pairs) { + + std::unordered_map result(pairs.cbegin(), pairs.cend()); + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/map_keys.h b/lib/utils/include/utils/containers/map_keys.h index 4e5352748d..9d89337f6a 100644 --- a/lib/utils/include/utils/containers/map_keys.h +++ b/lib/utils/include/utils/containers/map_keys.h @@ -25,10 +25,9 @@ std::unordered_map map_keys(std::unordered_map const &m, for (auto const &kv : m) { result.insert({f(kv.first), kv.second}); } - if (keys(m).size() != keys(result).size()) { - throw mk_runtime_error( - "keys passed to map_keys must be transformed into distinct keys"); - } + + ASSERT(keys(m).size() == keys(result).size(), + "keys passed to map_keys must be transformed into distinct keys"); return result; } diff --git a/lib/utils/include/utils/containers/map_keys2.h b/lib/utils/include/utils/containers/map_keys2.h new file mode 100644 index 0000000000..d3b1fdbb49 --- /dev/null +++ b/lib/utils/include/utils/containers/map_keys2.h @@ -0,0 +1,31 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_KEYS2_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_KEYS2_H + +#include +#include "utils/containers/keys.h" +#include + +namespace FlexFlow { + +template > +std::unordered_map map_keys(std::unordered_map const &m, + F const &f) { + + std::unordered_map result; + for (auto const &kv : m) { + result.insert({f(kv.first, kv.second), kv.second}); + } + + ASSERT(keys(m).size() == keys(result).size(), + "keys passed to map_keys must be transformed into distinct keys"); + + return result; +} + + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/map_keys_with_value_merging.h 
b/lib/utils/include/utils/containers/map_keys_with_value_merging.h new file mode 100644 index 0000000000..dce80dd21c --- /dev/null +++ b/lib/utils/include/utils/containers/map_keys_with_value_merging.h @@ -0,0 +1,38 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_KEYS_WITH_VALUE_MERGING_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_KEYS_WITH_VALUE_MERGING_H + +#include +#include "utils/containers/contains_key.h" + +namespace FlexFlow { + +template > +std::unordered_map map_keys_with_value_merging(std::unordered_map const &m, + F &&key_func, + MergeF &&merge_values) { + + std::unordered_map result; + + for (auto const &kv : m) { + K k = kv.first; + V v = kv.second; + + K2 k2 = key_func(k); + + if (contains_key(result, k2)) { + result.at(k2) = merge_values(result.at(k2), v); + } else { + result.insert({k2, v}); + } + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/map_values.h b/lib/utils/include/utils/containers/map_values.h index 9f7a4f4add..bf377b2c93 100644 --- a/lib/utils/include/utils/containers/map_values.h +++ b/lib/utils/include/utils/containers/map_values.h @@ -10,11 +10,10 @@ template > -std::unordered_map map_values(std::unordered_map const &m, - F const &f) { +std::unordered_map map_values(std::unordered_map const &m, F &&f) { std::unordered_map result; - for (auto const &kv : m) { - result.insert({kv.first, f(kv.second)}); + for (std::pair const &kv : m) { + result.insert(std::pair{kv.first, f(kv.second)}); } return result; } diff --git a/lib/utils/include/utils/containers/map_values2.h b/lib/utils/include/utils/containers/map_values2.h new file mode 100644 index 0000000000..c30450bd8f --- /dev/null +++ b/lib/utils/include/utils/containers/map_values2.h @@ -0,0 +1,23 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_VALUES2_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_VALUES2_H + +#include +#include + +namespace FlexFlow { + +template > +std::unordered_map map_values2(std::unordered_map const &m, F &&f) { + std::unordered_map result; + for (std::pair const &kv : m) { + result.insert(std::pair{kv.first, f(kv.first, kv.second)}); + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/merge_disjoint_maps.h b/lib/utils/include/utils/containers/merge_disjoint_maps.h new file mode 100644 index 0000000000..ff3773c9e2 --- /dev/null +++ b/lib/utils/include/utils/containers/merge_disjoint_maps.h @@ -0,0 +1,26 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_DISJOINT_MAPS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_DISJOINT_MAPS_H + +#include "utils/containers/binary_merge_disjoint_maps.h" +#include "utils/containers/foldl.h" + +namespace FlexFlow { + +template +std::unordered_map merge_disjoint_maps(C const &c) { + std::unordered_map empty = {}; + return foldl( + c, + /*init=*/empty, + [](std::unordered_map const &lhs, + std::unordered_map const &rhs) { + return binary_merge_disjoint_maps(lhs, rhs); + }); +} + + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/merge_in_map.h b/lib/utils/include/utils/containers/merge_in_map.h new file mode 100644 index 0000000000..edae4b8a6a --- /dev/null +++ b/lib/utils/include/utils/containers/merge_in_map.h @@ -0,0 +1,23 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_IN_MAP_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_IN_MAP_H + +#include + +namespace FlexFlow { + +template +void 
merge_in_map(std::unordered_map const &m, + std::unordered_map &result) { + for (auto const &[k, v] : m) { + auto it = result.find(k); + if (it != result.end()) { + it->second = v; + } else { + result.insert({k, v}); + } + } +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/merge_maps.h b/lib/utils/include/utils/containers/merge_maps.h deleted file mode 100644 index bfc2446d99..0000000000 --- a/lib/utils/include/utils/containers/merge_maps.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_MAPS_H -#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_MAPS_H - -#include "utils/containers/are_disjoint.h" -#include "utils/containers/keys.h" -#include "utils/containers/merge_method.dtg.h" -#include "utils/exception.h" -#include "utils/fmt/unordered_map.h" -#include "utils/fmt/unordered_set.h" -#include - -namespace FlexFlow { - -template -void merge_in_map(std::unordered_map const &m, - std::unordered_map &result) { - for (auto const &[k, v] : m) { - auto it = result.find(k); - if (it != result.end()) { - it->second = v; - } else { - result.insert({k, v}); - } - } -} - -template -std::unordered_map - merge_disjoint_maps(std::unordered_map const &lhs, - std::unordered_map const &rhs) { - - std::unordered_set lhs_keys = keys(lhs); - std::unordered_set rhs_keys = keys(rhs); - std::unordered_set shared_keys = intersection(lhs_keys, rhs_keys); - if (!shared_keys.empty()) { - throw mk_runtime_error( - fmt::format("merge_maps expected disjoint maps, but maps share keys {}", - shared_keys)); - } - - std::unordered_map result; - merge_in_map(lhs, result); - merge_in_map(rhs, result); - return result; -} - -template -std::unordered_map - merge_map_left_dominates(std::unordered_map const &lhs, - std::unordered_map const &rhs) { - std::unordered_map result; - merge_in_map(rhs, result); - merge_in_map(lhs, result); - return result; -} - -template -std::unordered_map - merge_map_right_dominates(std::unordered_map const &lhs, - std::unordered_map const &rhs) { - std::unordered_map result; - merge_in_map(lhs, result); - merge_in_map(rhs, result); - return result; -} - -} // namespace FlexFlow - -#endif diff --git a/lib/utils/include/utils/containers/merge_maps_with.h b/lib/utils/include/utils/containers/merge_maps_with.h new file mode 100644 index 0000000000..4f5c6986be --- /dev/null +++ b/lib/utils/include/utils/containers/merge_maps_with.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_MAPS_WITH_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_MAPS_WITH_H + +#include +#include +#include "utils/containers/binary_merge_maps_with.h" +#include "utils/containers/foldl.h" + +namespace FlexFlow { + +template +std::unordered_map merge_maps_with(std::vector> const &to_merge, F &&f) { + return foldl(to_merge, + std::unordered_map{}, + [&](std::unordered_map const &accum, std::unordered_map const &m) { + return binary_merge_maps_with(accum, m, f); + }); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/merge_maps_with_right_dominating.h b/lib/utils/include/utils/containers/merge_maps_with_right_dominating.h new file mode 100644 index 0000000000..1d4f8536d8 --- /dev/null +++ b/lib/utils/include/utils/containers/merge_maps_with_right_dominating.h @@ -0,0 +1,23 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_MAPS_WITH_RIGHT_DOMINATING_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MERGE_MAPS_WITH_RIGHT_DOMINATING_H + +#include 
"utils/containers/merge_in_map.h" + +namespace FlexFlow { + +template +std::unordered_map merge_maps_with_right_dominating(C const &c) { + std::unordered_map result; + + for (std::unordered_map const &m : c) { + merge_in_map(m, result); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/multiset_of.h b/lib/utils/include/utils/containers/multiset_of.h new file mode 100644 index 0000000000..79bfbc40a3 --- /dev/null +++ b/lib/utils/include/utils/containers/multiset_of.h @@ -0,0 +1,15 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MULTISET_OF_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MULTISET_OF_H + +#include + +namespace FlexFlow { + +template +std::multiset multiset_of(C const &c) { + return std::multiset{std::cbegin(c), std::cend(c)}; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/permute_with_key.h b/lib/utils/include/utils/containers/permute_with_key.h new file mode 100644 index 0000000000..df0f113003 --- /dev/null +++ b/lib/utils/include/utils/containers/permute_with_key.h @@ -0,0 +1,29 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_PERMUTE_WITH_KEY_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_PERMUTE_WITH_KEY_H + +#include "utils/containers/transform.h" +#include "utils/containers/range.h" +#include "utils/containers/product.h" + +namespace FlexFlow { + +template +std::vector permute_with_key(int key, std::vector const &v) { + int max_permutations = 10000; + int reduced_key = key % max_permutations; + + std::vector permutation = range(v.size()); + + for (int i = 0; i < reduced_key; i++) { + std::next_permutation(permutation.begin(), permutation.end()); + } + + return transform(permutation, + [&](int permutation_entry) { + return v.at(permutation_entry); + }); +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/require_all_same.h b/lib/utils/include/utils/containers/require_all_same.h new file mode 100644 index 0000000000..95eb8410a7 --- /dev/null +++ b/lib/utils/include/utils/containers/require_all_same.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REQUIRE_ALL_SAME_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REQUIRE_ALL_SAME_H + +#include +#include +#include "utils/containers/require_all_same1.h" + +namespace FlexFlow { + +template +std::optional require_all_same(C const &c) { + if (c.empty()) { + return std::nullopt; + } else { + return require_all_same1(c); + } +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/require_all_same1.h b/lib/utils/include/utils/containers/require_all_same1.h index 2f42243857..3e210e1c11 100644 --- a/lib/utils/include/utils/containers/require_all_same1.h +++ b/lib/utils/include/utils/containers/require_all_same1.h @@ -2,26 +2,17 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REQUIRE_ALL_SAME1_H #include -#include +#include namespace FlexFlow { template -tl::expected require_all_same1(C const &c) { - if (c.empty()) { - return tl::unexpected(fmt::format( - "require_all_same1 expected non-empty container, but received {}", c)); - } +T require_all_same1(C const &c) { + ASSERT(!c.empty()); T const &first = *c.cbegin(); for (T const &v : c) { - if (v != first) { - return tl::unexpected(fmt::format("require_all_same1 found non-same " - "elements {} and {} in containers {}", - first, - v, - c)); - } + ASSERT(v == first); } return first; } diff --git a/lib/utils/include/utils/containers/require_same.h 
b/lib/utils/include/utils/containers/require_same.h index f638e1da1a..2f3439db32 100644 --- a/lib/utils/include/utils/containers/require_same.h +++ b/lib/utils/include/utils/containers/require_same.h @@ -8,14 +8,21 @@ namespace FlexFlow { template T const &require_same(T const &l, T const &r) { - if (l != r) { - throw mk_runtime_error( - fmt::format("require_same received non-equal inputs: {} != {}", l, r)); - } + ASSERT(l == r, "require_same received non-equal inputs"); return l; } +template +T const &require_same(T const &t1, T const &t2, T const &t3) { + return require_same(require_same(t1, t2), t3); +} + +template +T const &require_same(T const &t1, T const &t2, T const &t3, T const &t4) { + return require_same(require_same(require_same(t1, t2), t3), t4); +} + } // namespace FlexFlow #endif diff --git a/lib/utils/include/utils/containers/scanl.h b/lib/utils/include/utils/containers/scanl.h index a30a9e1576..0d5a4fd7c4 100644 --- a/lib/utils/include/utils/containers/scanl.h +++ b/lib/utils/include/utils/containers/scanl.h @@ -1,14 +1,13 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANL_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANL_H -#include #include namespace FlexFlow { /** * @brief - * Applies `op` to the elements of `c` from left to right, accumulating + * Applies `f` to the elements of `c` from left to right, accumulating * the intermediate results in a vector. `init` is used as the starting point * for the accumulation. * @@ -23,55 +22,18 @@ namespace FlexFlow { * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:scanl */ template -std::vector scanl(C const &c, T init, F const &op) { +std::vector scanl(C const &c, T init, F &&f) { std::vector result; result.push_back(init); for (auto const &elem : c) { - init = op(init, elem); + init = f(init, elem); result.push_back(init); } return result; } -/** - * @brief - * Applies `op` to the elements of `c` from left to right, accumulating - * the intermediate results in a vector. The first item of `c` is used as the - * starting point for the accumulation. - * - * @example - * std::vector nums = {1, 2, 3, 4}; - * auto result = scanl1(nums, [](int a, int b) {return a+b;}); - * result -> {1,3,6,10} - * - * @note - * Essentially a foldl1 which stores the intermediate results. - * For more information, see - * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:scanl1 - */ -template -std::vector scanl1(C const &c, F op) { - - if (c.empty()) { - return std::vector(); - } - - std::optional init = std::nullopt; - std::vector result; - - for (T const &elem : c) { - if (!init.has_value()) { - init = elem; - } else { - init = op(init.value(), elem); - } - result.push_back(init.value()); - } - return result; -} - } // namespace FlexFlow #endif diff --git a/lib/utils/include/utils/containers/scanl1.h b/lib/utils/include/utils/containers/scanl1.h new file mode 100644 index 0000000000..a8411cfcba --- /dev/null +++ b/lib/utils/include/utils/containers/scanl1.h @@ -0,0 +1,48 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANL1_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANL1_H + +#include +#include + +namespace FlexFlow { + +/** + * @brief + * Applies `f` to the elements of `c` from left to right, accumulating + * the intermediate results in a vector. The first item of `c` is used as the + * starting point for the accumulation. 
+ * + * @example + * std::vector nums = {1, 2, 3, 4}; + * auto result = scanl1(nums, [](int a, int b) {return a+b;}); + * result -> {1,3,6,10} + * + * @note + * Essentially a foldl1 which stores the intermediate results. + * For more information, see + * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:scanl1 + */ +template +std::vector scanl1(C const &c, F &&f) { + + if (c.empty()) { + return std::vector(); + } + + std::optional init = std::nullopt; + std::vector result; + + for (T const &elem : c) { + if (!init.has_value()) { + init = elem; + } else { + init = f(init.value(), elem); + } + result.push_back(init.value()); + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/scanr.h b/lib/utils/include/utils/containers/scanr.h new file mode 100644 index 0000000000..03fc94d8c6 --- /dev/null +++ b/lib/utils/include/utils/containers/scanr.h @@ -0,0 +1,40 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANR_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANR_H + +#include "utils/containers/reversed.h" +#include + +namespace FlexFlow { + +/** + * @brief + * Applies `f` to the elements of `c` from right to left, accumulating + * the intermediate results in a vector. `init` is used as the starting point + * for the accumulation. + * + * @example + * std::vector nums = {1, 2, 3, 4}; + * auto result = scanr(nums, 0, [](int a, int b) {return a+b;}); + * result -> {10, 9, 7, 4, 0} + * + * @note + * Essentially a foldr which stores the intermediate results + * For more information, see + * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:scanr + */ +template +std::vector scanr(C const &c, T init, F &&f) { + std::vector result; + + result.push_back(init); + for (auto it = c.crbegin(); it != c.crend(); it++) { + init = f(*it, init); + result.push_back(init); + } + + return reversed(result); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/scanr1.h b/lib/utils/include/utils/containers/scanr1.h new file mode 100644 index 0000000000..7197d2c4ec --- /dev/null +++ b/lib/utils/include/utils/containers/scanr1.h @@ -0,0 +1,49 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANR1_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANR1_H + +#include "utils/containers/reversed.h" +#include +#include + +namespace FlexFlow { + +/** + * @brief + * Applies `f` to the elements of `c` from right to left, accumulating + * the intermediate results in a vector. The first item of `c` is used as the + * starting point for the accumulation. + * + * @example + * std::vector nums = {1, 2, 3, 4}; + * auto result = scanr1(nums, [](int a, int b) {return a+b;}); + * result -> {10, 9, 7, 4} + * + * @note + * Essentially a foldr1 which stores the intermediate results.
+ * For more information, see + * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:scanl1 + */ +template +std::vector scanr1(C const &c, F &&f) { + + if (c.empty()) { + return std::vector(); + } + + std::optional init = std::nullopt; + std::vector result; + + for (auto it = c.crbegin(); it != c.crend(); it++) { + if (!init.has_value()) { + init = *it; + } else { + init = f(*it, init.value()); + } + result.push_back(init.value()); + } + return reversed(result); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/set_union.h b/lib/utils/include/utils/containers/set_union.h index 0f7b895f7a..cd29b1e02e 100644 --- a/lib/utils/include/utils/containers/set_union.h +++ b/lib/utils/include/utils/containers/set_union.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SET_UNION_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SET_UNION_H +#include #include namespace FlexFlow { @@ -13,6 +14,13 @@ std::unordered_set set_union(std::unordered_set const &l, return result; } +template +std::set set_union(std::set const &l, std::set const &r) { + std::set result = l; + result.insert(r.cbegin(), r.cend()); + return result; +} + template std::unordered_set set_union(C const &sets) { std::unordered_set result; diff --git a/lib/utils/include/utils/containers/slice.h b/lib/utils/include/utils/containers/slice.h index a82fb383b5..29a4858a0d 100644 --- a/lib/utils/include/utils/containers/slice.h +++ b/lib/utils/include/utils/containers/slice.h @@ -10,7 +10,7 @@ namespace FlexFlow { template std::vector slice(std::vector const &v, - int const &maybe_start, + int maybe_start, std::optional const &maybe_end) { auto begin_iter = v.cbegin(); auto end_iter = v.cend(); diff --git a/lib/utils/include/utils/containers/try_at_idx.h b/lib/utils/include/utils/containers/try_at_idx.h new file mode 100644 index 0000000000..7c16efe218 --- /dev/null +++ b/lib/utils/include/utils/containers/try_at_idx.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_TRY_AT_IDX_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_TRY_AT_IDX_H + +#include "utils/nonnegative_int/nonnegative_int.h" +#include +#include + +namespace FlexFlow { + +template +std::optional try_at_idx(std::vector const &v, nonnegative_int idx) { + if (idx >= v.size()) { + return std::nullopt; + } else { + return v.at(idx.unwrap_nonnegative()); + } +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/uncurry.h b/lib/utils/include/utils/containers/uncurry.h new file mode 100644 index 0000000000..bd76931ca8 --- /dev/null +++ b/lib/utils/include/utils/containers/uncurry.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_UNCURRY_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_UNCURRY_H + +#include +#include + +namespace FlexFlow { + +template > +std::function const &)> uncurry(F &&f) { + return [f](std::pair const &p) -> Result { + return f(p.first, p.second); + }; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/unordered_set_of.h b/lib/utils/include/utils/containers/unordered_set_of.h index 722ae66d43..6ebd943904 100644 --- a/lib/utils/include/utils/containers/unordered_set_of.h +++ b/lib/utils/include/utils/containers/unordered_set_of.h @@ -2,6 +2,8 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_UNIQUE_H #include +#include +#include "utils/hash/pair.h" namespace FlexFlow { diff --git a/lib/utils/include/utils/containers/vector_from_idx_map.h 
b/lib/utils/include/utils/containers/vector_from_idx_map.h new file mode 100644 index 0000000000..cd32385d1f --- /dev/null +++ b/lib/utils/include/utils/containers/vector_from_idx_map.h @@ -0,0 +1,29 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_VECTOR_FROM_IDX_MAP_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_VECTOR_FROM_IDX_MAP_H + +#include "utils/containers/contains_key.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include +#include +#include + +namespace FlexFlow { + +template +std::optional> + vector_from_idx_map(std::unordered_map const &m) { + std::vector result; + + for (nonnegative_int i = 0_n; i < m.size(); i++) { + if (!contains_key(m, i)) { + return std::nullopt; + } + result.push_back(m.at(i)); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/zip3_with.h b/lib/utils/include/utils/containers/zip3_with.h new file mode 100644 index 0000000000..fd79c02591 --- /dev/null +++ b/lib/utils/include/utils/containers/zip3_with.h @@ -0,0 +1,28 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ZIP3_WITH_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ZIP3_WITH_H + +#include + +namespace FlexFlow { + +template > +std::vector zip3_with(std::vector const &v_a, + std::vector const &v_b, + std::vector const &v_c, + F &&f) { + std::vector result; + for (int i = 0; i < std::min(v_a.size(), std::min(v_b.size(), v_c.size())); + i++) { + result.push_back(f(v_a.at(i), v_b.at(i), v_c.at(i))); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/zip3_with_strict.h b/lib/utils/include/utils/containers/zip3_with_strict.h new file mode 100644 index 0000000000..055ae5a7fe --- /dev/null +++ b/lib/utils/include/utils/containers/zip3_with_strict.h @@ -0,0 +1,31 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ZIP3_WITH_STRICT_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ZIP3_WITH_STRICT_H + +#include "utils/containers/zip3_with.h" +#include +#include + +namespace FlexFlow { + +template > +std::vector zip3_with_strict(std::vector const &v_a, + std::vector const &v_b, + std::vector const &v_c, + F &&f) { + ASSERT(v_a.size() == v_b.size() && v_b.size() == v_c.size(), + "zip3_with_strict requires inputs to have the same length, but " + "received mismatched lengths", + v_a.size(), + v_b.size(), + v_c.size()); + + return zip3_with(v_a, v_b, v_c, f); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/zip_with.h b/lib/utils/include/utils/containers/zip_with.h index 7ae91a7336..2fb54d85a7 100644 --- a/lib/utils/include/utils/containers/zip_with.h +++ b/lib/utils/include/utils/containers/zip_with.h @@ -13,7 +13,8 @@ std::vector zip_with(std::vector const &l, std::vector const &r, F &&f) { std::vector result; for (int i = 0; i < l.size() && i < r.size(); i++) { - result.push_back(f(l.at(i), r.at(i))); + Result elem = f(l.at(i), r.at(i)); + result.push_back(elem); } return result; diff --git a/lib/utils/include/utils/containers/zip_with_strict.h b/lib/utils/include/utils/containers/zip_with_strict.h index fd1e2fa7fd..b9b2e47c84 100644 --- a/lib/utils/include/utils/containers/zip_with_strict.h +++ b/lib/utils/include/utils/containers/zip_with_strict.h @@ -2,8 +2,8 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ZIP_WITH_STRICT_H #include "utils/containers/zip_with.h" -#include "utils/exception.h" #include "utils/fmt/vector.h" +#include #include namespace FlexFlow { @@ -15,15 +15,9 @@ template 
zip_with_strict(std::vector const &lhs, std::vector const &rhs, F &&f) { - if (lhs.size() != rhs.size()) { - throw mk_runtime_error(fmt::format( - "zip_with_strict requires inputs to have the same length, but received " - "lhs = {} (length {}) and rhs = {} (length {})", - lhs, - lhs.size(), - rhs, - rhs.size())); - } + ASSERT(lhs.size() == rhs.size(), + "zip_with_strict requires inputs to have the same length." + "For a similar function without this requirement, see zip_with."); return zip_with(lhs, rhs, f); } diff --git a/lib/utils/include/utils/exception.h b/lib/utils/include/utils/exception.h index 959edcff8a..2096220468 100644 --- a/lib/utils/include/utils/exception.h +++ b/lib/utils/include/utils/exception.h @@ -16,7 +16,7 @@ namespace FlexFlow { ":" __LINE__); #else #define NOT_IMPLEMENTED() \ - throw not_implemented(__PRETTY_FUNCTION__, __FILE__, __LINE__); + throw ::FlexFlow::not_implemented(__PRETTY_FUNCTION__, __FILE__, __LINE__); #endif class not_implemented : public std::logic_error { diff --git a/lib/utils/include/utils/full_binary_tree/get_path_to_leaf_map.h b/lib/utils/include/utils/full_binary_tree/get_path_to_leaf_map.h new file mode 100644 index 0000000000..710f7e7955 --- /dev/null +++ b/lib/utils/include/utils/full_binary_tree/get_path_to_leaf_map.h @@ -0,0 +1,50 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_FULL_BINARY_TREE_GET_PATH_TO_LEAF_MAP_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_FULL_BINARY_TREE_GET_PATH_TO_LEAF_MAP_H + +#include "utils/containers/binary_merge_disjoint_maps.h" +#include "utils/containers/multiset_union.h" +#include "utils/containers/map_keys.h" +#include "utils/full_binary_tree/binary_tree_path.dtg.h" +#include "utils/full_binary_tree/binary_tree_path.h" +#include "utils/full_binary_tree/full_binary_tree_visitor.dtg.h" +#include "utils/full_binary_tree/visit.h" +#include + +namespace FlexFlow { + +template +std::unordered_map + get_path_to_leaf_map(Tree const &tree, + FullBinaryTreeImplementation const &impl) { + + auto visitor = + FullBinaryTreeVisitor, Tree, Parent, Leaf>{ + [&](Parent const &parent) -> std::unordered_map { + + std::unordered_map left_map = + map_keys(get_path_to_leaf_map(impl.get_left_child(parent), impl), + [](BinaryTreePath const &p) { + return nest_inside_left_child(p); + }); + + std::unordered_map right_map = + map_keys(get_path_to_leaf_map(impl.get_right_child(parent), impl), + [](BinaryTreePath const &p) { + return nest_inside_right_child(p); + }); + + return binary_merge_disjoint_maps(left_map, right_map); + }, + [](Leaf const &leaf) -> std::unordered_map { + return std::unordered_map{ + {binary_tree_root_path(), leaf}, + }; + }, + }; + + return visit(tree, impl, visitor); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/is_isomorphic_under.h b/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/is_isomorphic_under.h index ecf9c22143..df8207251f 100644 --- a/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/is_isomorphic_under.h +++ b/lib/utils/include/utils/graph/labelled_open_dataflow_graph/algorithms/is_isomorphic_under.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_LABELLED_OPEN_DATAFLOW_GRAPH_ALGORITHMS_IS_ISOMORPHIC_UNDER_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_LABELLED_OPEN_DATAFLOW_GRAPH_ALGORITHMS_IS_ISOMORPHIC_UNDER_H +#include "utils/bidict/algorithms/transform_values.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/get_graph_data.h" #include 
"utils/graph/labelled_open_dataflow_graph/algorithms/permute_input_ids.h" #include "utils/graph/labelled_open_dataflow_graph/algorithms/permute_node_ids.h" @@ -17,14 +18,14 @@ bool is_isomorphic_under( OpenDataflowGraphIsomorphism const &candidate_isomorphism) { bidict node_permutation = - map_values(candidate_isomorphism.node_mapping, [](Node const &dst_node) { - return NewNode{dst_node}; - }).reversed(); + transform_values(candidate_isomorphism.node_mapping, + [](Node const &dst_node) { return NewNode{dst_node}; }) + .reversed(); bidict input_permutation = - map_values(candidate_isomorphism.input_mapping, - [](DataflowGraphInput const &dst_input) { - return NewDataflowGraphInput{dst_input}; - }) + transform_values(candidate_isomorphism.input_mapping, + [](DataflowGraphInput const &dst_input) { + return NewDataflowGraphInput{dst_input}; + }) .reversed(); return get_graph_data(permute_input_ids( permute_node_ids(src, node_permutation), input_permutation)) == diff --git a/lib/utils/include/utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_path_to_leaf_map.h b/lib/utils/include/utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_path_to_leaf_map.h new file mode 100644 index 0000000000..a07b2c3926 --- /dev/null +++ b/lib/utils/include/utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/get_path_to_leaf_map.h @@ -0,0 +1,24 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_SERIES_PARALLEL_BINARY_SP_DECOMPOSITION_TREE_GENERIC_BINARY_SP_DECOMPOSITION_TREE_GET_PATH_TO_LEAF_MAP_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_GRAPH_SERIES_PARALLEL_BINARY_SP_DECOMPOSITION_TREE_GENERIC_BINARY_SP_DECOMPOSITION_TREE_GET_PATH_TO_LEAF_MAP_H + +#include "utils/full_binary_tree/get_path_to_leaf_map.h" +#include "utils/graph/series_parallel/binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree/generic_binary_sp_decomposition_tree_implementation.h" + +namespace FlexFlow { + +template +std::unordered_map get_path_to_leaf_map( + Tree const &tree, + GenericBinarySPDecompositionTreeImplementation const &impl) { + FullBinaryTreeImplementation, Leaf> + full_binary_impl = get_full_binary_impl_from_generic_sp_impl(impl); + + return get_path_to_leaf_map(tree, full_binary_impl); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/int_ge_two/algorithms/try_int_ge_two_from_positive_int.h b/lib/utils/include/utils/int_ge_two/algorithms/try_int_ge_two_from_positive_int.h new file mode 100644 index 0000000000..77e58309b6 --- /dev/null +++ b/lib/utils/include/utils/int_ge_two/algorithms/try_int_ge_two_from_positive_int.h @@ -0,0 +1,12 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_INT_GE_TWO_ALGORITHMS_TRY_INT_GE_TWO_FROM_POSITIVE_INT_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_INT_GE_TWO_ALGORITHMS_TRY_INT_GE_TWO_FROM_POSITIVE_INT_H + +#include "utils/int_ge_two/int_ge_two.h" + +namespace FlexFlow { + +std::optional try_int_ge_two_from_positive_int(positive_int); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/int_ge_two/int_ge_two.h b/lib/utils/include/utils/int_ge_two/int_ge_two.h new file mode 100644 index 0000000000..c22254b219 --- /dev/null +++ b/lib/utils/include/utils/int_ge_two/int_ge_two.h @@ -0,0 +1,132 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_INT_GE_TWO_INT_GE_TWO_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_INT_GE_TWO_INT_GE_TWO_H + +#include "utils/positive_int/positive_int.h" +#include + +namespace FlexFlow { + 
+struct int_ge_two { + int_ge_two() = delete; + explicit int_ge_two(int value); + explicit int_ge_two(size_t value); + explicit int_ge_two(nonnegative_int value); + explicit int_ge_two(positive_int value); + + explicit operator int() const noexcept; + explicit operator nonnegative_int() const noexcept; + explicit operator positive_int() const noexcept; + + bool operator<(int_ge_two other) const; + bool operator==(int_ge_two other) const; + bool operator>(int_ge_two other) const; + bool operator<=(int_ge_two other) const; + bool operator!=(int_ge_two other) const; + bool operator>=(int_ge_two other) const; + + bool operator<(positive_int other) const; + bool operator==(positive_int other) const; + bool operator>(positive_int other) const; + bool operator<=(positive_int other) const; + bool operator!=(positive_int other) const; + bool operator>=(positive_int other) const; + + friend bool operator<(positive_int lhs, int_ge_two rhs); + friend bool operator==(positive_int lhs, int_ge_two rhs); + friend bool operator>(positive_int lhs, int_ge_two rhs); + friend bool operator<=(positive_int lhs, int_ge_two rhs); + friend bool operator!=(positive_int lhs, int_ge_two rhs); + friend bool operator>=(positive_int lhs, int_ge_two rhs); + + bool operator<(nonnegative_int other) const; + bool operator==(nonnegative_int other) const; + bool operator>(nonnegative_int other) const; + bool operator<=(nonnegative_int other) const; + bool operator!=(nonnegative_int other) const; + bool operator>=(nonnegative_int other) const; + + friend bool operator<(nonnegative_int lhs, int_ge_two rhs); + friend bool operator==(nonnegative_int lhs, int_ge_two rhs); + friend bool operator>(nonnegative_int lhs, int_ge_two rhs); + friend bool operator<=(nonnegative_int lhs, int_ge_two rhs); + friend bool operator!=(nonnegative_int lhs, int_ge_two rhs); + friend bool operator>=(nonnegative_int lhs, int_ge_two rhs); + + bool operator<(int other) const; + bool operator==(int other) const; + bool operator>(int other) const; + bool operator<=(int other) const; + bool operator!=(int other) const; + bool operator>=(int other) const; + + friend bool operator<(int lhs, int_ge_two rhs); + friend bool operator==(int lhs, int_ge_two rhs); + friend bool operator>(int lhs, int_ge_two rhs); + friend bool operator<=(int lhs, int_ge_two rhs); + friend bool operator!=(int lhs, int_ge_two rhs); + friend bool operator>=(int lhs, int_ge_two rhs); + + int_ge_two operator+(int_ge_two other) const; + int_ge_two operator+(positive_int other) const; + int_ge_two operator+(nonnegative_int other) const; + int_ge_two &operator++(); + int_ge_two operator++(int); + int_ge_two &operator+=(int_ge_two other); + int_ge_two &operator+=(positive_int other); + int_ge_two &operator+=(nonnegative_int other); + + friend int_ge_two operator+(nonnegative_int lhs, int_ge_two rhs); + friend int_ge_two operator+(positive_int lhs, int_ge_two rhs); + + int_ge_two operator*(int_ge_two other) const; + int_ge_two &operator*=(int_ge_two other); + int_ge_two operator*(positive_int other) const; + int_ge_two &operator*=(positive_int other); + nonnegative_int operator*(nonnegative_int other) const; + + friend int_ge_two operator*(positive_int lhs, int_ge_two rhs); + friend nonnegative_int operator*(nonnegative_int lhs, int_ge_two rhs); + + int int_from_int_ge_two() const; + nonnegative_int nonnegative_int_from_int_ge_two() const; + positive_int positive_int_from_int_ge_two() const; + + friend std::ostream &operator<<(std::ostream &os, int_ge_two n); + + friend int 
format_as(int_ge_two); + +private: + void check_invariant() const; + +private: + int value_; +}; + +int_ge_two operator""_ge2(unsigned long long int); + +std::optional try_int_ge_two_from_positive_int(positive_int); + +} // namespace FlexFlow + +namespace nlohmann { +template <> +struct adl_serializer<::FlexFlow::int_ge_two> { + static ::FlexFlow::int_ge_two from_json(json const &j); + static void to_json(json &j, ::FlexFlow::int_ge_two t); +}; +} // namespace nlohmann + +namespace rc { +template <> +struct Arbitrary<::FlexFlow::int_ge_two> { + static Gen<::FlexFlow::int_ge_two> arbitrary(); +}; +} // namespace rc + +namespace std { +template <> +struct hash<::FlexFlow::int_ge_two> { + std::size_t operator()(FlexFlow::int_ge_two n) const noexcept; +}; +} // namespace std +#endif diff --git a/lib/utils/include/utils/json/check_is_json_deserializable.h b/lib/utils/include/utils/json/check_is_json_deserializable.h index dd5f397c19..e54db356df 100644 --- a/lib/utils/include/utils/json/check_is_json_deserializable.h +++ b/lib/utils/include/utils/json/check_is_json_deserializable.h @@ -5,9 +5,9 @@ namespace FlexFlow { -#define CHECK_IS_JSON_DESERIALIZABLE(TYPENAME) \ - static_assert(::FlexFlow::is_json_deserializable::value, \ - #TYPENAME " should be json deserializeable") +#define CHECK_IS_JSON_DESERIALIZABLE(...) \ + static_assert(::FlexFlow::is_json_deserializable<__VA_ARGS__>::value, \ + #__VA_ARGS__ " should be json deserializeable") } // namespace FlexFlow diff --git a/lib/utils/include/utils/json/check_is_json_serializable.h b/lib/utils/include/utils/json/check_is_json_serializable.h index dfcb26081d..d4f1f28003 100644 --- a/lib/utils/include/utils/json/check_is_json_serializable.h +++ b/lib/utils/include/utils/json/check_is_json_serializable.h @@ -5,9 +5,9 @@ namespace FlexFlow { -#define CHECK_IS_JSON_SERIALIZABLE(TYPENAME) \ - static_assert(::FlexFlow::is_json_serializable::value, \ - #TYPENAME " should be json serializeable") +#define CHECK_IS_JSON_SERIALIZABLE(...) \ + static_assert(::FlexFlow::is_json_serializable<__VA_ARGS__>::value, \ + #__VA_ARGS__ " should be json serializeable") } // namespace FlexFlow diff --git a/lib/utils/include/utils/json/check_is_jsonable.h b/lib/utils/include/utils/json/check_is_jsonable.h index 41a64a1b83..e191f3e826 100644 --- a/lib/utils/include/utils/json/check_is_jsonable.h +++ b/lib/utils/include/utils/json/check_is_jsonable.h @@ -6,11 +6,11 @@ namespace FlexFlow { -#define CHECK_IS_JSONABLE(TYPENAME) \ - static_assert(is_json_serializable::value, \ - #TYPENAME " should be json serializeable"); \ - static_assert(is_json_deserializable::value, \ - #TYPENAME " should be json deserializeable") +#define CHECK_IS_JSONABLE(...) 
\ + static_assert(is_json_serializable<__VA_ARGS__>::value, \ + #__VA_ARGS__ " should be json serializeable"); \ + static_assert(is_json_deserializable<__VA_ARGS__>::value, \ + #__VA_ARGS__ " should be json deserializeable") } // namespace FlexFlow diff --git a/lib/utils/include/utils/json/visitable.h b/lib/utils/include/utils/json/visitable.h deleted file mode 100644 index abc20065de..0000000000 --- a/lib/utils/include/utils/json/visitable.h +++ /dev/null @@ -1,152 +0,0 @@ -#ifndef _FLEXFLOW_UTILS_INCLUDE_UTILS_JSON_H -#define _FLEXFLOW_UTILS_INCLUDE_UTILS_JSON_H - -#include "utils/json/is_json_deserializable.h" -#include "utils/json/is_json_serializable.h" -#include "utils/json/is_jsonable.h" -#include "utils/json_core.h" -#include "utils/optional.h" -#include "utils/sequence.h" -#include "utils/type_traits.h" -#include "utils/variant.h" -#include "utils/visitable.h" - -namespace FlexFlow { - -struct json_serialization_visitor { - json_serialization_visitor() = delete; - json_serialization_visitor(json &j) : j(j) {} - - json &j; - - template - void operator()(char const *field_name, T const &field_value) { - j[field_name] = field_value; - } -}; - -struct json_deserialization_visitor { - json_deserialization_visitor() = delete; - json_deserialization_visitor(json const &j) : j(j) {} - - json const &j; - - template - void operator()(char const *field_name, T &field_value) { - j.at(field_name).get_to(field_value); - } -}; - -static_assert(std::is_same>, - std::tuple>::value, - ""); -static_assert(std::is_same>, - std::tuple<>>::value, - ""); - -template -typename std::enable_if<(idx >= std::tuple_size>::value), - std::tuple<>>::type - tuple_from_json_impl(json const &j) { - return std::tuple<>{}; -} - -template -struct TupleFromJson { - tuple_tail_t> operator()(json const &j) { - using FieldT = visit_struct::type_at; - - FieldT field = - j.at(visit_struct::get_name()).template get(); - - return std::tuple_cat(std::tuple(field), - TupleFromJson<(idx + 1), T>{}(j)); - } -}; - -template -struct TupleFromJson< - idx, - T, - typename std::enable_if<( - idx > std::tuple_size>::value)>::type> { - std::tuple<> operator()(json const &j) { - return {}; - } -}; - -template -visit_as_tuple_t tuple_from_json(json const &j) { - return TupleFromJson<0, T>{}(j); -} - -template -void visit_json_serialize(json &j, T const &t) { - static_assert(is_visitable::value, "Type must be visitable"); - static_assert(elements_satisfy::value, - "Elements must be deserializable"); - - json_serialization_visitor vis(j); - visit_struct::for_each(t, vis); -} - -template -void visit_json_deserialize(json const &j, T &t) { - static_assert(is_visitable::value, "Type must be visitable"); - static_assert(elements_satisfy::value, - "Elements must be deserializable"); - - json_deserialization_visitor vis(j); - visit_struct::for_each(t, vis); -} - -template -T moveonly_visit_json_deserialize(json const &j) { - static_assert(is_visitable::value, "Type must be visitable"); - static_assert(!std::is_default_constructible::value, ""); - static_assert(elements_satisfy::value, - "Elements must be deserializable"); - - return visitable_from_tuple(tuple_from_json(j)); -} - -} // namespace FlexFlow - -namespace nlohmann { - -template -struct adl_serializer< - T, - typename std::enable_if<::FlexFlow::conjunction< - ::FlexFlow::is_visitable, - ::FlexFlow::elements_satisfy<::FlexFlow::is_json_serializable, T>, - std::is_default_constructible>::value>::type> { - static void to_json(json &j, T const &t) { - ::FlexFlow::visit_json_serialize(j, t); 
- } - - static void from_json(json const &j, T &t) { - ::FlexFlow::visit_json_deserialize(j, t); - } -}; - -template -struct adl_serializer< - T, - typename std::enable_if<::FlexFlow::conjunction< - ::FlexFlow::is_visitable, - ::FlexFlow::elements_satisfy<::FlexFlow::is_json_serializable, T>, - ::FlexFlow::negation>, - std::is_move_constructible>::value>::type> { - static void to_json(json &j, T const &t) { - ::FlexFlow::visit_json_serialize(j, t); - } - - static T from_json(json const &j) { - return ::FlexFlow::moveonly_visit_json_deserialize(j); - } -}; - -} // namespace nlohmann - -#endif diff --git a/lib/utils/include/utils/many_to_one/exhaustive_relational_join.h b/lib/utils/include/utils/many_to_one/exhaustive_relational_join.h new file mode 100644 index 0000000000..c908a9dcec --- /dev/null +++ b/lib/utils/include/utils/many_to_one/exhaustive_relational_join.h @@ -0,0 +1,35 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_MANY_TO_ONE_EXHAUSTIVE_RELATIONAL_JOIN_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_MANY_TO_ONE_EXHAUSTIVE_RELATIONAL_JOIN_H + +#include "utils/many_to_one/many_to_one.h" + +namespace FlexFlow { + +template +ManyToOne exhaustive_relational_join(ManyToOne const &fst, + ManyToOne const &snd) { + ManyToOne result; + + if (fst.right_values() != snd.left_values()) { + throw mk_runtime_error( + fmt::format("exhaustive_relational_join for ManyToOne received inputs " + "with non-matching inner dimensions: right dimension of " + "fst is {} while left dimension of snd is {}", + fst.right_values(), + snd.left_values())); + } + + for (T3 const &t3 : snd.right_values()) { + for (T2 const &t2 : snd.at_r(t3)) { + for (T1 const &t1 : fst.at_r(t2)) { + result.insert({t1, t3}); + } + } + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/many_to_one/invert_many_to_one.h b/lib/utils/include/utils/many_to_one/invert_many_to_one.h new file mode 100644 index 0000000000..7fdf36859f --- /dev/null +++ b/lib/utils/include/utils/many_to_one/invert_many_to_one.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_MANY_TO_ONE_INVERT_MANY_TO_ONE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_MANY_TO_ONE_INVERT_MANY_TO_ONE_H + +#include "utils/many_to_one/many_to_one.h" +#include "utils/one_to_many/one_to_many.h" + +namespace FlexFlow { + +template +OneToMany invert_many_to_one(ManyToOne const &many_to_one) { + OneToMany result; + + for (L const &l : many_to_one.left_values()) { + result.insert({many_to_one.at_l(l), l}); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/many_to_one/many_to_one.h b/lib/utils/include/utils/many_to_one/many_to_one.h new file mode 100644 index 0000000000..e0ef964749 --- /dev/null +++ b/lib/utils/include/utils/many_to_one/many_to_one.h @@ -0,0 +1,175 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_MANY_TO_ONE_MANY_TO_ONE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_MANY_TO_ONE_MANY_TO_ONE_H + +#include "utils/containers/keys.h" +#include "utils/containers/try_at.h" +#include "utils/exception.h" +#include "utils/fmt/unordered_map.h" +#include "utils/fmt/unordered_set.h" +#include "utils/hash-utils.h" +#include "utils/hash/tuple.h" +#include "utils/hash/unordered_map.h" +#include "utils/hash/unordered_set.h" +#include +#include +#include +#include +#include "utils/json/check_is_json_serializable.h" +#include "utils/json/check_is_json_deserializable.h" +#include +#include "utils/containers/values.h" +#include "utils/containers/unordered_set_of.h" + +namespace FlexFlow { + 
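A behavioral sketch of the ManyToOne container declared just below, its OneToMany counterpart (defined later in this patch), and the invert_many_to_one helper added above (illustrative values only; assumes the initializer_list constructor pairs each left-value group with a single right value, as in the iterator constructor below):

#include "utils/many_to_one/invert_many_to_one.h"
#include "utils/many_to_one/many_to_one.h"
#include <string>

void example_many_to_one() { // hypothetical usage
  ManyToOne<std::string, int> m = {
      {{"a", "b"}, 1},
      {{"c"}, 2},
  };
  // m.at_l("a") == 1 and m.at_r(1) == {"a", "b"}
  // m.insert({"a", 2}) would throw, since "a" is already bound to 1
  OneToMany<int, std::string> inv = invert_many_to_one(m);
  // inv.at_l(1) == {"a", "b"} and inv.at_r("c") == 2
}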
+template +struct ManyToOne { +public: + ManyToOne() : m_l_to_r(), m_r_to_l() {} + + template + ManyToOne(It start, It end) : ManyToOne() { + for (; start < end; start++) { + ASSERT(start->first.size() > 0); + for (L const &l : start->first) { + this->insert(std::pair{l, start->second}); + } + } + } + + ManyToOne(std::initializer_list, R>> const + &l_to_r) + : ManyToOne(l_to_r.begin(), l_to_r.end()) {} + + bool operator==(ManyToOne const &other) const { + return this->tie() == other.tie(); + } + + bool operator!=(ManyToOne const &other) const { + return this->tie() != other.tie(); + } + + void insert(std::pair const &p) { + L l = p.first; + R r = p.second; + + std::optional found_r = try_at(this->m_l_to_r, l); + + if (!found_r.has_value()) { + this->m_l_to_r.insert({l, r}); + this->m_r_to_l[r].insert(l); + } else if (found_r.value() == r) { + return; + } else { + throw mk_runtime_error(fmt::format( + "Existing mapping found for left value {}: tried to map to right " + "value {}, but is already bound to right value {}", + l, + r, + found_r.value())); + } + } + + R const &at_l(L const &l) const { + return this->m_l_to_r.at(l); + } + + std::unordered_set const &at_r(R const &r) const { + return this->m_r_to_l.at(r); + } + + std::unordered_set left_values() const { + return keys(this->m_l_to_r); + } + + std::unordered_set> left_groups() const { + return unordered_set_of(values(this->m_r_to_l)); + } + + std::unordered_set right_values() const { + return keys(this->m_r_to_l); + } + + std::unordered_map const &l_to_r() const { + return this->m_l_to_r; + } + + std::unordered_map> const &r_to_l() const { + return this->m_r_to_l; + } + +private: + std::unordered_map m_l_to_r; + std::unordered_map> m_r_to_l; + +private: + std::tuple tie() const { + return std::tie(this->m_l_to_r, this->m_r_to_l); + } + + friend struct std::hash>; +}; + +template +std::unordered_map, R> + format_as(ManyToOne const &m) { + std::unordered_map, R> result; + + for (R const &r : m.right_values()) { + result.insert({m.at_r(r), r}); + } + + return result; +} + +template +std::ostream &operator<<(std::ostream &s, ManyToOne const &m) { + return (s << fmt::to_string(m)); +} + +} // namespace FlexFlow + +namespace nlohmann { + +template +struct adl_serializer<::FlexFlow::ManyToOne> { + static ::FlexFlow::ManyToOne from_json(json const &j) { + CHECK_IS_JSON_DESERIALIZABLE(L); + CHECK_IS_JSON_DESERIALIZABLE(R); + + NOT_IMPLEMENTED(); + } + + static void to_json(json &j, ::FlexFlow::ManyToOne const &m) { + CHECK_IS_JSON_SERIALIZABLE(L); + CHECK_IS_JSON_SERIALIZABLE(R); + + NOT_IMPLEMENTED(); + } +}; + +} + +namespace rc { + +template +struct Arbitrary<::FlexFlow::ManyToOne> { + static Gen<::FlexFlow::ManyToOne> arbitrary() { + NOT_IMPLEMENTED(); + } +}; + +} + +namespace std { + +template +struct hash<::FlexFlow::ManyToOne> { + size_t operator()(::FlexFlow::ManyToOne const &m) { + return ::FlexFlow::get_std_hash(m.tie()); + } +}; + +} // namespace std + +#endif diff --git a/lib/utils/include/utils/many_to_one/many_to_one_from_bidict.h b/lib/utils/include/utils/many_to_one/many_to_one_from_bidict.h new file mode 100644 index 0000000000..ba50a960c2 --- /dev/null +++ b/lib/utils/include/utils/many_to_one/many_to_one_from_bidict.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_MANY_TO_ONE_MANY_TO_ONE_FROM_BIDICT_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_MANY_TO_ONE_MANY_TO_ONE_FROM_BIDICT_H + +#include "utils/bidict/bidict.h" +#include "utils/many_to_one/many_to_one.h" + +namespace FlexFlow { + +template +ManyToOne 
many_to_one_from_bidict(bidict const &b) { + ManyToOne result; + + for (auto const &[l, r] : b) { + result.insert({l, r}); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/nonnegative_int/range.h b/lib/utils/include/utils/nonnegative_int/range.h new file mode 100644 index 0000000000..7b1fa8d480 --- /dev/null +++ b/lib/utils/include/utils/nonnegative_int/range.h @@ -0,0 +1,15 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_RANGE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_NONNEGATIVE_INT_RANGE_H + +#include "utils/nonnegative_int/nonnegative_int.h" +#include + +namespace FlexFlow { + +std::vector + range(nonnegative_int start, nonnegative_int end, int step = 1); +std::vector range(nonnegative_int end); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/one_to_many/exhaustive_relational_join.h b/lib/utils/include/utils/one_to_many/exhaustive_relational_join.h new file mode 100644 index 0000000000..a959df2398 --- /dev/null +++ b/lib/utils/include/utils/one_to_many/exhaustive_relational_join.h @@ -0,0 +1,35 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_EXHAUSTIVE_RELATIONAL_JOIN_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_EXHAUSTIVE_RELATIONAL_JOIN_H + +#include "utils/one_to_many/one_to_many.h" + +namespace FlexFlow { + +template +OneToMany exhaustive_relational_join(OneToMany const &fst, + OneToMany const &snd) { + OneToMany result; + + if (fst.right_values() != snd.left_values()) { + throw mk_runtime_error( + fmt::format("exhaustive_relational_join for OneToMany received inputs " + "with non-matching inner dimensions: right dimension of " + "fst is {} while left dimension of snd is {}", + fst.right_values(), + snd.left_values())); + } + + for (T1 const &t1 : fst.left_values()) { + for (T2 const &t2 : fst.at_l(t1)) { + for (T3 const &t3 : snd.at_l(t2)) { + result.insert({t1, t3}); + } + } + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/one_to_many/invert_one_to_many.h b/lib/utils/include/utils/one_to_many/invert_one_to_many.h new file mode 100644 index 0000000000..bde623d387 --- /dev/null +++ b/lib/utils/include/utils/one_to_many/invert_one_to_many.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_INVERT_ONE_TO_MANY_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_INVERT_ONE_TO_MANY_H + +#include "utils/many_to_one/many_to_one.h" +#include "utils/one_to_many/one_to_many.h" + +namespace FlexFlow { + +template +ManyToOne invert_one_to_many(OneToMany const &one_to_many) { + ManyToOne result; + + for (R const &r : one_to_many.right_values()) { + result.insert({r, one_to_many.at_r(r)}); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/one_to_many/one_to_many.h b/lib/utils/include/utils/one_to_many/one_to_many.h new file mode 100644 index 0000000000..062c973e06 --- /dev/null +++ b/lib/utils/include/utils/one_to_many/one_to_many.h @@ -0,0 +1,169 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_ONE_TO_MANY_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_ONE_TO_MANY_H + +#include "utils/containers/generate_map.h" +#include "utils/containers/keys.h" +#include "utils/containers/try_at.h" +#include "utils/exception.h" +#include "utils/fmt/unordered_map.h" +#include "utils/fmt/unordered_set.h" +#include "utils/hash-utils.h" +#include "utils/hash/tuple.h" +#include "utils/hash/unordered_map.h" +#include "utils/hash/unordered_set.h" +#include 
+#include +#include +#include +#include "utils/json/check_is_json_serializable.h" +#include "utils/json/check_is_json_deserializable.h" +#include +#include "utils/containers/values.h" +#include "utils/containers/unordered_set_of.h" + +namespace FlexFlow { + +template +struct OneToMany { +public: + OneToMany() : m_l_to_r(), m_r_to_l() {} + + template + OneToMany(It start, It end) : OneToMany() { + for (; start < end; start++) { + ASSERT(start->second.size() > 0); + for (R const &r : start->second) { + this->insert(std::pair{start->first, r}); + } + } + } + + OneToMany(std::initializer_list>> const + &l_to_r) + : OneToMany(l_to_r.begin(), l_to_r.end()) {} + + bool operator==(OneToMany const &other) const { + return this->tie() == other.tie(); + } + + bool operator!=(OneToMany const &other) const { + return this->tie() != other.tie(); + } + + void insert(std::pair const &p) { + L l = p.first; + R r = p.second; + + std::optional found_l = try_at(this->m_r_to_l, r); + + if (!found_l.has_value()) { + this->m_r_to_l.insert({r, l}); + this->m_l_to_r[l].insert(r); + } else if (found_l.value() == l) { + return; + } else { + throw mk_runtime_error( + fmt::format("Existing mapping found for right value {}: tried to map " + "to left value {}, but is already bound to left value {}", + r, + l, + found_l.value())); + } + } + + std::unordered_set const &at_l(L const &l) const { + return this->m_l_to_r.at(l); + } + + L const &at_r(R const &r) const { + return this->m_r_to_l.at(r); + } + + std::unordered_set left_values() const { + return keys(this->m_l_to_r); + } + + std::unordered_set right_values() const { + return keys(this->m_r_to_l); + } + + std::unordered_set> right_groups() const { + return unordered_set_of(values(this->m_l_to_r)); + } + + std::unordered_map> const &l_to_r() const { + return this->m_l_to_r; + } + + std::unordered_map const &r_to_l() const { + return this->m_r_to_l; + } +private: + std::unordered_map> m_l_to_r; + std::unordered_map m_r_to_l; + +private: + std::tuple tie() const { + return std::tie(this->m_l_to_r, this->m_r_to_l); + } + + friend struct std::hash>; +}; + +template +std::unordered_map> + format_as(OneToMany const &m) { + return generate_map(m.left_values(), [&](L const &l) { return m.at_l(l); }); +} + +template +std::ostream &operator<<(std::ostream &s, OneToMany const &m) { + return (s << fmt::to_string(m)); +} + +} // namespace FlexFlow + +namespace nlohmann { + +template +struct adl_serializer<::FlexFlow::OneToMany> { + static ::FlexFlow::OneToMany from_json(json const &j) { + CHECK_IS_JSON_DESERIALIZABLE(L); + CHECK_IS_JSON_DESERIALIZABLE(R); + + NOT_IMPLEMENTED(); + } + + static void to_json(json &j, ::FlexFlow::OneToMany const &m) { + CHECK_IS_JSON_SERIALIZABLE(L); + CHECK_IS_JSON_SERIALIZABLE(R); + + NOT_IMPLEMENTED(); + } +}; + +} + +namespace rc { + +template +struct Arbitrary<::FlexFlow::OneToMany> { + static Gen<::FlexFlow::OneToMany> arbitrary() { + NOT_IMPLEMENTED(); + } +}; + +} + +namespace std { + +template +struct hash<::FlexFlow::OneToMany> { + size_t operator()(::FlexFlow::OneToMany const &m) { + return ::FlexFlow::get_std_hash(m.tie()); + } +}; + +} // namespace std + +#endif diff --git a/lib/utils/include/utils/one_to_many/one_to_many_from_bidict.h b/lib/utils/include/utils/one_to_many/one_to_many_from_bidict.h new file mode 100644 index 0000000000..3783f1f663 --- /dev/null +++ b/lib/utils/include/utils/one_to_many/one_to_many_from_bidict.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_ONE_TO_MANY_FROM_BIDICT_H +#define 
_FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_ONE_TO_MANY_FROM_BIDICT_H + +#include "utils/bidict/bidict.h" +#include "utils/one_to_many/one_to_many.h" + +namespace FlexFlow { + +template +OneToMany one_to_many_from_bidict(bidict const &b) { + OneToMany result; + + for (auto const &[l, r] : b) { + result.insert({l, r}); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/one_to_many/one_to_many_from_l_to_r_mapping.h b/lib/utils/include/utils/one_to_many/one_to_many_from_l_to_r_mapping.h new file mode 100644 index 0000000000..bed62caaf6 --- /dev/null +++ b/lib/utils/include/utils/one_to_many/one_to_many_from_l_to_r_mapping.h @@ -0,0 +1,26 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_FROM_L_TO_R_MAPPING_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ONE_TO_MANY_FROM_L_TO_R_MAPPING_H + +#include "utils/one_to_many/one_to_many.h" +#include + +namespace FlexFlow { + +template +OneToMany one_to_many_from_l_to_r_mapping( + std::unordered_map> const &m) { + OneToMany result; + + for (auto const &[l, rs] : m) { + ASSERT(rs.size() > 0); + for (auto const &r : rs) { + result.insert({l, r}); + } + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/optional.h b/lib/utils/include/utils/optional.h index 377561d70c..6c2b1035e2 100644 --- a/lib/utils/include/utils/optional.h +++ b/lib/utils/include/utils/optional.h @@ -16,6 +16,8 @@ T or_else(std::optional const &o, F &&f) { } } + + template T const &unwrap(std::optional const &o, F const &f) { if (o.has_value()) { @@ -28,7 +30,7 @@ T const &unwrap(std::optional const &o, F const &f) { template T const &assert_unwrap(std::optional const &o) { - assert(o.has_value()); + ASSERT(o.has_value()); return o.value(); } diff --git a/lib/utils/include/utils/ord/unordered_map.h b/lib/utils/include/utils/ord/unordered_map.h new file mode 100644 index 0000000000..26cd2db287 --- /dev/null +++ b/lib/utils/include/utils/ord/unordered_map.h @@ -0,0 +1,26 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORD_UNORDERED_MAP_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORD_UNORDERED_MAP_H + +#include "utils/type_traits_core.h" +#include +#include +#include + +namespace FlexFlow { + +template +std::enable_if_t>, bool> + operator<(std::unordered_map const &lhs, std::unordered_map const &rhs) { + CHECK_LT_COMPARABLE(K); + CHECK_LT_COMPARABLE(V); + + std::map lhs_ordered(lhs.cbegin(), lhs.cend()); + std::map rhs_ordered(rhs.cbegin(), rhs.cend()); + + return lhs_ordered < rhs_ordered; +} + + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/ord/unordered_set.h b/lib/utils/include/utils/ord/unordered_set.h new file mode 100644 index 0000000000..aff439ee6c --- /dev/null +++ b/lib/utils/include/utils/ord/unordered_set.h @@ -0,0 +1,24 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORD_UNORDERED_SET_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORD_UNORDERED_SET_H + +#include "utils/type_traits_core.h" +#include +#include + +namespace FlexFlow { + +template +std::enable_if_t, bool> + operator<(std::unordered_set const &lhs, std::unordered_set const &rhs) { + CHECK_LT_COMPARABLE(T); + + std::set lhs_ordered(lhs.cbegin(), lhs.cend()); + std::set rhs_ordered(rhs.cbegin(), rhs.cend()); + + return lhs_ordered < rhs_ordered; +} + + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/ord/vector.h b/lib/utils/include/utils/ord/vector.h new file mode 100644 index 0000000000..fca09d457a --- /dev/null +++ b/lib/utils/include/utils/ord/vector.h @@ -0,0 +1,21 
@@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORD_VECTOR_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORD_VECTOR_H + +#include "utils/type_traits_core.h" +#include +#include + +namespace FlexFlow { + +template +std::enable_if_t, bool> + operator<(std::vector const &lhs, std::vector const &rhs) { + CHECK_LT_COMPARABLE(T); + + return std::lexicographical_compare( + lhs.cbegin(), lhs.cend(), rhs.cbegin(), rhs.cend()); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/orthotope/dim_coord.h b/lib/utils/include/utils/orthotope/dim_coord.h new file mode 100644 index 0000000000..53ca9dc773 --- /dev/null +++ b/lib/utils/include/utils/orthotope/dim_coord.h @@ -0,0 +1,175 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_COORD_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_COORD_H + +#include "utils/containers/all_of.h" +#include "utils/containers/keys.h" +#include "utils/containers/map_from_keys_and_values.h" +#include "utils/containers/product.h" +#include "utils/containers/require_same.h" +#include "utils/containers/restrict_keys.h" +#include "utils/containers/scanr.h" +#include "utils/containers/sorted_by.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" +#include "utils/containers/zip_with_strict.h" +#include "utils/exception.h" +#include "utils/nonnegative_int/nonnegative_range.h" +#include "utils/orthotope/dim_coord.dtg.h" +#include "utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/dim_domain.h" +#include "utils/orthotope/minimal_dim_domain.h" +#include "utils/orthotope/orthotope.h" +#include "utils/containers/generate_map.h" +#include "utils/containers/contains_key.h" +#include "utils/containers/is_subseteq_of.h" +#include "utils/containers/map_values.h" +#include "utils/containers/get_all_assignments.h" + +namespace FlexFlow { + +template +std::unordered_set get_coord_dims(DimCoord const &coord) { + return keys(coord.raw); +} + +template +DimCoord restrict_coord_to_dims(DimCoord const &coord, + std::unordered_set const &dims) { + return DimCoord{ + restrict_keys(coord.raw, dims), + }; +} + +template +OrthotopeCoord + orthotope_coord_from_dim_coord(DimCoord const &coord, + DimOrdering const &dim_ordering) { + return OrthotopeCoord{ + transform(sorted_by(get_coord_dims(coord), dim_ordering.lt), + [&](T const &t) { return coord.raw.at(t); }), + }; +} + +template +DimCoord dim_coord_from_orthotope_coord(OrthotopeCoord const &coord, + std::unordered_set const &dims, + DimOrdering const &dim_ordering) { + return DimCoord{ + map_from_keys_and_values( + sorted_by(dims, dim_ordering.lt), coord.raw), + }; +} + +template +DimCoord lift_dim_coord(DimCoord const &coord, + std::unordered_set const &lifted_dims) { + ASSERT(is_subseteq_of(get_coord_dims(coord), lifted_dims)); + + return DimCoord{ + generate_map( + lifted_dims, + [&](T const &dim) { + if (contains_key(coord.raw, dim)) { + return coord.raw.at(dim); + } else { + return 0_n; + } + }), + }; +} + +template +std::unordered_set> get_coords_in_dim_domain(DimDomain const &dim_domain) { + std::unordered_map> + component_possible_values = map_values( + dim_domain.dims, + [](positive_int component_size) -> std::unordered_set { + return unordered_set_of(nonnegative_range(component_size)); + }); + + return transform( + get_all_assignments(component_possible_values), + [](std::unordered_map const &assignment) { + return DimCoord{ + assignment, + }; + }); +} + +template +std::unordered_set> get_coords_in_minimal_dim_domain(MinimalDimDomain const 
&minimal_dim_domain) { + return get_coords_in_dim_domain(lift_minimal_dim_domain(minimal_dim_domain)); +} + +template +DimCoord get_maximum_coord_in_domain(DimDomain const &domain) { + return DimCoord{ + map_values( + domain.dims, + [](positive_int dim) -> nonnegative_int { + return nonnegative_int{ + dim.int_from_positive_int() - 1, + }; + }), + }; +} + +template +DimDomain get_domain_for_maximum_coord(DimCoord const &max_coord) { + return DimDomain{ + map_values( + max_coord.raw, + [](nonnegative_int dim) -> positive_int { + return dim + 1_p; + }), + }; +} + +template +bool dim_domain_contains_coord(DimDomain const &domain, + DimCoord const &coord) { + ASSERT(get_domain_dims(domain) == get_coord_dims(coord)); + + std::unordered_set dims = + require_same(get_domain_dims(domain), get_coord_dims(coord)); + return all_of(dims, [&](T const &dim) { + return coord.raw.at(dim) < domain.dims.at(dim); + }); +} + +template +bool minimal_dim_domain_contains_coord(MinimalDimDomain const &domain, DimCoord const &coord) { + return dim_domain_contains_coord(lift_minimal_dim_domain(domain), coord); +} + +template +nonnegative_int flatten_dim_coord(DimCoord const &coord, + DimDomain const &domain, + DimOrdering const &dim_ordering) { + ASSERT( + get_coord_dims(coord) == get_domain_dims(domain), + "flatten_dim_coord expected coord dimensions to match domain dimensions", + coord, + domain); + + OrthotopeCoord orthotope_coord = + orthotope_coord_from_dim_coord(coord, dim_ordering); + Orthotope orthotope_domain = orthotope_from_dim_domain(domain, dim_ordering); + + return flatten_orthotope_coord(orthotope_coord, orthotope_domain); +} + +template +DimCoord unflatten_dim_coord(nonnegative_int flattened, + DimDomain const &domain, + DimOrdering const &dim_ordering) { + Orthotope orthotope_domain = orthotope_from_dim_domain(domain, dim_ordering); + OrthotopeCoord orthotope_coord = + unflatten_orthotope_coord(flattened, orthotope_domain); + + return dim_coord_from_orthotope_coord(orthotope_coord, get_domain_dims(domain), dim_ordering); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/orthotope/dim_coord.struct.toml b/lib/utils/include/utils/orthotope/dim_coord.struct.toml new file mode 100644 index 0000000000..f6f565922a --- /dev/null +++ b/lib/utils/include/utils/orthotope/dim_coord.struct.toml @@ -0,0 +1,26 @@ +namespace = "FlexFlow" +name = "DimCoord" +features = [ + "eq", + "ord", + "fmt", + "hash", +] + +template_params = [ + "T", +] + +includes = [ + "", + "utils/nonnegative_int/nonnegative_int.h", +] + +src_includes = [ + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", +] + +[[fields]] +name = "raw" +type = "std::unordered_map" diff --git a/lib/utils/include/utils/orthotope/dim_domain.h b/lib/utils/include/utils/orthotope/dim_domain.h new file mode 100644 index 0000000000..c3bd389684 --- /dev/null +++ b/lib/utils/include/utils/orthotope/dim_domain.h @@ -0,0 +1,73 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_DOMAIN_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_DOMAIN_H + +#include "utils/containers/keys.h" +#include "utils/containers/restrict_keys.h" +#include "utils/containers/sorted_by.h" +#include "utils/containers/transform.h" +#include "utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/dim_ordering.dtg.h" +#include "utils/orthotope/orthotope.dtg.h" +#include "utils/containers/map_from_keys_and_values.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/containers/set_minus.h" +#include 
"utils/containers/filter.h" + +namespace FlexFlow { + +template +DimDomain empty_dim_domain() { + return DimDomain{{}}; +}; + +template +nonnegative_int dim_domain_num_dims(DimDomain const &domain) { + return num_elements(domain.dims); +} + +template +std::unordered_set get_domain_dims(DimDomain const &domain) { + return keys(domain.dims); +} + +template +std::unordered_set get_trivial_domain_dims(DimDomain const &domain) { + return filter(get_domain_dims(domain), + [&](T const &idx) { + return domain.dims.at(idx) == 1; + }); +} + +template +std::unordered_set get_nontrivial_domain_dims(DimDomain const &domain) { + return set_minus(get_domain_dims(domain), get_trivial_domain_dims(domain)); +} + +template +DimDomain restrict_domain_to_dims(DimDomain const &domain, + std::unordered_set const &allowed) { + return DimDomain{restrict_keys(domain.dims, allowed)}; +} + +template +Orthotope orthotope_from_dim_domain(DimDomain const &domain, + DimOrdering const &dim_ordering) { + return Orthotope{ + transform(sorted_by(get_domain_dims(domain), dim_ordering.lt), + [&](T const &t) { return domain.dims.at(t); }), + }; +} + +template +DimDomain dim_domain_from_orthotope(Orthotope const &orthotope, + std::unordered_set const &dims, + DimOrdering const &dim_ordering) { + return DimDomain{ + map_from_keys_and_values( + sorted_by(dims, dim_ordering.lt), orthotope.dims), + }; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/orthotope/dim_domain.struct.toml b/lib/utils/include/utils/orthotope/dim_domain.struct.toml new file mode 100644 index 0000000000..d4db1a3efd --- /dev/null +++ b/lib/utils/include/utils/orthotope/dim_domain.struct.toml @@ -0,0 +1,27 @@ +namespace = "FlexFlow" +name = "DimDomain" +features = [ + "eq", + "ord", + "fmt", + "hash", + "json", +] + +template_params = [ + "T", +] + +includes = [ + "", + "utils/positive_int/positive_int.h", +] + +src_includes = [ + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", +] + +[[fields]] +name = "dims" +type = "std::unordered_map" diff --git a/lib/utils/include/utils/orthotope/dim_domain_mapping.h b/lib/utils/include/utils/orthotope/dim_domain_mapping.h new file mode 100644 index 0000000000..196d1f3f98 --- /dev/null +++ b/lib/utils/include/utils/orthotope/dim_domain_mapping.h @@ -0,0 +1,189 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_DOMAIN_MAPPING_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_DOMAIN_MAPPING_H + +#include "utils/bidict/bidict.h" +#include "utils/orthotope/dim_coord.dtg.h" +#include "utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/dim_ordering.dtg.h" +#include "utils/orthotope/dim_projection.h" +#include "utils/orthotope/dim_coord.h" +#include "utils/bidict/algorithms/left_entries.h" +#include "utils/bidict/algorithms/right_entries.h" +#include "utils/bidict/algorithms/exhaustive_relational_join.h" +#include "utils/hash/tuple.h" +#include "utils/bidict/generate_bidict.h" +#include "utils/orthotope/minimal_dim_domain.dtg.h" +#include "utils/orthotope/minimal_dim_domain.dtg.h" + +namespace FlexFlow { + +template +struct DimDomainMapping { +public: + explicit DimDomainMapping( + bidict, DimCoord> const &coord_mapping, + DimDomain const &l_domain, + DimDomain const &r_domain) + : coord_mapping(coord_mapping), + l_domain(l_domain), + r_domain(r_domain) + { + ASSERT(get_coords_in_dim_domain(l_domain) == left_entries(coord_mapping)); + ASSERT(get_coords_in_dim_domain(r_domain) == right_entries(coord_mapping)); + } + + DimCoord at_l(DimCoord const &l_coord) const { 
+ ASSERT(dim_domain_contains_coord(this->l_domain, l_coord)); + + return this->coord_mapping.at_l(l_coord); + } + + DimCoord at_r(DimCoord const &r_coord) const { + ASSERT(dim_domain_contains_coord(this->r_domain, r_coord)); + + return this->coord_mapping.at_r(r_coord); + } + + bool operator==(DimDomainMapping const &other) const { + return this->tie() == other.tie(); + } + + bool operator!=(DimDomainMapping const &other) const { + return this->tie() != other.tie(); + } + +public: + bidict, DimCoord> coord_mapping; + DimDomain l_domain; + DimDomain r_domain; + +private: + std::tuple< + decltype(coord_mapping) const &, + decltype(l_domain) const &, + decltype(r_domain) const & + > tie() const { + return std::tie( + this->coord_mapping, + this->l_domain, + this->r_domain); + } + + friend struct ::std::hash>; +}; + +template +std::string format_as(DimDomainMapping const &m) { + CHECK_FMTABLE(L); + CHECK_FMTABLE(R); + + return fmt::format( + "", + m.l_domain, + m.r_domain, + m.coord_mapping); +} + +template +std::ostream &operator<<(std::ostream &s, DimDomainMapping const &m) { + CHECK_FMTABLE(L); + CHECK_FMTABLE(R); + + return (s << fmt::to_string(m)); +} + +template +DimDomainMapping empty_dim_domain_mapping() { + return DimDomainMapping{ + /*coord_mapping=*/{ + {DimCoord{{}}, DimCoord{{}}}, + }, + /*l_domain=*/empty_dim_domain(), + /*r_domain=*/empty_dim_domain(), + }; +} + +template +DimDomainMapping dim_domain_mapping_identity_map( + DimDomain const &l_domain, + DimDomain const &r_domain, + DimOrdering const &l_dim_ordering, + DimOrdering const &r_dim_ordering) { + DimProjection projection = + dim_projection_identity_map(l_domain, r_domain, l_dim_ordering, r_dim_ordering); + + return dim_domain_mapping_from_projection( + /*projection=*/projection, + /*l_domain=*/l_domain, + /*r_domain=*/r_domain, + /*l_dim_ordering=*/l_dim_ordering, + /*r_dim_ordering=*/r_dim_ordering); +} + +template +DimDomainMapping invert_dim_domain_mapping( + DimDomainMapping const &dim_domain_mapping) { + + return DimDomainMapping{ + /*coord_mapping=*/dim_domain_mapping.coord_mapping.reversed(), + /*l_domain=*/dim_domain_mapping.r_domain, + /*r_domain=*/dim_domain_mapping.l_domain, + }; +} + +template +DimDomainMapping compose_dim_domain_mappings( + DimDomainMapping const &lhs, + DimDomainMapping const &rhs) { + + ASSERT(lhs.r_domain == rhs.l_domain); + + return DimDomainMapping{ + /*coord_mapping=*/exhaustive_relational_join( + lhs.coord_mapping, + rhs.coord_mapping), + /*l_domain=*/lhs.l_domain, + /*r_domain=*/rhs.r_domain, + }; +} + + +template +DimDomainMapping dim_domain_mapping_from_projection( + DimProjection const &projection, + DimDomain const &l_domain, + DimDomain const &r_domain, + DimOrdering const &l_dim_ordering, + DimOrdering const &r_dim_ordering) { + + return DimDomainMapping{ + /*coord_mapping=*/generate_bidict( + get_coords_in_dim_domain(l_domain), + [&](DimCoord const &l_coord) { + return compute_dim_projection( + /*projection=*/projection, + /*input_coord=*/l_coord, + /*input_domain=*/l_domain, + /*output_domain=*/r_domain, + /*input_dim_ordering=*/l_dim_ordering, + /*output_dim_ordering=*/r_dim_ordering); + }), + /*l_domain=*/l_domain, + /*r_domain=*/r_domain, + }; +} + +} // namespace FlexFlow + +namespace std { + +template +struct hash<::FlexFlow::DimDomainMapping> { + size_t operator()(::FlexFlow::DimDomainMapping const &dim_domain_mapping) const { + return get_std_hash(dim_domain_mapping.tie()); + } +}; + +} // namespace std + +#endif diff --git 
a/lib/utils/include/utils/orthotope/dim_ordering.h b/lib/utils/include/utils/orthotope/dim_ordering.h
new file mode 100644
index 0000000000..774f1798a0
--- /dev/null
+++ b/lib/utils/include/utils/orthotope/dim_ordering.h
@@ -0,0 +1,36 @@
+#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_ORDERING_H
+#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_ORDERING_H
+
+#include "utils/bidict/algorithms/bidict_from_enumerating.h"
+#include "utils/orthotope/dim_ordering.dtg.h"
+
+namespace FlexFlow {
+
+template <typename T>
+DimOrdering<T> make_default_dim_ordering() {
+  return DimOrdering<T>{
+      [](T const &lhs, T const &rhs) -> bool { return lhs < rhs; },
+  };
+}
+
+template <typename T>
+DimOrdering<T> make_reversed_dim_ordering() {
+  return DimOrdering<T>{
+      [](T const &lhs, T const &rhs) -> bool { return rhs < lhs; },
+  };
+}
+
+template <typename T>
+DimOrdering<T> make_dim_ordering_from_vector(std::vector<T> const &v) {
+  bidict<nonnegative_int, T> v_as_map = bidict_from_enumerating(v);
+
+  return DimOrdering<T>{
+      [=](T const &lhs, T const &rhs) -> bool {
+        return v_as_map.at_r(lhs) < v_as_map.at_r(rhs);
+      },
+  };
+}
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/utils/include/utils/orthotope/dim_ordering.struct.toml b/lib/utils/include/utils/orthotope/dim_ordering.struct.toml
new file mode 100644
index 0000000000..3fc0d11ff4
--- /dev/null
+++ b/lib/utils/include/utils/orthotope/dim_ordering.struct.toml
@@ -0,0 +1,15 @@
+namespace = "FlexFlow"
+name = "DimOrdering"
+features = []
+
+template_params = [
+  "T",
+]
+
+includes = [
+  "",
+]
+
+[[fields]]
+name = "lt"
+type = "std::function"
diff --git a/lib/utils/include/utils/orthotope/dim_projection.h b/lib/utils/include/utils/orthotope/dim_projection.h
new file mode 100644
index 0000000000..167307a76d
--- /dev/null
+++ b/lib/utils/include/utils/orthotope/dim_projection.h
@@ -0,0 +1,218 @@
+#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_PROJECTION_H
+#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DIM_PROJECTION_H
+
+#include "utils/orthotope/dim_coord.h"
+#include "utils/orthotope/dim_projection.dtg.h"
+#include "utils/orthotope/down_projection.h"
+#include "utils/orthotope/eq_projection.h"
+#include "utils/orthotope/up_projection.h"
+#include "utils/overload.h"
+#include "utils/bidict/algorithms/bidict_from_keys_and_values.h"
+
+namespace FlexFlow {
+
+template <typename L, typename R>
+DimProjection<L, R> dim_projection_identity_map(
+    DimDomain<L> const &input_domain,
+    DimDomain<R> const &output_domain,
+    DimOrdering<L> const &input_dim_ordering,
+    DimOrdering<R> const &output_dim_ordering) {
+
+  std::vector<L> input_dims = sorted_by(get_domain_dims(input_domain),
+                                        input_dim_ordering.lt);
+
+  std::vector<R> output_dims = sorted_by(get_domain_dims(output_domain),
+                                         output_dim_ordering.lt);
+
+  return DimProjection<L, R>{
+      EqProjection<L, R>{
+          bidict_from_keys_and_values(input_dims, output_dims),
+      },
+  };
+}
+
+template <typename L, typename R>
+std::unordered_set<L>
+    input_dims_of_projection(DimProjection<L, R> const &projection) {
+  return projection.template visit<std::unordered_set<L>>(overload{
+      [](UpProjection<L, R> const &p) {
+        return input_dims_of_up_projection(p);
+      },
+      [](EqProjection<L, R> const &p) {
+        return input_dims_of_eq_projection(p);
+      },
+      [](DownProjection<L, R> const &p) {
+        return input_dims_of_down_projection(p);
+      },
+  });
+}
+
+template <typename L, typename R>
+std::unordered_set<R>
+    output_dims_of_projection(DimProjection<L, R> const &projection) {
+  return projection.template visit<std::unordered_set<R>>(overload{
+      [](UpProjection<L, R> const &p) {
+        return output_dims_of_up_projection(p);
+      },
+      [](EqProjection<L, R> const &p) {
+        return output_dims_of_eq_projection(p);
+      },
+      [](DownProjection<L, R> const &p) {
+        return
output_dims_of_down_projection(p); + }, + }); +}; + +template +DimProjection invert_dim_projection(DimProjection const &projection) { + return projection.template visit>(overload{ + [](UpProjection const &p) { + return DimProjection{ + invert_up_projection(p), + }; + }, + [](EqProjection const &p) { + return DimProjection{ + invert_eq_projection(p), + }; + }, + [](DownProjection const &p) { + return DimProjection{ + invert_down_projection(p), + }; + }, + }); +} + +template +DimCoord compute_dim_projection(DimProjection const &projection, + DimCoord const &input_coord, + DimDomain const &input_domain, + DimDomain const &output_domain, + DimOrdering const &input_dim_ordering, + DimOrdering const &output_dim_ordering) { + ASSERT(dim_domain_contains_coord(input_domain, input_coord), + input_domain, + input_coord); + ASSERT(get_domain_dims(input_domain) == input_dims_of_projection(projection)); + ASSERT(get_domain_dims(output_domain) == + output_dims_of_projection(projection)); + + DimCoord output_coord = projection.template visit>(overload{ + [&](UpProjection const &p) -> DimCoord { + return compute_up_projection( + p, input_coord, output_domain, output_dim_ordering); + }, + [&](EqProjection const &p) -> DimCoord { + return compute_eq_projection(p, input_coord); + }, + [&](DownProjection const &p) -> DimCoord { + return compute_down_projection( + p, input_coord, input_domain, input_dim_ordering); + }, + }); + + ASSERT(dim_domain_contains_coord(output_domain, output_coord), + output_domain, + output_coord, + input_coord, + input_domain); + + return output_coord; +} + + +template +DimProjection right_compose_eq_projection( + DimProjection const &lhs, + EqProjection const &rhs) { + return lhs.template visit>(overload{ + [&](UpProjection const &lhs_up_proj) { + return DimProjection{ + compose_up_projections( + lhs_up_proj, + up_from_eq_proj(rhs)), + }; + }, + [&](EqProjection const &lhs_eq_proj) { + return DimProjection{ + compose_eq_projections( + lhs_eq_proj, + rhs), + }; + }, + [&](DownProjection const &lhs_down_proj) { + return DimProjection{ + compose_down_projections( + lhs_down_proj, + down_from_eq_proj(rhs)), + }; + }, + }); +} + +template +DimProjection left_compose_eq_projection( + EqProjection const &lhs, + DimProjection const &rhs) { + return rhs.template visit>(overload{ + [&](UpProjection const &rhs_up_proj) { + return DimProjection{ + compose_up_projections( + up_from_eq_proj(lhs), + rhs_up_proj), + }; + }, + [&](EqProjection const &rhs_eq_proj) { + return DimProjection{ + compose_eq_projections( + lhs, + rhs_eq_proj), + }; + }, + [&](DownProjection const &rhs_down_proj) { + return DimProjection{ + compose_down_projections( + down_from_eq_proj(lhs), + rhs_down_proj), + }; + }, + }); +} + +template +DimProjection compose_dim_projections( + DimProjection const &lhs, + DimProjection const &rhs) { + + if (lhs.is_eq_proj()) { + return DimProjection{ + left_compose_eq_projection( + lhs.require_eq_proj(), + rhs), + }; + } else if (rhs.is_eq_proj()) { + return DimProjection{ + right_compose_eq_projection( + lhs, + rhs.require_eq_proj()), + }; + } else if (lhs.is_up_proj() && rhs.is_up_proj()) { + return DimProjection{ + compose_up_projections( + lhs.require_up_proj(), + rhs.require_up_proj()), + }; + } else if (lhs.is_down_proj() && rhs.is_down_proj()) { + return DimProjection{ + compose_down_projections( + lhs.require_down_proj(), + rhs.require_down_proj()), + }; + } else { + PANIC("Cannot compose projections", lhs, rhs); + } +} + +} // namespace FlexFlow + +#endif diff --git 
a/lib/utils/include/utils/orthotope/dim_projection.variant.toml b/lib/utils/include/utils/orthotope/dim_projection.variant.toml new file mode 100644 index 0000000000..b22e327259 --- /dev/null +++ b/lib/utils/include/utils/orthotope/dim_projection.variant.toml @@ -0,0 +1,29 @@ +namespace = "FlexFlow" +name = "DimProjection" +features = [ + "eq", + "hash", + "fmt", +] + +template_params = [ + "L", "R" +] + +includes = [ + "utils/orthotope/up_projection.dtg.h", + "utils/orthotope/eq_projection.dtg.h", + "utils/orthotope/down_projection.dtg.h", +] + +[[values]] +type = "::FlexFlow::UpProjection" +key = "up_proj" + +[[values]] +type = "::FlexFlow::EqProjection" +key = "eq_proj" + +[[values]] +type = "::FlexFlow::DownProjection" +key = "down_proj" diff --git a/lib/utils/include/utils/orthotope/down_projection.h b/lib/utils/include/utils/orthotope/down_projection.h new file mode 100644 index 0000000000..b7aa0809a9 --- /dev/null +++ b/lib/utils/include/utils/orthotope/down_projection.h @@ -0,0 +1,101 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DOWN_PROJECTION_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_DOWN_PROJECTION_H + +#include "utils/many_to_one/exhaustive_relational_join.h" +#include "utils/many_to_one/invert_many_to_one.h" +#include "utils/many_to_one/many_to_one_from_bidict.h" +#include "utils/orthotope/dim_coord.h" +#include "utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/dim_ordering.dtg.h" +#include "utils/orthotope/down_projection.dtg.h" +#include "utils/orthotope/eq_projection.dtg.h" +#include "utils/orthotope/orthotope.dtg.h" +#include "utils/orthotope/orthotope.h" +#include "utils/orthotope/orthotope_coord.dtg.h" +#include "utils/orthotope/up_projection.dtg.h" + +namespace FlexFlow { + +template +DownProjection make_empty_down_projection() { + return DownProjection{ManyToOne{}}; +} + +template +std::unordered_set + input_dims_of_down_projection(DownProjection const &projection) { + return projection.dim_mapping.left_values(); +} + +template +std::unordered_set + output_dims_of_down_projection(DownProjection const &projection) { + return projection.dim_mapping.right_values(); +} + +template +DimCoord compute_down_projection(DownProjection const &projection, + DimCoord const &coord, + DimDomain const &input_domain, + DimOrdering const &input_dim_ordering) { + std::unordered_set input_dims = input_dims_of_down_projection(projection); + std::unordered_set coord_dims = get_coord_dims(coord); + ASSERT(input_dims == coord_dims, + "compute_down_projection expected coord dimensions to match " + "projection input dimensions"); + + std::unordered_set output_dims = + output_dims_of_down_projection(projection); + + return DimCoord{ + generate_map( + output_dims, + [&](R const &output_dim) { + std::unordered_set src_dims = + projection.dim_mapping.at_r(output_dim); + + DimCoord src_coord = restrict_coord_to_dims(coord, src_dims); + DimDomain src_domain = + restrict_domain_to_dims(input_domain, src_dims); + + return flatten_dim_coord(src_coord, src_domain, input_dim_ordering); + }), + }; +} + +template +void project_dims(DownProjection &proj, + std::unordered_set const &from, + R const &onto) { + for (L const &l : from) { + proj.dim_mapping.insert({l, onto}); + } +} + +template +UpProjection + invert_down_projection(DownProjection const &down_proj) { + return UpProjection{ + invert_many_to_one(down_proj.dim_mapping), + }; +} + +template +DownProjection + compose_down_projections(DownProjection const &fst, + DownProjection const &snd) { + return DownProjection{ 
+ exhaustive_relational_join(fst.dim_mapping, snd.dim_mapping), + }; +} + +template +DownProjection down_from_eq_proj(EqProjection const &eq) { + return DownProjection{ + many_to_one_from_bidict(eq.dim_mapping), + }; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/orthotope/down_projection.struct.toml b/lib/utils/include/utils/orthotope/down_projection.struct.toml new file mode 100644 index 0000000000..419434905b --- /dev/null +++ b/lib/utils/include/utils/orthotope/down_projection.struct.toml @@ -0,0 +1,19 @@ +namespace = "FlexFlow" +name = "DownProjection" +features = [ + "eq", + "hash", + "fmt", +] + +template_params = [ + "L", "R" +] + +includes = [ + "utils/many_to_one/many_to_one.h", +] + +[[fields]] +name = "dim_mapping" +type = "::FlexFlow::ManyToOne" diff --git a/lib/utils/include/utils/orthotope/eq_projection.h b/lib/utils/include/utils/orthotope/eq_projection.h new file mode 100644 index 0000000000..4b8aef69dd --- /dev/null +++ b/lib/utils/include/utils/orthotope/eq_projection.h @@ -0,0 +1,63 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_EQ_PROJECTION_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_EQ_PROJECTION_H + +#include "utils/bidict/algorithms/exhaustive_relational_join.h" +#include "utils/containers/map_keys.h" +#include "utils/orthotope/dim_coord.dtg.h" +#include "utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/eq_projection.dtg.h" + +namespace FlexFlow { + +template +EqProjection make_empty_eq_projection() { + return EqProjection{bidict{}}; +} + +template +std::unordered_set + input_dims_of_eq_projection(EqProjection const &projection) { + return projection.dim_mapping.left_values(); +} + +template +std::unordered_set + output_dims_of_eq_projection(EqProjection const &projection) { + return projection.dim_mapping.right_values(); +} + +template +void project_dims(EqProjection &proj, + L const &from, + R const &to) { + proj.dim_mapping.equate(from, to); +} + +template +EqProjection invert_eq_projection(EqProjection const &input) { + return EqProjection{ + input.dim_mapping.reversed(), + }; +} + +template +EqProjection compose_eq_projections(EqProjection const &fst, + EqProjection const &snd) { + return EqProjection{ + exhaustive_relational_join(fst.dim_mapping, snd.dim_mapping)}; +} + +template +DimCoord compute_eq_projection(EqProjection const &projection, + DimCoord const &coord) { + return DimCoord{ + map_keys(coord.raw, + [&](L const &input_dim) -> R { + return projection.dim_mapping.at_l(input_dim); + }), + }; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/orthotope/eq_projection.struct.toml b/lib/utils/include/utils/orthotope/eq_projection.struct.toml new file mode 100644 index 0000000000..ba9718cba0 --- /dev/null +++ b/lib/utils/include/utils/orthotope/eq_projection.struct.toml @@ -0,0 +1,20 @@ +namespace = "FlexFlow" +name = "EqProjection" +features = [ + "eq", + "hash", + "fmt", + "rapidcheck", +] + +template_params = [ + "L", "R" +] + +includes = [ + "utils/bidict/bidict.h", +] + +[[fields]] +name = "dim_mapping" +type = "::FlexFlow::bidict" diff --git a/lib/utils/include/utils/orthotope/minimal_dim_domain.h b/lib/utils/include/utils/orthotope/minimal_dim_domain.h new file mode 100644 index 0000000000..9e909e1431 --- /dev/null +++ b/lib/utils/include/utils/orthotope/minimal_dim_domain.h @@ -0,0 +1,117 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_MINIMAL_DIM_DOMAIN_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_MINIMAL_DIM_DOMAIN_H + +#include 
"utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/dim_ordering.dtg.h" +#include "utils/orthotope/minimal_orthotope.dtg.h" +#include "utils/orthotope/minimal_dim_domain.dtg.h" +#include "utils/containers/map_values.h" +#include "utils/containers/filtermap_values.h" +#include "utils/containers/keys.h" +#include "utils/containers/transform.h" +#include "utils/containers/sorted_by.h" +#include "utils/containers/map_from_keys_and_values.h" +#include "utils/containers/restrict_keys.h" +#include "utils/nonnegative_int/num_elements.h" +#include "utils/containers/are_disjoint.h" +#include "utils/containers/generate_map.h" +#include "utils/containers/binary_merge_disjoint_maps.h" + +namespace FlexFlow { + +template +MinimalDimDomain empty_minimal_dim_domain() { + return MinimalDimDomain{{}}; +} + +template +nonnegative_int minimal_dim_domain_num_dims(MinimalDimDomain const &domain) { + return num_elements(domain.dims); +} + +template +DimDomain lift_minimal_dim_domain(MinimalDimDomain const &minimal_dim_domain) { + return DimDomain{ + map_values(minimal_dim_domain.dims, + [](int_ge_two component) { + return component.positive_int_from_int_ge_two(); + }), + }; +} + +template +MinimalDimDomain require_dim_domain_is_minimal(DimDomain const &dim_domain) { + return MinimalDimDomain{ + map_values(dim_domain.dims, + [](positive_int dim_size) { + return int_ge_two{dim_size}; + }), + }; +} + +template +MinimalDimDomain minimal_dim_domain_from_dim_domain(DimDomain const &dim_domain) { + return MinimalDimDomain{ + filtermap_values(dim_domain.dims, try_int_ge_two_from_positive_int) + }; +} + +template +DimDomain dim_domain_from_minimal_dim_domain(MinimalDimDomain const &minimal_dim_domain, + std::unordered_set const &trivial_dims) { + std::unordered_set nontrivial_dims = get_minimal_domain_dims(minimal_dim_domain); + + ASSERT(are_disjoint(nontrivial_dims, trivial_dims)); + + return DimDomain{ + /*dims=*/binary_merge_disjoint_maps( + map_values( + minimal_dim_domain.dims, + [](int_ge_two x) { + return x.positive_int_from_int_ge_two(); + }), + generate_map(trivial_dims, + [](T const &) { + return 1_p; + })), + }; +} + +template +std::unordered_set get_minimal_domain_dims(MinimalDimDomain const &domain) { + return keys(domain.dims); +} + +template +MinimalDimDomain restrict_minimal_domain_to_dims(MinimalDimDomain const &domain, + std::unordered_set const &allowed) { + return MinimalDimDomain{restrict_keys(domain.dims, allowed)}; +} + + +template +MinimalOrthotope minimal_orthotope_from_minimal_dim_domain( + MinimalDimDomain const &domain, + DimOrdering const &dim_ordering) { + + return MinimalOrthotope{ + transform(sorted_by(get_minimal_domain_dims(domain), dim_ordering.lt), + [&](T const &t) { return domain.dims.at(t); }), + }; +} + +template +MinimalDimDomain minimal_dim_domain_from_minimal_orthotope( + MinimalOrthotope const &orthotope, + std::unordered_set const &dims, + DimOrdering const &dim_ordering) { + + return MinimalDimDomain{ + map_from_keys_and_values( + sorted_by(dims, dim_ordering.lt), orthotope.dims), + }; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/orthotope/minimal_dim_domain.struct.toml b/lib/utils/include/utils/orthotope/minimal_dim_domain.struct.toml new file mode 100644 index 0000000000..0ecc84bf34 --- /dev/null +++ b/lib/utils/include/utils/orthotope/minimal_dim_domain.struct.toml @@ -0,0 +1,27 @@ +namespace = "FlexFlow" +name = "MinimalDimDomain" +features = [ + "eq", + "ord", + "fmt", + "hash", + "json", +] + +template_params = [ + "T", +] + 
+includes = [ + "", + "utils/int_ge_two/int_ge_two.h", +] + +src_includes = [ + "utils/fmt/unordered_map.h", + "utils/hash/unordered_map.h", +] + +[[fields]] +name = "dims" +type = "std::unordered_map" diff --git a/lib/utils/include/utils/orthotope/minimal_dim_domain_mapping.h b/lib/utils/include/utils/orthotope/minimal_dim_domain_mapping.h new file mode 100644 index 0000000000..d35b99a5f4 --- /dev/null +++ b/lib/utils/include/utils/orthotope/minimal_dim_domain_mapping.h @@ -0,0 +1,272 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_MINIMAL_DIM_DOMAIN_MAPPING_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_MINIMAL_DIM_DOMAIN_MAPPING_H + +#include "utils/bidict/bidict.h" +#include "utils/orthotope/dim_coord.dtg.h" +#include "utils/orthotope/minimal_dim_domain.dtg.h" +#include "utils/orthotope/dim_domain_mapping.h" +#include "utils/orthotope/dim_ordering.dtg.h" +#include "utils/orthotope/dim_projection.h" +#include "utils/orthotope/dim_coord.h" +#include "utils/bidict/algorithms/left_entries.h" +#include "utils/bidict/algorithms/right_entries.h" +#include "utils/bidict/algorithms/exhaustive_relational_join.h" +#include "utils/hash/tuple.h" +#include "utils/bidict/generate_bidict.h" +#include "utils/orthotope/minimal_dim_domain.dtg.h" +#include "utils/orthotope/minimal_dim_domain.dtg.h" +#include "utils/bidict/algorithms/transform_values.h" +#include "utils/bidict/algorithms/transform_keys.h" + +namespace FlexFlow { + +template +struct MinimalDimDomainMapping { +public: + explicit MinimalDimDomainMapping( + bidict, DimCoord> const &coord_mapping, + MinimalDimDomain const &l_domain, + MinimalDimDomain const &r_domain) + : coord_mapping(coord_mapping), + l_domain(l_domain), + r_domain(r_domain) + { + ASSERT(get_coords_in_minimal_dim_domain(l_domain) == left_entries(coord_mapping)); + ASSERT(get_coords_in_minimal_dim_domain(r_domain) == right_entries(coord_mapping)); + } + + DimCoord at_l(DimCoord const &l_coord) const { + ASSERT(minimal_dim_domain_contains_coord(this->l_domain, l_coord)); + + return this->coord_mapping.at_l(l_coord); + } + + DimCoord at_r(DimCoord const &r_coord) const { + ASSERT(minimal_dim_domain_contains_coord(this->r_domain, r_coord)); + + return this->coord_mapping.at_r(r_coord); + } + + bool operator==(MinimalDimDomainMapping const &other) const { + return this->tie() == other.tie(); + } + + bool operator!=(MinimalDimDomainMapping const &other) const { + return this->tie() != other.tie(); + } + +public: + bidict, DimCoord> coord_mapping; + MinimalDimDomain l_domain; + MinimalDimDomain r_domain; + +private: + std::tuple< + decltype(coord_mapping) const &, + decltype(l_domain) const &, + decltype(r_domain) const & + > tie() const { + return std::tie( + this->coord_mapping, + this->l_domain, + this->r_domain); + } + + friend struct ::std::hash>; +}; + +template +std::string format_as(MinimalDimDomainMapping const &m) { + CHECK_FMTABLE(L); + CHECK_FMTABLE(R); + + return fmt::format( + "", + m.l_domain, + m.r_domain, + m.coord_mapping); +} + +template +std::ostream &operator<<(std::ostream &s, MinimalDimDomainMapping const &m) { + CHECK_FMTABLE(L); + CHECK_FMTABLE(R); + + return (s << fmt::to_string(m)); +} + +template +MinimalDimDomainMapping minimal_mapping_from_dim_domain_mapping( + DimDomainMapping const &m) { + + std::unordered_set l_nontrivial_dims = + get_nontrivial_domain_dims(m.l_domain); + + std::unordered_set r_nontrivial_dims = + get_nontrivial_domain_dims(m.r_domain); + + return MinimalDimDomainMapping{ + /*coord_mapping=*/ + transform_keys( + 
transform_values( + m.coord_mapping, + [&](DimCoord const &r_coord) { + return restrict_coord_to_dims(r_coord, r_nontrivial_dims); + }), + [&](DimCoord const &l_coord) { + return restrict_coord_to_dims(l_coord, l_nontrivial_dims); + }), + /*l_domain=*/minimal_dim_domain_from_dim_domain(m.l_domain), + /*r_domain=*/minimal_dim_domain_from_dim_domain(m.r_domain), + }; +} + +template +DimDomainMapping dim_domain_mapping_from_minimal_dim_domain( + MinimalDimDomainMapping const &m, + std::unordered_set const &l_trivial_dims, + std::unordered_set const &r_trivial_dims) { + + + DimDomain l_domain = dim_domain_from_minimal_dim_domain(m.l_domain, l_trivial_dims); + DimDomain r_domain = dim_domain_from_minimal_dim_domain(m.r_domain, r_trivial_dims); + + std::unordered_set all_l_dims = get_domain_dims(l_domain); + std::unordered_set all_r_dims = get_domain_dims(r_domain); + + return DimDomainMapping{ + /*coord_mapping=*/ + transform_keys( + transform_values( + m.coord_mapping, + [&](DimCoord const &r_coord) { + return lift_dim_coord(r_coord, all_r_dims); + }), + [&](DimCoord const &l_coord) { + return lift_dim_coord(l_coord, all_l_dims); + }), + /*l_domain=*/l_domain, + /*r_domain=*/r_domain, + }; +} + +template +MinimalDimDomainMapping empty_minimal_dim_domain_mapping() { + return MinimalDimDomainMapping{ + /*coord_mapping=*/{}, + /*l_domain=*/empty_minimal_dim_domain(), + /*r_domain=*/empty_minimal_dim_domain(), + }; +} + +template +MinimalDimDomainMapping minimal_dim_domain_mapping_identity_map( + MinimalDimDomain const &l_domain, + MinimalDimDomain const &r_domain, + DimOrdering const &l_dim_ordering, + DimOrdering const &r_dim_ordering) { + DimProjection projection = + dim_projection_identity_map( + lift_minimal_dim_domain(l_domain), + lift_minimal_dim_domain(r_domain), + l_dim_ordering, + r_dim_ordering); + + return minimal_dim_domain_mapping_from_projection( + /*projection=*/projection, + /*l_domain=*/l_domain, + /*r_domain=*/r_domain, + /*l_dim_ordering=*/l_dim_ordering, + /*r_dim_ordering=*/r_dim_ordering); +} + +template +MinimalDimDomainMapping invert_minimal_dim_domain_mapping( + MinimalDimDomainMapping const &minimal_dim_domain_mapping) { + + return MinimalDimDomainMapping{ + /*coord_mapping=*/minimal_dim_domain_mapping.coord_mapping.reversed(), + /*l_domain=*/minimal_dim_domain_mapping.r_domain, + /*r_domain=*/minimal_dim_domain_mapping.l_domain, + }; +} + +template +MinimalDimDomainMapping compose_minimal_dim_domain_mappings( + MinimalDimDomainMapping const &lhs, + MinimalDimDomainMapping const &rhs) { + + ASSERT(lhs.r_domain == rhs.l_domain); + + return MinimalDimDomainMapping{ + /*coord_mapping=*/exhaustive_relational_join( + lhs.coord_mapping, + rhs.coord_mapping), + /*l_domain=*/lhs.l_domain, + /*r_domain=*/rhs.r_domain, + }; +} + +template +DimDomainMapping compose_dim_domain_mappings_through_minimal( + DimDomainMapping const &lhs, + DimDomainMapping const &rhs) { + + MinimalDimDomainMapping minimal_lhs = + minimal_mapping_from_dim_domain_mapping(lhs); + + std::unordered_set t1_trivial_dims + = get_trivial_domain_dims(lhs.l_domain); + + MinimalDimDomainMapping minimal_rhs = + minimal_mapping_from_dim_domain_mapping(rhs); + + std::unordered_set t3_trivial_dims + = get_trivial_domain_dims(rhs.r_domain); + + return + dim_domain_mapping_from_minimal_dim_domain( + compose_minimal_dim_domain_mappings(minimal_lhs, minimal_rhs), + t1_trivial_dims, + t3_trivial_dims); +} + +template +MinimalDimDomainMapping minimal_dim_domain_mapping_from_projection( + DimProjection const &projection, + 
MinimalDimDomain const &l_domain, + MinimalDimDomain const &r_domain, + DimOrdering const &l_dim_ordering, + DimOrdering const &r_dim_ordering) { + + return MinimalDimDomainMapping{ + /*coord_mapping=*/generate_bidict( + get_coords_in_minimal_dim_domain(l_domain), + [&](DimCoord const &l_coord) { + return compute_dim_projection( + /*projection=*/projection, + /*input_coord=*/l_coord, + /*input_domain=*/lift_minimal_dim_domain(l_domain), + /*output_domain=*/lift_minimal_dim_domain(r_domain), + /*input_dim_ordering=*/l_dim_ordering, + /*output_dim_ordering=*/r_dim_ordering); + }), + /*l_domain=*/l_domain, + /*r_domain=*/r_domain, + }; +} + +} // namespace FlexFlow + +namespace std { + +template +struct hash<::FlexFlow::MinimalDimDomainMapping> { + size_t operator()(::FlexFlow::MinimalDimDomainMapping const &minimal_dim_domain_mapping) const { + return get_std_hash(minimal_dim_domain_mapping.tie()); + } +}; + +} // namespace std + +#endif diff --git a/lib/utils/include/utils/orthotope/minimal_orthotope.h b/lib/utils/include/utils/orthotope/minimal_orthotope.h new file mode 100644 index 0000000000..de6a66378b --- /dev/null +++ b/lib/utils/include/utils/orthotope/minimal_orthotope.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_MINIMAL_ORTHOTOPE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_MINIMAL_ORTHOTOPE_H + +#include "utils/orthotope/orthotope.dtg.h" +#include "utils/orthotope/minimal_orthotope.dtg.h" + +namespace FlexFlow { + +nonnegative_int minimal_orthotope_get_num_dims(MinimalOrthotope const &); +positive_int minimal_orthotope_get_volume(MinimalOrthotope const &); + +MinimalOrthotope require_orthotope_is_minimal(Orthotope const &); +Orthotope orthotope_from_minimal_orthotope(MinimalOrthotope const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/orthotope/minimal_orthotope.struct.toml b/lib/utils/include/utils/orthotope/minimal_orthotope.struct.toml new file mode 100644 index 0000000000..4a9aa29cfa --- /dev/null +++ b/lib/utils/include/utils/orthotope/minimal_orthotope.struct.toml @@ -0,0 +1,24 @@ +namespace = "FlexFlow" +name = "MinimalOrthotope" +features = [ + "eq", + "ord", + "fmt", + "hash", + "json", + "rapidcheck", +] + +includes = [ + "", + "utils/int_ge_two/int_ge_two.h", +] + +src_includes = [ + "utils/fmt/vector.h", + "utils/hash/vector.h", +] + +[[fields]] +name = "dims" +type = "std::vector<::FlexFlow::int_ge_two>" diff --git a/lib/utils/include/utils/orthotope/orthotope.h b/lib/utils/include/utils/orthotope/orthotope.h new file mode 100644 index 0000000000..509497ff00 --- /dev/null +++ b/lib/utils/include/utils/orthotope/orthotope.h @@ -0,0 +1,32 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_ORTHOTOPE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_ORTHOTOPE_H + +#include "utils/orthotope/orthotope.dtg.h" +#include "utils/orthotope/orthotope_coord.dtg.h" + +namespace FlexFlow { + +nonnegative_int orthotope_get_num_dims(Orthotope const &); + +positive_int orthotope_get_volume(Orthotope const &); + +std::unordered_set + get_all_coords_in_orthotope(Orthotope const &); + +bool orthotope_contains_coord(Orthotope const &, OrthotopeCoord const &); + +Orthotope restrict_orthotope_to_dims(Orthotope const &, + std::set const &); + +nonnegative_int flatten_orthotope_coord(OrthotopeCoord const &, + Orthotope const &); + +OrthotopeCoord orthotope_get_maximum_coord(Orthotope const &); + +nonnegative_int orthotope_get_maximum_offset(Orthotope const &); + +OrthotopeCoord 
unflatten_orthotope_coord(nonnegative_int, Orthotope const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/operator_task_space.struct.toml b/lib/utils/include/utils/orthotope/orthotope.struct.toml similarity index 71% rename from lib/pcg/include/pcg/operator_task_space.struct.toml rename to lib/utils/include/utils/orthotope/orthotope.struct.toml index 389e12e8f2..2ffcb6960a 100644 --- a/lib/pcg/include/pcg/operator_task_space.struct.toml +++ b/lib/utils/include/utils/orthotope/orthotope.struct.toml @@ -1,24 +1,23 @@ namespace = "FlexFlow" -name = "OperatorTaskSpace" +name = "Orthotope" features = [ "eq", "ord", + "fmt", "hash", "json", - "rapidcheck", - "fmt", ] -includes = [ +includes = [ "", "utils/positive_int/positive_int.h", ] src_includes = [ "utils/fmt/vector.h", - "utils/hash/vector.h" + "utils/hash/vector.h", ] [[fields]] -name = "degrees" +name = "dims" type = "std::vector<::FlexFlow::positive_int>" diff --git a/lib/utils/include/utils/orthotope/orthotope_coord.h b/lib/utils/include/utils/orthotope/orthotope_coord.h new file mode 100644 index 0000000000..97d9afa03c --- /dev/null +++ b/lib/utils/include/utils/orthotope/orthotope_coord.h @@ -0,0 +1,15 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_ORTHOTOPE_COORD_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_ORTHOTOPE_COORD_H + +#include "utils/orthotope/orthotope_coord.dtg.h" + +namespace FlexFlow { + +nonnegative_int orthotope_coord_num_dims(OrthotopeCoord const &); + +OrthotopeCoord restrict_orthotope_coord_to_dims( + OrthotopeCoord const &coord, std::set const &allowed_dims); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/task_space_coordinate.struct.toml b/lib/utils/include/utils/orthotope/orthotope_coord.struct.toml similarity index 77% rename from lib/pcg/include/pcg/task_space_coordinate.struct.toml rename to lib/utils/include/utils/orthotope/orthotope_coord.struct.toml index 1057676b8e..a66220c611 100644 --- a/lib/pcg/include/pcg/task_space_coordinate.struct.toml +++ b/lib/utils/include/utils/orthotope/orthotope_coord.struct.toml @@ -1,24 +1,23 @@ namespace = "FlexFlow" -name = "TaskSpaceCoordinate" +name = "OrthotopeCoord" features = [ "eq", "ord", + "fmt", "hash", "json", - # "rapidcheck", - "fmt", ] -includes = [ +includes = [ "", "utils/nonnegative_int/nonnegative_int.h", ] src_includes = [ - "utils/hash/vector.h", "utils/fmt/vector.h", + "utils/hash/vector.h", ] [[fields]] -name = "raw_coord" +name = "raw" type = "std::vector<::FlexFlow::nonnegative_int>" diff --git a/lib/utils/include/utils/orthotope/up_projection.h b/lib/utils/include/utils/orthotope/up_projection.h new file mode 100644 index 0000000000..9f44ad580e --- /dev/null +++ b/lib/utils/include/utils/orthotope/up_projection.h @@ -0,0 +1,103 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_UP_PROJECTION_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_ORTHOTOPE_UP_PROJECTION_H + +#include "utils/containers/flatmap.h" +#include "utils/containers/keys.h" +#include "utils/containers/values.h" +#include "utils/one_to_many/exhaustive_relational_join.h" +#include "utils/one_to_many/invert_one_to_many.h" +#include "utils/one_to_many/one_to_many_from_bidict.h" +#include "utils/orthotope/dim_coord.h" +#include "utils/orthotope/dim_domain.dtg.h" +#include "utils/orthotope/down_projection.dtg.h" +#include "utils/orthotope/eq_projection.dtg.h" +#include "utils/orthotope/up_projection.dtg.h" +#include "utils/containers/is_subseteq_of.h" + +namespace FlexFlow { + +template +UpProjection 
make_empty_up_projection() { + return UpProjection{OneToMany{}}; +} + +template +std::unordered_set + input_dims_of_up_projection(UpProjection const &projection) { + return projection.dim_mapping.left_values(); +} + +template +std::unordered_set + output_dims_of_up_projection(UpProjection const &projection) { + return projection.dim_mapping.right_values(); +} + +template +DimCoord compute_up_projection(UpProjection const &projection, + DimCoord const &coord, + DimDomain const &output_domain, + DimOrdering const &output_dim_ordering) { + std::unordered_set input_dims = input_dims_of_up_projection(projection); + std::unordered_set coord_dims = get_coord_dims(coord); + ASSERT(input_dims == coord_dims, + "compute_up_projection expected coord dimensions to match projection " + "input dimensions"); + + std::unordered_set output_dims = output_dims_of_up_projection(projection); + std::unordered_set output_domain_dims = get_domain_dims(output_domain); + ASSERT(is_subseteq_of(output_dims, output_domain_dims)); + + DimCoord unlifted = DimCoord{ + flatmap(coord.raw, + [&](L const &input_dim, nonnegative_int input_dim_val) { + std::unordered_set dst_dims = + projection.dim_mapping.at_l(input_dim); + + DimDomain dst_domain = + restrict_domain_to_dims(output_domain, dst_dims); + + DimCoord dst_coord = unflatten_dim_coord( + input_dim_val, dst_domain, output_dim_ordering); + + return dst_coord.raw; + }), + }; + + return unlifted; +} + +template +void project_dims(UpProjection &proj, + L const &onto, + std::unordered_set const &from) { + for (R const &r : from) { + proj.dim_mapping.insert({onto, r}); + } +} + +template +DownProjection invert_up_projection(UpProjection const &up_proj) { + return DownProjection{ + invert_one_to_many(up_proj.dim_mapping), + }; +} + +template +UpProjection compose_up_projections(UpProjection const &fst, + UpProjection const &snd) { + return UpProjection{ + exhaustive_relational_join(fst.dim_mapping, snd.dim_mapping), + }; +} + +template +UpProjection up_from_eq_proj(EqProjection const &eq) { + return UpProjection{ + one_to_many_from_bidict(eq.dim_mapping), + }; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/orthotope/up_projection.struct.toml b/lib/utils/include/utils/orthotope/up_projection.struct.toml new file mode 100644 index 0000000000..a5ec5acec5 --- /dev/null +++ b/lib/utils/include/utils/orthotope/up_projection.struct.toml @@ -0,0 +1,19 @@ +namespace = "FlexFlow" +name = "UpProjection" +features = [ + "eq", + "hash", + "fmt", +] + +template_params = [ + "L", "R" +] + +includes = [ + "utils/one_to_many/one_to_many.h", +] + +[[fields]] +name = "dim_mapping" +type = "::FlexFlow::OneToMany" diff --git a/lib/utils/include/utils/positive_int/positive_int.h b/lib/utils/include/utils/positive_int/positive_int.h index 6ddddadf50..6d97e21cd6 100644 --- a/lib/utils/include/utils/positive_int/positive_int.h +++ b/lib/utils/include/utils/positive_int/positive_int.h @@ -56,6 +56,8 @@ struct positive_int { positive_int &operator+=(positive_int other); positive_int &operator+=(nonnegative_int other); + friend positive_int operator+(nonnegative_int lhs, positive_int rhs); + positive_int operator*(positive_int other) const; positive_int &operator*=(positive_int other); nonnegative_int operator*(nonnegative_int other) const; diff --git a/lib/utils/include/utils/sequence.h b/lib/utils/include/utils/sequence.h index 07e4554299..26ed4a55f9 100644 --- a/lib/utils/include/utils/sequence.h +++ b/lib/utils/include/utils/sequence.h @@ -2,7 +2,6 @@ #define 
_FLEXFLOW_UTILS_INCLUDE_UTILS_SEQUENCE_H
 #include "utils/tuple.h"
-#include "utils/visitable_core.h"
 #include 
 #include 
@@ -79,13 +78,6 @@ using seq_enumerate_t = typename seq_enumerate::type;
 template 
 struct seq_transform_type;
-template 
-struct seq_transform_type>
-    : tuple_prepend_type<
-          visit_struct::traits::clean_t()(
-              std::declval>()))>,
-          typename seq_transform_type>::type> {};
-
 template 
 struct seq_transform_type> {
   using type = std::tuple<>;
diff --git a/lib/utils/include/utils/stack_vector/stack_vector.h b/lib/utils/include/utils/stack_vector/stack_vector.h
index 64d005a10e..872173389f 100644
--- a/lib/utils/include/utils/stack_vector/stack_vector.h
+++ b/lib/utils/include/utils/stack_vector/stack_vector.h
@@ -1,6 +1,7 @@
 #ifndef _FLEXFLOW_UTILS_STACK_VECTOR_H
 #define _FLEXFLOW_UTILS_STACK_VECTOR_H
+#include "utils/check_fmtable.h"
 #include "utils/hash-utils.h"
 #include "utils/join_strings.h"
 #include "utils/test_types.h"
diff --git a/lib/utils/include/utils/type_traits.h b/lib/utils/include/utils/type_traits.h
index 7abb3ffd5b..2f07050527 100644
--- a/lib/utils/include/utils/type_traits.h
+++ b/lib/utils/include/utils/type_traits.h
@@ -3,7 +3,6 @@
 #include "utils/metafunction.h"
 #include "utils/type_traits_core.h"
-#include "utils/visitable_core.h"
 #include 
 #include 
@@ -81,13 +80,6 @@ struct elements_satisfy {
                 "than 1 argument");
 };
-template