@@ -627,24 +627,6 @@ def get_aggregated_tensor(
             raise ValueError(f"Aggregator does not have `{tensor_key}`")
 
         # Serialize (and compress) the tensor
-        named_tensor = self.serialize_tensor(tensor_key, nparray, lossless=require_lossless)
-        return named_tensor
-
-    def serialize_tensor(self, tensor_key, nparray, lossless: bool):
-        """Serialize the tensor.
-
-        This function also performs compression.
-
-        Args:
-            tensor_key (namedtuple): A TensorKey.
-            nparray: A NumPy array associated with the requested
-                tensor key.
-            lossless: Whether to use lossless compression.
-
-        Returns:
-            named_tensor (protobuf): The tensor constructed from the nparray.
-        """
-        # Secure aggregation setup tensor.
         if "secagg" in tensor_key.tags:
             import numpy as np
 
@@ -663,14 +645,9 @@ def default(self, obj):
 
             return named_tensor
 
-        tensor_key, nparray, metadata = self.tensor_codec.compress(tensor_key, nparray, lossless)
-        named_tensor = utils.construct_named_tensor(
-            tensor_key,
-            nparray,
-            metadata,
-            lossless,
+        named_tensor = utils.serialize_tensor(
+            tensor_key, nparray, self.tensor_codec, lossless=require_lossless
         )
-
         return named_tensor
 
     def _collaborator_task_completed(self, collaborator, task_name, round_num):
@@ -769,43 +746,47 @@ def process_task_results(
         self._is_collaborator_done(collaborator_name, round_number)
         self._end_of_round_with_stragglers_check()
 
-        task_key = TaskResultKey(task_name, collaborator_name, round_number)
-
-        # we mustn't have results already
         if self._collaborator_task_completed(collaborator_name, task_name, round_number):
             logger.warning(
-                f"Aggregator already has task results from collaborator {collaborator_name}"
-                f" for task {task_key}"
+                f"Aggregator already has task results from collaborator {collaborator_name} "
+                f"for task {task_name} in round {round_number}. Ignoring..."
             )
             return
 
-        # By giving task_key it's own weight, we can support different
-        # training/validation weights
-        # As well as eventually supporting weights that change by round
-        # (if more data is added)
+        # Record collaborator individual weightage/contribution for federated averaging
+        task_key = TaskResultKey(task_name, collaborator_name, round_number)
         self.collaborator_task_weight[task_key] = data_size
 
-        # initialize the list of tensors that go with this task
-        # Setting these incrementally is leading to missing values
+        # Process named tensors
         task_results = []
-
+        result_tensor_dict = {}
         for named_tensor in named_tensors:
-            tensor_key, value = self.deserialize_tensor(named_tensor, collaborator_name)
+            # Deserialize
+            tensor_key, nparray = utils.deserialize_tensor(named_tensor, self.tensor_codec)
+
+            # Update origin/tags
+            updated_tags = change_tags(tensor_key.tags, add_field=collaborator_name)
+            tensor_key = tensor_key._replace(origin=self.uuid, tags=updated_tags)
+
+            # Record
+            result_tensor_dict[tensor_key] = nparray
+            task_results.append(tensor_key)
 
             if "metric" in tensor_key.tags:
-                # Caution: This schema must be followed. It is also used in
-                # gRPC message streams for director/envoy.
+                assert nparray.ndim == 0, (
+                    f"Expected metric to be a scalar, got shape {nparray.shape}"
+                )
                 metrics = {
                     "round": round_number,
                     "metric_origin": collaborator_name,
                     "task_name": task_name,
                     "metric_name": tensor_key.tensor_name,
-                    "metric_value": float(value),
+                    "metric_value": float(nparray),
                 }
                 self.metric_queue.put(metrics)
 
-            task_results.append(tensor_key)
-
+        # Store results in TensorDB
+        self.tensor_db.cache_tensor(result_tensor_dict)
         self.collaborator_tasks_results[task_key] = task_results
 
         # Check if collaborator or round is done.
@@ -832,48 +813,6 @@ def _end_of_round_with_stragglers_check(self):
             logger.warning(f"Identified stragglers: {self.stragglers}")
         self._end_of_round_check()
 
-    def deserialize_tensor(self, named_tensor, collaborator_name):
-        """Deserialize a `NamedTensor` to a numpy array.
-
-        This function also performs decompresssion.
-
-        Args:
-            named_tensor (protobuf): The tensor to convert to nparray.
-
-        Returns:
-            A tuple (TensorKey, nparray).
-        """
-        metadata = [
-            {
-                "int_to_float": proto.int_to_float,
-                "int_list": proto.int_list,
-                "bool_list": proto.bool_list,
-            }
-            for proto in named_tensor.transformer_metadata
-        ]
-        # The tensor has already been transferred to aggregator,
-        # so the newly constructed tensor should have the aggregator origin
-        tensor_key = TensorKey(
-            named_tensor.name,
-            self.uuid,
-            named_tensor.round_number,
-            named_tensor.report,
-            tuple(named_tensor.tags),
-        )
-
-        tensor_key, nparray = self.tensor_codec.decompress(
-            tensor_key,
-            data=named_tensor.data_bytes,
-            transformer_metadata=metadata,
-            require_lossless=named_tensor.lossless,
-        )
-        updated_tags = change_tags(tensor_key.tags, add_field=collaborator_name)
-        tensor_key = tensor_key._replace(tags=updated_tags)
-
-        self.tensor_db.cache_tensor({tensor_key: nparray})
-
-        return tensor_key, nparray
-
     def _prepare_trained(self, tensor_name, origin, round_number, report, agg_results):
         """Prepare aggregated tensorkey tags.
 
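Net effect of the change: the compress-and-pack / unpack-and-decompress steps move out of the Aggregator into shared utils.serialize_tensor / utils.deserialize_tensor helpers, while the aggregator-specific work (rewriting origin and tags, batching results into the TensorDB) stays at the call site. Below is a minimal sketch of the round trip those helpers are expected to perform, based only on the call signatures visible in this diff; the IdentityCodec and the plain dict standing in for the NamedTensor protobuf are illustrative assumptions, not the project's actual codec or message type.

from collections import namedtuple

import numpy as np

TensorKey = namedtuple("TensorKey", ["tensor_name", "origin", "round_number", "report", "tags"])


class IdentityCodec:
    """Stand-in codec: packs raw bytes plus enough metadata to rebuild the array."""

    def compress(self, tensor_key, nparray, lossless):
        metadata = [{"dtype": str(nparray.dtype), "shape": nparray.shape}]
        return tensor_key, nparray.tobytes(), metadata

    def decompress(self, tensor_key, data, transformer_metadata, require_lossless):
        meta = transformer_metadata[0]
        nparray = np.frombuffer(data, dtype=meta["dtype"]).reshape(meta["shape"])
        return tensor_key, nparray


def serialize_tensor(tensor_key, nparray, tensor_codec, lossless):
    """Compress, then pack into a NamedTensor-like container (here: a dict)."""
    tensor_key, data, metadata = tensor_codec.compress(tensor_key, nparray, lossless)
    return {
        "name": tensor_key.tensor_name,
        "round_number": tensor_key.round_number,
        "report": tensor_key.report,
        "tags": tuple(tensor_key.tags),
        "data_bytes": data,
        "transformer_metadata": metadata,
        "lossless": lossless,
    }


def deserialize_tensor(named_tensor, tensor_codec):
    """Unpack, then decompress; origin is left for the caller to fill in."""
    tensor_key = TensorKey(
        named_tensor["name"],
        None,  # the aggregator rewrites origin via tensor_key._replace(...)
        named_tensor["round_number"],
        named_tensor["report"],
        named_tensor["tags"],
    )
    return tensor_codec.decompress(
        tensor_key,
        data=named_tensor["data_bytes"],
        transformer_metadata=named_tensor["transformer_metadata"],
        require_lossless=named_tensor["lossless"],
    )


# Round trip, mirroring the call sites in the diff.
codec = IdentityCodec()
key = TensorKey("conv1/weight", "collaborator-1", 0, False, ("trained",))
array = np.arange(6, dtype=np.float32).reshape(2, 3)
named = serialize_tensor(key, array, codec, lossless=True)
restored_key, restored = deserialize_tensor(named, codec)
assert restored_key.tensor_name == key.tensor_name
assert np.array_equal(restored, array)

Note the design choice visible in process_task_results: deserialized arrays are collected into result_tensor_dict and written to the tensor DB in a single cache_tensor call, replacing the per-tensor caching that the removed deserialize_tensor method performed.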