diff --git a/NeoML/include/NeoML/Dnn/Dnn.h b/NeoML/include/NeoML/Dnn/Dnn.h index e461ae73a..c8d7f11ca 100644 --- a/NeoML/include/NeoML/Dnn/Dnn.h +++ b/NeoML/include/NeoML/Dnn/Dnn.h @@ -98,20 +98,18 @@ struct CDnnLayerLink { int OutputNumber; // Default value for optional inputs. - CDnnLayerLink() : Layer( 0 ), OutputNumber( -1 ) {} - CDnnLayerLink( const CDnnLayerLink& other ) : - Layer( other.Layer ), OutputNumber( other.OutputNumber ) {} - CDnnLayerLink( CBaseLayer* layer, int outputNumber ) : + CDnnLayerLink() : Layer( nullptr ), OutputNumber( -1 ) {} + CDnnLayerLink( CBaseLayer* layer, int outputNumber = 0 ) : Layer( layer ), OutputNumber( outputNumber ) { - NeoAssert( Layer != 0 ); + NeoAssert( Layer != nullptr ); NeoAssert( OutputNumber >= 0 ); } - - // Converting constructor - CDnnLayerLink( CBaseLayer* layer ) : - Layer( layer ), OutputNumber( 0 ) {} + CDnnLayerLink( CDnnLayerLink&& ) = default; + CDnnLayerLink( const CDnnLayerLink& other ) : + Layer( other.Layer ), OutputNumber( other.OutputNumber ) + {} // Is this layer optional, i.e. created by CLayerOutout() default constructor. bool IsOptional() const { return Layer == 0 && OutputNumber == -1; } @@ -149,7 +147,10 @@ class NEOML_API CBaseLayer : public virtual IObject { // // e.g. layer "InputHidden" inside of CLstmLayer named "LSTM", which is inside of CCompositeLayer named "Encoder" // has path "Encoder/LSTM/InputHidden" - CString GetPath() const; + CString GetPath( const char* sep = "/" ) const; + // Path in form suitable for dnn->GetLayer( CArray& path ); + // Returns an empty array if the path cannot be constructed. + void GetPath( CArray& path ) const; // Connects this layer's inputNumber input to the specified layer's outputNumber output virtual void Connect( int inputNumber, const char* layer, int outputNumber = 0 ); @@ -390,7 +391,8 @@ class NEOML_API CBaseLayer : public virtual IObject { // Set the 'dist' layer's paramBlobs to point to the data of this layer's paramBlobs void transferParamsBlob(CBaseLayer& dist) const; - + // Technical method for recursion in GetPath( CArray& path ) + void getPath( CArray& path ) const; // Switches the specified blobs into sequence processing mode void switchBlobsToSequentialMode(CObjectArray& blobs, TBlobCacheType cacheType, bool storeParent); void switchBlobsToNonSequentialMode(CObjectArray& blobs, TBlobCacheType cacheType, bool clear); @@ -432,6 +434,7 @@ class NEOML_API CBaseLayer : public virtual IObject { friend class CDnnLayerGraph; friend class CDnnSolver; friend class CCompositeLayer; + friend class CDnnHeadAdapterLayer; }; //------------------------------------------------------------------------------------------------------------ @@ -684,6 +687,7 @@ class NEOML_API CDnn : public CDnnLayerGraph { friend class CCompositeLayer; friend class CRecurrentLayer; friend class CDnnReferenceRegister; + friend class CDnnHeadAdapterLayer; }; inline CArchive& operator<<( CArchive& archive, const CDnn& dnn) diff --git a/NeoML/include/NeoML/Dnn/Dnn.inl b/NeoML/include/NeoML/Dnn/Dnn.inl index 497019ab7..81123b43a 100644 --- a/NeoML/include/NeoML/Dnn/Dnn.inl +++ b/NeoML/include/NeoML/Dnn/Dnn.inl @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -56,9 +56,26 @@ inline bool CBaseLayer::IsBackwardNeeded() const return isBackwardNeeded == BS_NeedsBackward; } -inline CString CBaseLayer::GetPath() const +inline CString CBaseLayer::GetPath( const char* sep ) const { - return dnn == nullptr || dnn->owner == nullptr ? name : dnn->owner->GetPath() + "/" + name; + return ( dnn == nullptr || dnn->owner == nullptr ) ? name : ( dnn->owner->GetPath( sep ) + sep + name ); +} + +inline void CBaseLayer::GetPath( CArray& path ) const +{ + path.DeleteAll(); + getPath( path ); +} + +inline void CBaseLayer::getPath( CArray& path ) const +{ + if( dnn == nullptr ) { + return; + } + if( dnn->owner != nullptr ) { + dnn->owner->getPath( path ); + } + path.Add( name ); } inline void CBaseLayer::CheckLayerArchitecture( bool expr, const char* message ) const diff --git a/NeoML/include/NeoML/Dnn/DnnHead.h b/NeoML/include/NeoML/Dnn/DnnHead.h new file mode 100644 index 000000000..05f4b5cfc --- /dev/null +++ b/NeoML/include/NeoML/Dnn/DnnHead.h @@ -0,0 +1,100 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--------------------------------------------------------------------------------------------------------------*/ + +#pragma once + +#include +#include +#include +#include + +namespace NeoML { + +template +class CLayerWrapper; +class CDnnHeadAdapterLayer; +class CGraph; + +namespace optimization { +int OptimizeDnnHeadAdapters( NeoML::CGraph& ); +} + +class CDnnHead : public IObject { +public: + CDnnHead() = default; + + template + CDnnHead(CRandom& random, IMathEngine& mathEngine, CLayerWrapper... linearWrappers) + { + CDnn* head(new CDnn(random, mathEngine)); + + CPtr source = new CCompositeSourceLayer(head->GetMathEngine()); + source->SetName("source"); + head->AddLayer(*source); + CBaseLayer* inputLayer = source; + + // chain connect wrapped layers + using TExpanding = CBaseLayer * []; + TExpanding{ inputLayer = linearWrappers(inputLayer)... 
}; + + CPtr headSink = new CCompositeSinkLayer(head->GetMathEngine()); + headSink->SetName("sink"); + head->AddLayer(*headSink); + headSink->Connect(0, *(inputLayer)); + dnn = head; + } + + CDnn& GetDnn() { return *dnn; } + +private: + ~CDnnHead() override + { + if( dnn != nullptr ) { + delete dnn; + dnn = nullptr; + } + } + + void increment() + { + if( ++headCounter == connections.Size() ) { + headCounter = 0; + firstAdapterNum = -1; + } + } + + CDnn* dnn = nullptr; + + // Stores all adapter using this head + CObjectArray connections; + // Layers for which input/output blobs are stored for Backward/Learn + CArray inputLayers; + CArray outputLayers; + // Pointers to source/sink layers of inner network + CCompositeSourceLayer* sourceLayer = nullptr; + CCompositeSinkLayer* sinkLayer = nullptr; + // Which of the blobs will be used during backward + int blobsForBackward = 0; + // Which of the blobs will be used during learn + int blobsForLearn = 0; + + int headCounter = 0; + int firstAdapterNum = -1; + + friend class CDnnHeadAdapterLayer; + friend int optimization::OptimizeDnnHeadAdapters( CGraph& ); +}; + +} // namespace NeoML diff --git a/NeoML/include/NeoML/Dnn/DnnOptimization.h b/NeoML/include/NeoML/Dnn/DnnOptimization.h index 7e7d157c0..aef0cf9a6 100644 --- a/NeoML/include/NeoML/Dnn/DnnOptimization.h +++ b/NeoML/include/NeoML/Dnn/DnnOptimization.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,6 +23,8 @@ class CDnn; // Struct which contains the details of optimization result struct NEOML_API CDnnOptimizationReport final { + // Number of HeadAdapters layers which internal dnn is optimized + int OptimizedHeadAdapters = 0; // Number of composite layers which where unpacked // (unpack == content of the layer moved to the root CDnn, composite itself is removed) int UnpackedCompositeLayers = 0; @@ -51,7 +53,8 @@ struct NEOML_API CDnnOptimizationReport final { // Check for is any optimization succeed inline bool CDnnOptimizationReport::IsOptimized() const { - return UnpackedCompositeLayers > 0 + return OptimizedHeadAdapters > 0 + || UnpackedCompositeLayers > 0 || RemovedTrivialLayers > 0 || FusedBatchNormalizations > 0 || ChannelwiseWith1x1NonResidual > 0 diff --git a/NeoML/include/NeoML/Dnn/DnnSolver.h b/NeoML/include/NeoML/Dnn/DnnSolver.h index 7a323f734..8d27bca4e 100644 --- a/NeoML/include/NeoML/Dnn/DnnSolver.h +++ b/NeoML/include/NeoML/Dnn/DnnSolver.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -55,7 +55,7 @@ class NEOML_API CDnnSolver : virtual public IObject { void SetMinMaxGradientClipping( float min, float max ) { clipGradientMin = min; clipGradientMax = max; } // Serialize to archive - virtual void Serialize( CArchive& archive, CDnn& dnn ); + virtual void Serialize( CArchive& archive, const CDnn& dnn ); protected: explicit CDnnSolver( IMathEngine& mathEngine ); @@ -86,18 +86,17 @@ class NEOML_API CDnnSolver : virtual public IObject { float clipGradientMax; // The blobs sum - struct CDiffBlobSum { - CDiffBlobSum() : Count( 0 ) {} - - CObjectArray Sum; // the blobs sums - int Count; // the number of terms in each sum + struct CDiffBlobSum final { + const CBaseLayer* LayerOwner{}; // for the given layer + CObjectArray Sum{}; // the blobs sums + int Count{}; // the number of terms in each sum }; // The buffers used to add up the gradients from several AddDiff calls - CMap layerToParamDiffBlobsSum; + CMap layerToParamDiffBlobsSum; // The buffers for storing gradients history and moment // Used in the inheriting classes - CMap> layerToGradientHistory; + CMap> layerToGradientHistory; // Layers which require reduction across distributed solver CHashTable layersToReduce; // Fast check if layer is included already CArray reduceOrder; // Correct order across all of the distributed nets @@ -112,6 +111,10 @@ class NEOML_API CDnnSolver : virtual public IObject { // Telling the compiler that we intentionally using two-parameter Serialize instead of one declared in IObject using IObject::Serialize; + // Convert maps from the previous serialization format + void loadPrevVersionDnnSolverMaps( CArchive& archive, const CDnn& dnn ); + + friend class CDnnHeadAdapterLayer; }; //////////////////////////////////////////////////////////////////////////////////////////////// @@ -170,7 +173,7 @@ class NEOML_API CDnnSimpleGradientSolver : public CDnnSolver { bool IsInCompatibilityMode() const { return isInCompatibilityMode; } void SetCompatibilityMode( bool compatibilityMode ) { isInCompatibilityMode = compatibilityMode; } - void Serialize( CArchive& archive, CDnn& dnn ) override; + void Serialize( CArchive& archive, const CDnn& dnn ) override; protected: void TrainLayer( const CBaseLayer* layer, const CObjectArray& paramBlobs, @@ -234,7 +237,7 @@ class NEOML_API CDnnAdaptiveGradientSolver : public CDnnSolver { // May be called only before training starts. void EnableDecoupledWeightDecay( bool enable ); - void Serialize( CArchive& archive, CDnn& dnn ) override; + void Serialize( CArchive& archive, const CDnn& dnn ) override; protected: // Resets to the initial state @@ -335,7 +338,7 @@ class NEOML_API CDnnNesterovGradientSolver : public CDnnSolver { // May be called only before training starts. 
void EnableDecoupledWeightDecay( bool enable ); - void Serialize( CArchive& archive, CDnn& dnn ) override; + void Serialize( CArchive& archive, const CDnn& dnn ) override; protected: // Resets to the initial state @@ -482,7 +485,7 @@ class NEOML_API CDnnLambGradientSolver : public CDnnSolver { bool GetUseNVLamb() const { return useNvLamb; } void SetUseNVLamb( bool value ) { useNvLamb = value; } - void Serialize( CArchive& archive, CDnn& dnn ) override; + void Serialize( CArchive& archive, const CDnn& dnn ) override; protected: void TrainLayer( const CBaseLayer* layer, const CObjectArray& paramBlobs, diff --git a/NeoML/include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h b/NeoML/include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h new file mode 100644 index 000000000..a4909dc21 --- /dev/null +++ b/NeoML/include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h @@ -0,0 +1,83 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--------------------------------------------------------------------------------------------------------------*/ + +#pragma once + +#include +#include +#include +#include + +namespace NeoML { + +// CDnnHeadAdapterLayer passes data blobs between multiple external layers and a shared internal DNN (head) +// Unlike CompositeLayer, it allows to connect several external layers to same head +class NEOML_API CDnnHeadAdapterLayer final : public CBaseLayer { + NEOML_DNN_LAYER( CDnnHeadAdapterLayer ) +public: + explicit CDnnHeadAdapterLayer( IMathEngine& mathEngine, const char* name = nullptr ) + : CBaseLayer( mathEngine, name == nullptr ? "CDnnHeadAdapterLayer" : name, /*isLearnable*/true ) + {} + + void Serialize( CArchive& archive ) override; + + // Internal shared Dnn between DnnHeadAdapters + void SetDnnHead( CPtr head ); + + // Get Dnn head + const CDnnHead* GetDnnHead() const { return head; }; + CDnnHead* GetDnnHead() { return head; }; + +protected: + void Reshape() override; + void RunOnce() override; + void BackwardOnce() override; + void LearnOnce() override; + // It does not allocate outputBlobs in CBaseLayer in runOnce, because they are not used for inference. + // The outputBlob for CDnnHeadAdapterLayer are sinkLayer->GetBlob() of its internalDnn. 
+ void AllocateOutputBlobs() override {} + int BlobsForBackward() const override { return head->blobsForBackward; } + int BlobsForLearn() const override { return head->blobsForLearn; } + +private: + // Pointer to HeadLayer with inner dnn + CPtr head = nullptr; + // Save first adapter name to connect to necessary head in serialization + CString firstAdapter; + // Stores the number of the layer connected to the internal network + int num = -1; + // Temporarily used to store layers during serialization + CObjectArray layers; + // Stores the input/output blobs from last Inference + CObjectArray innerInputBlobs; + CObjectArray innerOutputBlobs; + + void OnDnnChanged( CDnn* ) override; + void processBackwardOrLearn(); + void configureAdapter(); + void configureFromHead(); + void saveBlobs(); + void loadBlobs(); + void configureForBackwardAndLearn(); +}; + +inline NEOML_API CLayerWrapper DnnHeadAdapter( CDnnHead* head ) +{ + return CLayerWrapper( "DnnHeadAdapter", [=]( CDnnHeadAdapterLayer* result ) { + result->SetDnnHead( head ); + } ); +} + +} // namespace NeoML diff --git a/NeoML/include/NeoML/Dnn/Layers/DropoutLayer.h b/NeoML/include/NeoML/Dnn/Layers/DropoutLayer.h index d1c9e029e..4065f350a 100644 --- a/NeoML/include/NeoML/Dnn/Layers/DropoutLayer.h +++ b/NeoML/include/NeoML/Dnn/Layers/DropoutLayer.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -43,6 +43,9 @@ class NEOML_API CDropoutLayer : public CBaseInPlaceLayer { bool IsBatchwise() const { return isBatchwise; } void SetBatchwise( bool value ); + // Set number of connected HeadAdapterLayers + void SetHeadCounter(int num) { headConnections = num; } + protected: ~CDropoutLayer() override { destroyDropoutDesc(); } @@ -58,6 +61,9 @@ class NEOML_API CDropoutLayer : public CBaseInPlaceLayer { bool isSpatial; // the spatial mode (channel-wise) bool isBatchwise; // the batchwise mode + int headConnections = 1; // stores number of HeadAdapter's using the dropout + int headCount = 0; // counter for BackwardOnce(), if headCount == headConnections, delete desc + void initDropoutDesc(); void destroyDropoutDesc(); }; diff --git a/NeoML/src/CMakeLists.txt b/NeoML/src/CMakeLists.txt index ba17e7a1e..1986e2771 100644 --- a/NeoML/src/CMakeLists.txt +++ b/NeoML/src/CMakeLists.txt @@ -116,6 +116,7 @@ set(NeoML_SOURCES Dnn/Layers/CtcLayer.cpp Dnn/Layers/CumSumLayer.cpp Dnn/Layers/DepthToSpaceLayer.cpp + Dnn/Layers/DnnHeadAdapterLayer.cpp Dnn/Layers/DotProductLayer.cpp Dnn/Layers/EnumBinarizationLayer.cpp Dnn/Layers/FocalLossLayer.cpp @@ -266,6 +267,7 @@ set(NeoML_HEADERS_COMPACT ../include/NeoML/Dnn/Dnn.inl ../include/NeoML/Dnn/DnnBlob.h ../include/NeoML/Dnn/DnnInitializer.h + ../include/NeoML/Dnn/DnnHead.h ../include/NeoML/Dnn/DnnLambdaHolder.h ../include/NeoML/Dnn/DnnSolver.h ../include/NeoML/Dnn/DnnSparseMatrix.h @@ -282,6 +284,7 @@ set(NeoML_HEADERS_COMPACT ../include/NeoML/Dnn/Layers/ConcatLayer.h ../include/NeoML/Dnn/Layers/ConvLayer.h ../include/NeoML/Dnn/Layers/DataLayer.h + ../include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h ../include/NeoML/Dnn/Layers/DropoutLayer.h ../include/NeoML/Dnn/Layers/EltwiseLayer.h ../include/NeoML/Dnn/Layers/FullyConnectedLayer.h diff --git a/NeoML/src/Dnn/Dnn.cpp b/NeoML/src/Dnn/Dnn.cpp index 47855b52f..51f608db6 100644 --- a/NeoML/src/Dnn/Dnn.cpp +++ b/NeoML/src/Dnn/Dnn.cpp @@ -71,6 +71,7 @@ limitations under the License. 
#include #include #include +#include #include #include #include @@ -349,6 +350,7 @@ REGISTER_NEOML_LAYER( CCrfLayer, "FmlCnnCrfLayer" ) REGISTER_NEOML_LAYER( CCrfLossLayer, "FmlCnnCrfLossLayer" ) REGISTER_NEOML_LAYER( CCtcDecodingLayer, "FmlCnnCtcDecodingLayer" ) REGISTER_NEOML_LAYER( CCtcLossLayer, "FmlCnnCtcLossLayer" ) +REGISTER_NEOML_LAYER( CDnnHeadAdapterLayer, "NeoMLDnnHeadAdapterLayer" ) REGISTER_NEOML_LAYER( CDotProductLayer, "FmlCnnDotProductLayer" ) REGISTER_NEOML_LAYER( CEnumBinarizationLayer, "FmlCnnEnumBinarizationLayer" ) REGISTER_NEOML_LAYER( CGlobalMaxPoolingLayer, "FmlCnnGlobalMaxPoolingLayer" ) diff --git a/NeoML/src/Dnn/DnnOptimization.cpp b/NeoML/src/Dnn/DnnOptimization.cpp index 4ab59a52b..580965c95 100644 --- a/NeoML/src/Dnn/DnnOptimization.cpp +++ b/NeoML/src/Dnn/DnnOptimization.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ limitations under the License. #include "Optimization/MobileNetV3Optimizer.h" #include "Optimization/OptimizerFunctions.h" #include +#include #include namespace NeoML { @@ -33,6 +34,7 @@ CDnnOptimizationReport OptimizeDnn( CDnn& dnn, const CDnnOptimizationSettings& s CDnnOptimizationReport report; optimization::CGraph graph( dnn ); + report.OptimizedHeadAdapters = optimization::OptimizeDnnHeadAdapters( graph ); report.UnpackedCompositeLayers = optimization::UnpackComposites( graph ); report.RemovedTrivialLayers = optimization::RemoveTrivialLayers( graph ); optimization::CBatchNormFusionOptimizer( graph ).Apply( report ); diff --git a/NeoML/src/Dnn/DnnSolver.cpp b/NeoML/src/Dnn/DnnSolver.cpp index 3a2b09a50..99bebc9b2 100644 --- a/NeoML/src/Dnn/DnnSolver.cpp +++ b/NeoML/src/Dnn/DnnSolver.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -125,37 +125,8 @@ REGISTER_NEOML_SOLVER( CDnnLambGradientSolver, "NeoMLDnnLambGradientSolver" ) } //////////////////////////////////////////////////////////////////////////////////////////////////// -// Utility functions for serialization -void mapLayerIdToPtr( CDnnLayerGraph& dnn, CMap& result, const CString& prefix = "" ) -{ - CArray layerNames; - dnn.GetLayerList( layerNames ); - for( int layerIndex = 0; layerIndex < layerNames.Size(); ++layerIndex ) { - CPtr layer = dnn.GetLayer( layerNames[layerIndex] ); - result.Add( prefix + layer->GetName(), layer.Ptr() ); - CCompositeLayer* compositePtr = dynamic_cast( layer.Ptr() ); - if( compositePtr != nullptr ) { - mapLayerIdToPtr( *compositePtr, result, prefix + compositePtr->GetName() ); - } - } -} - -void mapLayerPtrToId( CDnnLayerGraph& dnn, CMap& result, const CString& prefix = "" ) -{ - CArray layerNames; - dnn.GetLayerList( layerNames ); - for( int layerIndex = 0; layerIndex < layerNames.Size(); ++layerIndex ) { - CPtr layer = dnn.GetLayer( layerNames[layerIndex] ); - result.Add( layer.Ptr(), prefix + layer->GetName() ); - CCompositeLayer* compositePtr = dynamic_cast( layer.Ptr() ); - if( compositePtr != nullptr ) { - mapLayerPtrToId( *compositePtr, result, prefix + compositePtr->GetName() ); - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// +static constexpr const char* const layerPathSeparator = "/"; CDnnSolver::CDnnSolver( IMathEngine& _mathEngine ) : mathEngine( _mathEngine ), @@ -179,8 +150,10 @@ void CDnnSolver::AddDiff( CBaseLayer* layer, const CObjectArray& param reduceOrder.Add( layer ); } - CDiffBlobSum& paramDiffBlobsSum = layerToParamDiffBlobsSum.GetOrCreateValue( layer ); - + CDiffBlobSum& paramDiffBlobsSum = layerToParamDiffBlobsSum.GetOrCreateValue( layer->GetPath( layerPathSeparator ) ); + if( paramDiffBlobsSum.LayerOwner == nullptr ) { + paramDiffBlobsSum.LayerOwner = layer; + } if( !sharedWeights ) { ++paramDiffBlobsSum.Count; } @@ -206,11 +179,13 @@ void CDnnSolver::Train( float distributedCoeff ) for( TMapPosition pos = layerToParamDiffBlobsSum.GetFirstPosition(); pos != NotFound; pos = layerToParamDiffBlobsSum.GetNextPosition( pos ) ) { - CBaseLayer* layer = layerToParamDiffBlobsSum.GetKey( pos ); + const CString layerPath = layerToParamDiffBlobsSum.GetKey( pos ); CDiffBlobSum& paramDiffBlobsSum = layerToParamDiffBlobsSum.GetValue( pos ); if( paramDiffBlobsSum.Sum.IsEmpty() ) { continue; } + const CBaseLayer* layer = paramDiffBlobsSum.LayerOwner; + NeoAssert( layer != nullptr ); NeoAssert( paramDiffBlobsSum.Count > 0 ); // Take the average of the gradients to simulate that the elements from all runs were in the same batch @@ -226,7 +201,7 @@ void CDnnSolver::Train( float distributedCoeff ) clipGradients( paramDiffBlobsSum.Sum ); // Train the layer based on the calculated diff data - TrainLayer( layer, layer->paramBlobs, paramDiffBlobsSum.Sum, layerToGradientHistory.GetOrCreateValue( layer ) ); + TrainLayer( layer, layer->paramBlobs, paramDiffBlobsSum.Sum, layerToGradientHistory.GetOrCreateValue( layerPath ) ); // Clear the diff data paramDiffBlobsSum.Sum.Empty(); @@ -322,57 +297,123 @@ void CDnnSolver::clipGradients(const CObjectArray& paramDiffBlobs) } } -static const int DnnSolverVersion = 1; +static CString concatLayerPath( const CArray& path ) +{ + CString layerPath = path[0]; + for( int i = 1; i < path.Size(); ++i ) { + layerPath += layerPathSeparator + path[i]; + } + return layerPath; +} + +void CDnnSolver::loadPrevVersionDnnSolverMaps( 
CArchive& archive, const CDnn& dnn ) +{ + CMap> layerPrevIdToPath; + auto mapLayerIdToPath = [&layerPrevIdToPath]( const CDnnLayerGraph& dnn, auto& mapLayerIdToPath ) -> void + { + CArray layerNames; + dnn.GetLayerList( layerNames ); + for( const char* layerName : layerNames ) { + const CBaseLayer* layer = dnn.GetLayer( layerName ); + const CString layerPath = layer->GetPath( "" ); + CArray& path = layerPrevIdToPath.GetOrCreateValue( layerPath ); + layer->GetPath( path ); + NeoAssert( path.Size() ); + const CCompositeLayer* composite = dynamic_cast( layer ); + if( composite != nullptr ) { + mapLayerIdToPath( *composite, mapLayerIdToPath ); + } + } + }; + mapLayerIdToPath( dnn, mapLayerIdToPath ); + + auto convertOldIdToLayerPath = [&]( const CBaseLayer** layer ) + { + CString layerId; + archive >> layerId; + const CArray& path = layerPrevIdToPath[layerId]; + if( layer != nullptr ) { + *layer = dnn.GetLayer( path ); + } + return concatLayerPath( path ); + }; + + int size; + archive >> size; + for( int i = 0; i < size; ++i ) { + const CBaseLayer* layerTemp = nullptr; + const CString layerPath = convertOldIdToLayerPath( &layerTemp ); + + CDiffBlobSum& blobSum = layerToParamDiffBlobsSum.GetOrCreateValue( layerPath ); + archive >> blobSum.Count; + SerializeBlobs( mathEngine, archive, blobSum.Sum ); + blobSum.LayerOwner = layerTemp; + } + + archive >> size; + for( int i = 0; i < size; ++i ) { + const CString layerPath = convertOldIdToLayerPath( nullptr ); + SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetOrCreateValue( layerPath ) ); + } +} + +static const int DnnSolverVersion = 2; -void CDnnSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnSolver::Serialize( CArchive& archive, const CDnn& dnn ) { const int version = archive.SerializeVersion( DnnSolverVersion ); if( archive.IsStoring() ) { - CMap layerPtrToId; - mapLayerPtrToId( dnn, layerPtrToId ); - archive << layerToParamDiffBlobsSum.Size(); for( int pos = layerToParamDiffBlobsSum.GetFirstPosition(); pos != NotFound; pos = layerToParamDiffBlobsSum.GetNextPosition( pos ) ) { - archive << layerPtrToId[layerToParamDiffBlobsSum.GetKey( pos )]; + CString layerPath = layerToParamDiffBlobsSum.GetKey( pos ); + const CBaseLayer* layer = layerToParamDiffBlobsSum.GetValue( pos ).LayerOwner; + NeoAssert( layer != nullptr ); + CArray path; + layer->GetPath( path ); + archive.Serialize( path ); + NeoAssert( path.Size() ); + archive << layerToParamDiffBlobsSum.GetValue( pos ).Count; SerializeBlobs( mathEngine, archive, layerToParamDiffBlobsSum.GetValue( pos ).Sum ); - } - archive << layerToGradientHistory.Size(); - for( int pos = layerToGradientHistory.GetFirstPosition(); pos != NotFound; - pos = layerToGradientHistory.GetNextPosition( pos ) ) - { - archive << layerPtrToId[layerToGradientHistory.GetKey( pos )]; - SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetValue( pos ) ); + const bool hasGradientHistory = layerToGradientHistory.Has( layerPath ); + archive << hasGradientHistory; + if( hasGradientHistory ) { + SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetValue( pos ) ); + } } archive << learningRate << regularizationL1 << regularizationL2 << maxGradientNorm; archive << clipGradientMin << clipGradientMax; } else { - CMap layerIdToPtr; - mapLayerIdToPtr( dnn, layerIdToPtr ); - layerToParamDiffBlobsSum.DeleteAll(); layerToGradientHistory.DeleteAll(); layersToReduce.DeleteAll(); reduceOrder.DeleteAll(); - int size; - archive >> size; - for( int i = 0; i < size; ++i ) { - CString layerId; - archive >> 
layerId; - CDiffBlobSum& blobSum = layerToParamDiffBlobsSum.GetOrCreateValue( layerIdToPtr[layerId] ); - archive >> blobSum.Count; - SerializeBlobs( mathEngine, archive, blobSum.Sum ); - } - - archive >> size; - for( int i = 0; i < size; ++i ) { - CString layerId; - archive >> layerId; - SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetOrCreateValue( layerIdToPtr[layerId] ) ); + if( version >= 2 ) { + int size; + archive >> size; + for( int i = 0; i < size; ++i ) { + CArray path; + archive.Serialize( path ); + NeoAssert( path.Size() ); + + const CString layerPath = concatLayerPath( path ); + CDiffBlobSum& blobSum = layerToParamDiffBlobsSum.GetOrCreateValue( layerPath ); + archive >> blobSum.Count; + SerializeBlobs( mathEngine, archive, blobSum.Sum ); + blobSum.LayerOwner = dnn.GetLayer( path ); + + bool hasGradientHistory; + archive >> hasGradientHistory; + if( hasGradientHistory ) { + SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetOrCreateValue( layerPath ) ); + } + } + } else { + loadPrevVersionDnnSolverMaps( archive, dnn ); } archive >> learningRate >> regularizationL1 >> regularizationL2 >> maxGradientNorm; if( version >= 1 ) { @@ -396,7 +437,7 @@ CDnnSimpleGradientSolver::CDnnSimpleGradientSolver( IMathEngine& mathEngine ) : static const int DnnSimpleGradientSolverVersion = 0; -void CDnnSimpleGradientSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnSimpleGradientSolver::Serialize( CArchive& archive, const CDnn& dnn ) { archive.SerializeVersion( DnnSimpleGradientSolverVersion ); CDnnSolver::Serialize( archive, dnn ); @@ -491,7 +532,7 @@ void CDnnAdaptiveGradientSolver::EnableDecoupledWeightDecay( bool enable ) static const int DnnAdaptiveGradientSolver = 1; -void CDnnAdaptiveGradientSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnAdaptiveGradientSolver::Serialize( CArchive& archive, const CDnn& dnn ) { const int version = archive.SerializeVersion( DnnAdaptiveGradientSolver ); CDnnSolver::Serialize( archive, dnn ); @@ -665,7 +706,7 @@ void CDnnNesterovGradientSolver::EnableDecoupledWeightDecay( bool enable ) static const int DnnNesterovGradientSolverVersion = 1; -void CDnnNesterovGradientSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnNesterovGradientSolver::Serialize( CArchive& archive, const CDnn& dnn ) { const int version = archive.SerializeVersion( DnnNesterovGradientSolverVersion ); CDnnSolver::Serialize( archive, dnn ); @@ -850,7 +891,7 @@ void CDnnLambGradientSolver::ExcludeBiasParamLayers() static const int DnnLambGradientSolverVersion = 0; -void CDnnLambGradientSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnLambGradientSolver::Serialize( CArchive& archive, const CDnn& dnn ) { archive.SerializeVersion( DnnLambGradientSolverVersion ); CDnnSolver::Serialize( archive, dnn ); diff --git a/NeoML/src/Dnn/Layers/DnnHeadAdapterLayer.cpp b/NeoML/src/Dnn/Layers/DnnHeadAdapterLayer.cpp new file mode 100644 index 000000000..12684c9a1 --- /dev/null +++ b/NeoML/src/Dnn/Layers/DnnHeadAdapterLayer.cpp @@ -0,0 +1,274 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http ://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+-------------------------------------------------------------------------------------------------------------- */
+
+#include
+#pragma hdrstop
+
+#include
+#include
+#include
+#include
+
+namespace NeoML {
+
+static const int DnnHeadAdapterLayerVersion = 0;
+
+void CDnnHeadAdapterLayer::Serialize( CArchive& archive )
+{
+	archive.SerializeVersion( DnnHeadAdapterLayerVersion );
+	CBaseLayer::Serialize( archive );
+
+	bool existHead = ( head != nullptr );
+	archive.Serialize( existHead );
+	if( !existHead ) {
+		return;
+	}
+
+	if( archive.IsStoring() ) {
+		archive << head->headCounter;
+		if( head->headCounter > 0 ) {
+			CString name( head->connections[0]->GetName() );
+			archive << name;
+		} else {
+			NeoAssert( head->dnn != nullptr );
+			archive << head->dnn->layers.Size();
+			for( int i = 0; i < head->dnn->layers.Size(); i++ ) {
+				SerializeLayer( archive, MathEngine(), head->dnn->layers[i] );
+			}
+		}
+
+		head->increment();
+	} else if( archive.IsLoading() ) {
+		int num;
+		archive >> num;
+		if( num > 0 ) {
+			archive >> firstAdapter;
+		} else {
+			int layerSize;
+			archive >> layerSize;
+			layers.SetSize( layerSize );
+			for( int i = 0; i < layerSize; i++ ) {
+				SerializeLayer( archive, MathEngine(), layers[i] );
+			}
+		}
+	} else {
+		NeoAssert( false );
+	}
+}
+
+void CDnnHeadAdapterLayer::OnDnnChanged( CDnn* )
+{
+	// If this is the first adapter, create the head dnn and initialize its layers;
+	// otherwise, set the internal DNN head from the first connected adapter layer (after serialization)
+	if( head == nullptr ) {
+		if( !firstAdapter.IsEmpty() ) {
+			SetDnnHead( static_cast<CDnnHeadAdapterLayer*>( ( GetDnn()->GetLayer( firstAdapter ).Ptr() ) )->head );
+		} else if( !layers.IsEmpty() ) {
+			if( GetDnn() != 0 ) {
+				CDnn* internalDnn = FINE_DEBUG_NEW CDnn( GetDnn()->Random(), GetDnn()->GetMathEngine() );
+
+				for( int i = 0; i < layers.Size(); ++i ) {
+					internalDnn->AddLayer( *layers[i] );
+				}
+				head = new CDnnHead();
+				head->dnn = internalDnn;
+				SetDnnHead( head );
+				layers.DeleteAll();
+			}
+		}
+	}
+}
+
+void CDnnHeadAdapterLayer::Reshape()
+{
+	if( head->headCounter > 0 ) {
+		configureFromHead();
+		return;
+	}
+
+	configureAdapter();
+}
+
+void CDnnHeadAdapterLayer::RunOnce()
+{
+	NeoAssert( inputBlobs.Size() == 1 );
+	NeoAssert( head->dnn != nullptr );
+
+	head->sourceLayer->SetBlob( inputBlobs[0] );
+	head->dnn->isReuseMemoryMode = GetDnn()->isReuseMemoryMode;
+	head->dnn->runOnce( GetDnn()->GetCurrentSequencePos() );
+	outputBlobs[0] = head->sinkLayer->GetInputBlob()->GetCopy();
+
+	// save the blobs required for the next backward/learn
+	if( IsBackwardNeeded() || IsLearningEnabled() ) {
+		saveBlobs();
+	}
+}
+
+void CDnnHeadAdapterLayer::processBackwardOrLearn()
+{
+	NeoAssert( head->dnn->isBackwardPerformed == GetDnn()->isBackwardPerformed );
+
+	if( IsBackwardNeeded() ) {
+		head->sourceLayer->SetDiffBlob( inputDiffBlobs[0] );
+	}
+
+	head->sinkLayer->SetDiffBlob( outputDiffBlobs[0] );
+
+	// load the blobs for backward/learn saved on the last RunOnce
+	loadBlobs();
+
+	head->dnn->backwardRunAndLearnOnce( GetDnn()->GetCurrentSequencePos() );
+	innerInputBlobs.DeleteAll();
+	innerOutputBlobs.DeleteAll();
+
+	if( head->headCounter == head->connections.Size() - 1 ) {
+		for( const CBaseLayer* layer : head->dnn->layers ) {
+			if( layer->IsLearningPerformed() ) {
+				int& layerCount = GetDnn()->GetSolver()->layerToParamDiffBlobsSum.GetOrCreateValue( layer->GetPath() ).Count;
+				layerCount = layerCount - head->connections.Size() + 1;
+			}
+		}
+	}
+	head->increment();
+}
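+// Note ( inferred from the adjustment above ): each adapter calls AddDiff for the shared head layers, so the
+// solver's term count for those layers grows once per adapter per batch; after the last adapter's backward
+// pass the count is reduced by ( connections.Size() - 1 ), so the diffs summed over all adapters are treated
+// as a single contribution when the solver averages the accumulated gradients.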
+ +void CDnnHeadAdapterLayer::BackwardOnce() +{ + processBackwardOrLearn(); +} + +void CDnnHeadAdapterLayer::LearnOnce() +{ + if( !IsBackwardPerformed() ) { + processBackwardOrLearn(); + } +} + +void CDnnHeadAdapterLayer::SetDnnHead( CPtr _head ) +{ + head = _head; + num = head->connections.Size(); + head->connections.Add( this ); + ForceReshape(); +} + +void CDnnHeadAdapterLayer::configureAdapter() +{ + NeoAssert( head->dnn != nullptr ); + head->sinkLayer = CheckCast( head->dnn->GetLayer( "sink" ).Ptr() ); + head->sourceLayer = CheckCast( head->dnn->GetLayer( "source" ).Ptr() ); + if( head->sourceLayer->GetBackwardForced() != IsBackwardNeeded() ) { + head->sourceLayer->SetBackwardForced( IsBackwardNeeded() ); + } + head->sourceLayer->SetBlobDesc( inputDescs[0] ); + // If the backward pass requirements have changed, call reshape + bool forcedReshape = head->dnn->IsBackwardPerformed() != GetDnn()->IsBackwardPerformed(); + + // Set the internal network parameters from the external network parameters + head->dnn->setProcessingParams( GetDnn()->IsRecurrentMode(), GetDnn()->GetMaxSequenceLength(), + GetDnn()->IsReverseSequense(), GetDnn()->IsBackwardPerformed() ); + head->dnn->RequestReshape( forcedReshape ); + head->dnn->SetInitializer( GetDnn()->GetInitializer() ); + + head->dnn->SetSolver( GetDnn()->GetSolver() ); + head->dnn->reshape(); + configureForBackwardAndLearn(); + + outputDescs[0] = head->sinkLayer->inputDescs[0]; + head->firstAdapterNum = num; + head->increment(); +} + +void CDnnHeadAdapterLayer::configureFromHead() +{ + outputDescs[0] = head->sinkLayer->inputDescs[0]; + head->increment(); +} + +void CDnnHeadAdapterLayer::saveBlobs() +{ + for( int i = 0; i < head->inputLayers.Size(); ++i ) { + innerInputBlobs.Add( head->inputLayers[i]->inputBlobs[0]->GetCopy() ); + } + + for( int i = 0; i < head->outputLayers.Size(); ++i ) { + innerOutputBlobs.Add( head->outputLayers[i]->outputBlobs[0]->GetCopy() ); + } +} + +void CDnnHeadAdapterLayer::loadBlobs() +{ + for( int i = 0; i < head->inputLayers.Size(); ++i ) { + head->inputLayers[i]->inputBlobs[0] = innerInputBlobs[i]; + } + + for( int i = 0; i < head->outputLayers.Size(); ++i ) { + head->outputLayers[i]->outputBlobs[0] = innerOutputBlobs[i]; + } +} + +void CDnnHeadAdapterLayer::configureForBackwardAndLearn() +{ + head->blobsForBackward = 0; + head->blobsForLearn = 0; + const bool hasBackward = IsBackwardPerformed(); + bool hasLearn = IsLearningPerformed(); + + for( int i = 0; i < head->dnn->layers.Size(); ++i ) { + hasLearn |= head->dnn->layers[i]->IsLearningPerformed(); + auto layer = dynamic_cast( head->dnn->layers[i].Ptr() ); + if( layer != nullptr ) { + layer->SetHeadCounter( head->connections.Size() ); + } + } + + if( !hasBackward && !hasLearn ) { + return; + } + + for( int layerIndex = 0; layerIndex < head->dnn->layers.Size(); ++layerIndex ) { + const CBaseLayer& layer = *head->dnn->layers[layerIndex]; + if( layer.IsBackwardPerformed() && ( layer.BlobsForBackward() & TInputBlobs ) ) { + head->inputLayers.Add( head->dnn->layers[layerIndex] ); + } else if( layer.IsLearningPerformed() && ( layer.BlobsForLearn() & TInputBlobs ) ) { + head->inputLayers.Add( head->dnn->layers[layerIndex] ); + } + + if( layer.IsBackwardPerformed() && ( layer.BlobsForBackward() & TOutputBlobs ) ) { + head->outputLayers.Add( head->dnn->layers[layerIndex] ); + } else if( layer.IsLearningPerformed() && ( layer.BlobsForLearn() & TOutputBlobs ) ) { + head->outputLayers.Add( head->dnn->layers[layerIndex] ); + } + + if( ( !hasBackward || head->blobsForBackward 
!= 0 ) && ( !hasLearn || head->blobsForLearn != 0 ) ) { + break; + } + + for( int inputIndex = 0; inputIndex < layer.GetInputCount(); ++inputIndex ) { + if( dynamic_cast( layer.GetInputLayer( inputIndex ) ) != nullptr ) { + if( hasBackward && layer.IsBackwardPerformed() && ( layer.BlobsForBackward() & TInputBlobs ) != 0 ) { + head->blobsForBackward |= TInputBlobs; + } + if( hasLearn && layer.IsLearningPerformed() && ( layer.BlobsForLearn() & TInputBlobs ) != 0 ) { + head->blobsForLearn |= TInputBlobs; + } + break; + } + } + } +} + +} // namespace NeoML diff --git a/NeoML/src/Dnn/Layers/DropoutLayer.cpp b/NeoML/src/Dnn/Layers/DropoutLayer.cpp index b833451e7..2d6dd8aec 100644 --- a/NeoML/src/Dnn/Layers/DropoutLayer.cpp +++ b/NeoML/src/Dnn/Layers/DropoutLayer.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -103,7 +103,8 @@ void CDropoutLayer::BackwardOnce() MathEngine().Dropout( *desc, outputDiffBlobs[0]->GetData(), inputDiffBlobs[0]->GetData() ); - if( !GetDnn()->IsRecurrentMode() || GetDnn()->IsFirstSequencePos() ) { + if( ( !GetDnn()->IsRecurrentMode() || GetDnn()->IsFirstSequencePos() ) && ++headCount == headConnections ) { + headCount = 0; // Clear the memory after the whole sequence is processed destroyDropoutDesc(); } @@ -112,8 +113,8 @@ void CDropoutLayer::BackwardOnce() void CDropoutLayer::initDropoutDesc() { if( desc == 0 ) { - desc = MathEngine().InitDropout( dropoutRate, isSpatial, isBatchwise, inputBlobs[0]->GetDesc(), outputBlobs[0]->GetDesc(), - GetDnn()->Random().Next() ); + desc = MathEngine().InitDropout( dropoutRate, isSpatial, isBatchwise, inputBlobs[0]->GetDesc(), + outputBlobs[0]->GetDesc(), GetDnn()->Random().Next() ); } } diff --git a/NeoML/src/Dnn/Optimization/OptimizerFunctions.cpp b/NeoML/src/Dnn/Optimization/OptimizerFunctions.cpp index c27844b75..1f5b488e2 100644 --- a/NeoML/src/Dnn/Optimization/OptimizerFunctions.cpp +++ b/NeoML/src/Dnn/Optimization/OptimizerFunctions.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ limitations under the License. 
#include "OptimizerFunctions.h" #include #include +#include #include #include #include @@ -28,7 +29,7 @@ namespace NeoML { namespace optimization { // Returns copy of an original layer -CPtr copyLayer( CBaseLayer& original ) +static CPtr copyLayer( CBaseLayer& original ) { CMemoryFile file; { @@ -150,6 +151,27 @@ int UnpackComposites( CGraph& graph ) //--------------------------------------------------------------------------------------------------------------------- +int OptimizeDnnHeadAdapters( CGraph& graph ) +{ + CArray layers; + graph.GetLayers( layers ); + + int result = 0; + for( CBaseLayer* layer : layers ) { + CDnnHeadAdapterLayer* adapter = dynamic_cast( layer ); + if( adapter != nullptr ) { + CDnnHead* head = adapter->GetDnnHead(); + NeoAssert( head != nullptr ); + if( OptimizeDnn( head->GetDnn() ).IsOptimized() ) { + ++result; + } + } + } + return result; +} + +//--------------------------------------------------------------------------------------------------------------------- + int RemoveTrivialLayers( CGraph& graph ) { int trivialLayersRemoved = 0; @@ -176,6 +198,6 @@ int RemoveTrivialLayers( CGraph& graph ) return trivialLayersRemoved; } -} +} // namespace optimization -} +} // namespace NeoML diff --git a/NeoML/src/Dnn/Optimization/OptimizerFunctions.h b/NeoML/src/Dnn/Optimization/OptimizerFunctions.h index d9bebbcb5..ccbd66122 100644 --- a/NeoML/src/Dnn/Optimization/OptimizerFunctions.h +++ b/NeoML/src/Dnn/Optimization/OptimizerFunctions.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,6 +25,9 @@ class CGraph; // Returns the number of unpacked composites int UnpackComposites( CGraph& graph ); +// TODO: unpack if the only head adapter is using a head dnn +int OptimizeDnnHeadAdapters( CGraph& graph ); + // Removes trivial layers (dropouts, linear(1,0) etc.) // Returns the number of removed layers int RemoveTrivialLayers( CGraph& graph ); diff --git a/NeoML/test/data/LayersSerializationTestData/NeoMLDnnHeadAdapterLayer.arch b/NeoML/test/data/LayersSerializationTestData/NeoMLDnnHeadAdapterLayer.arch new file mode 100644 index 000000000..d4a7bd30b Binary files /dev/null and b/NeoML/test/data/LayersSerializationTestData/NeoMLDnnHeadAdapterLayer.arch differ diff --git a/NeoML/test/src/CMakeLists.txt b/NeoML/test/src/CMakeLists.txt index 7599ee194..2fc085a7d 100644 --- a/NeoML/test/src/CMakeLists.txt +++ b/NeoML/test/src/CMakeLists.txt @@ -13,6 +13,7 @@ target_sources(${PROJECT_NAME} INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CtcTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnBlobTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnDistributedTest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/DnnHeadTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnLayersSerializationTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnSerializationTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnSimpleTests.cpp diff --git a/NeoML/test/src/DnnHeadTest.cpp b/NeoML/test/src/DnnHeadTest.cpp new file mode 100644 index 000000000..8a8f67e3f --- /dev/null +++ b/NeoML/test/src/DnnHeadTest.cpp @@ -0,0 +1,301 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--------------------------------------------------------------------------------------------------------------*/ + +#include +#pragma hdrstop + +#include +#include +#include + +using namespace NeoML; +using namespace NeoMLTest; + +//---------------------------------------------------------------------------------------------------------------------- + +namespace NeoMLTest { + +static void initializeDnnBlobs( CDnn& dnn ) +{ + CRandom random( 0 ); + CDnnUniformInitializer init( random, -0.5, 0.5 ); + + CDnnBlob* source1Blob = CDnnBlob::CreateTensor( MathEngine(), CT_Float, { 1, 1, 1, 4, 2, 3, 10 } ); + init.InitializeLayerParams( *source1Blob, -1 ); + CheckCast( dnn.GetLayer( "source1" ).Ptr() )->SetBlob( source1Blob ); + + CDnnBlob* source2Blob = CDnnBlob::CreateTensor( MathEngine(), CT_Float, { 1, 1, 1, 4, 2, 3, 10 } ); + init.InitializeLayerParams( *source2Blob, -1 ); + CheckCast( dnn.GetLayer( "source2" ).Ptr() )->SetBlob( source2Blob ); + + CDnnBlob* source3Blob = CDnnBlob::CreateTensor( MathEngine(), CT_Float, { 1, 1, 1, 4, 2, 3, 10 } ); + init.InitializeLayerParams( *source3Blob, -1 ); + CheckCast( dnn.GetLayer( "source3" ).Ptr() )->SetBlob( source3Blob ); + + CDnnBlob* targetBlob = CDnnBlob::CreateTensor( MathEngine(), CT_Float, { 1, 1, 1, 1, 1, 1, 3 } ); + targetBlob->GetData().SetValueAt( 0, -1.5f ); + targetBlob->GetData().SetValueAt( 1, 2.4f ); + targetBlob->GetData().SetValueAt( 2, 4.8f ); + CheckCast( dnn.GetLayer( "target" ).Ptr() )->SetBlob( targetBlob ); +} + +static void createDnn( CDnn& dnn, bool isNaive, int complexity = 1000, float dropoutRate = 0.3f, bool freeTerm = false ) +{ + CPtr source1 = Source( dnn, "source1" ); + CPtr source2 = Source( dnn, "source2" ); + CPtr source3 = Source( dnn, "source3" ); + CPtr targets = Source( dnn, "target" ); + + CPtr fc01 = FullyConnected( complexity, freeTerm )( "fc01", source1.Ptr() ); + CPtr fc02 = FullyConnected( complexity, freeTerm )( "fc02", source2.Ptr() ); + CPtr fc03 = FullyConnected( complexity, freeTerm )( "fc03", source3.Ptr() ); + + CPtr concat; + + if( isNaive ) { + // Same architecture but without Head to compare [target] + // | + // [source1] --> [fc01] --> [ ]-->[gelu]-->[ ]-->[relu]-->[ ] --> [ ] v + // |fc1] |fc2| |fc3| |concat| --> [loss] + // [source2] --> [fc02] --> [ ]-->[gelu]-->[ ]-->[relu]-->[ ] --> [ ] + // + + CPtr fc1 = FullyConnected( complexity / 20, freeTerm )( "fc1", fc01.Ptr(), fc02.Ptr(), fc03.Ptr() ); + CPtr gelu1 = Gelu()( "gelu1", CDnnLayerLink{ fc1, 0 } ); + CPtr gelu2 = Gelu()( "gelu2", CDnnLayerLink{ fc1, 1 } ); + CPtr gelu3 = Gelu()( "gelu3", CDnnLayerLink{ fc1, 2 } ); + + CPtr fc2 = FullyConnected( complexity / 60, freeTerm )( "fc2", gelu1.Ptr(), gelu2.Ptr(), gelu3.Ptr() ); + CPtr relu1 = Relu()( "relu1", CDnnLayerLink{ fc2, 0 } ); + CPtr relu2 = Relu()( "relu2", CDnnLayerLink{ fc2, 1 } ); + CPtr relu3 = Relu()( "relu3", CDnnLayerLink{ fc2, 2 } ); + + CPtr dropout1 = Dropout( dropoutRate )( "dp1", relu1.Ptr() ); + CPtr dropout2 = Dropout( dropoutRate )( "dp2", relu2.Ptr() ); + CPtr dropout3 = Dropout( dropoutRate )( "dp3", relu3.Ptr() ); + CPtr fc3 = FullyConnected( 1 )( "fc3", dropout1.Ptr(), 
dropout2.Ptr(), dropout3.Ptr() ); + + concat = ConcatChannels()( "concat", + CDnnLayerLink{ fc3, 0 }, CDnnLayerLink{ fc3, 1 }, CDnnLayerLink{ fc3, 2 } ); + + } else { + // +-----[fc01]- ---+ + // | | +-----------+ [target] + // | v | | | + // [source1] |-----------------------------------------| v v + // |[fc1]->[gelu]->[fc2]->[relu]->[dp]->[fc3]| [concat]->[loss] + // [source2] |-----------------------------------------| ^ + // | ^ | | + // | | +-----------+ + // +-----[fc02]-----+ + + CPtr head = new CDnnHead( + dnn.Random(), dnn.GetMathEngine(), + FullyConnected( complexity / 20, freeTerm ), // "fc1" + Gelu(), + FullyConnected( complexity / 60, freeTerm ), // "fc2" + Relu(), + Dropout( dropoutRate ), + FullyConnected( 1 ) // "fc3", + ); + + CPtr head1 = DnnHeadAdapter( head )( "head1", fc01.Ptr() ); + CPtr head2 = DnnHeadAdapter( head )( "head2", fc02.Ptr() ); + CPtr head3 = DnnHeadAdapter( head )( "head3", fc03.Ptr() ); + + concat = ConcatChannels()( "concat", head1.Ptr(), head2.Ptr(), head3.Ptr() ); + } + + CPtr loss = EuclideanLoss()( "loss", concat.Ptr(), targets.Ptr() ); + CPtr sink = Sink( concat.Ptr(), "sink" ); + + CPtr solver = new CDnnAdaptiveGradientSolver( MathEngine() ); + solver->SetLearningRate( /*learningRate*/1e-3f ); + dnn.SetSolver( solver.Ptr() ); + + initializeDnnBlobs( dnn ); +} + +static void testDnnAdapterPerformace( bool isNaive, int interations = 1000, bool train = true ) +{ + IPerformanceCounters* counters = MathEngine().CreatePerformanceCounters(); + const char* fileName = "DnnAdapter.cnnarch"; + + GTEST_LOG_( INFO ) << "\n interations = " << interations << " is_naive = " << isNaive << "\n" + << "|" << std::setw( 10 ) << "size " + << "|" << std::setw( 21 ) << "Train " << "|" << std::setw( 21 ) << "Inference " << "|\n" + << "|" << std::setw( 10 ) << "" + << "|" << std::setw( 10 ) << "time (ms) " << "|" << std::setw( 10 ) << "mem (MB) " + << "|" << std::setw( 10 ) << "time (ms) " << "|" << std::setw( 10 ) << "mem (MB) " << "|\n"; + + const int complexity = 1000; + for( int size = 1 * complexity; size <= 4 * complexity; size += complexity ) { + { + CRandom random( 0 ); + CDnn dnn( random, MathEngine() ); + + createDnn( dnn, isNaive, size ); + OptimizeDnn( dnn ); + + dnn.CleanUp( /*force*/true ); + initializeDnnBlobs( dnn ); + + MathEngine().CleanUp(); + MathEngine().ResetPeakMemoryUsage(); + + if( train ) { + dnn.RunAndLearnOnce(); + counters->Synchronise(); + for( int i = 0; i < interations; ++i ) { + dnn.RunAndLearnOnce(); + } + counters->Synchronise(); + } + CArchiveFile file( fileName, CArchive::store, GetPlatformEnv() ); + CArchive archive( &file, CArchive::store ); + archive << dnn; + } + double train_time = train ? ( double( ( *counters )[0].Value ) / 1000000 ) : 0.; + double train_mem = train ? 
( double( MathEngine().GetPeakMemoryUsage() ) / 1024 / 1024 ) : 0.; + + { + CRandom random( 0 ); + CDnn dnn( random, MathEngine() ); + + CArchiveFile file( fileName, CArchive::load, GetPlatformEnv() ); + CArchive archive( &file, CArchive::load ); + archive >> dnn; + + dnn.CleanUp( /*force*/true ); + initializeDnnBlobs( dnn ); + + MathEngine().CleanUp(); + MathEngine().ResetPeakMemoryUsage(); + + dnn.RunOnce(); + counters->Synchronise(); + for( int i = 0; i < interations; ++i ) { + dnn.RunOnce(); + } + counters->Synchronise(); + } + double inference_time = double( ( *counters )[0].Value ) / 1000000; + double inference_mem = double( MathEngine().GetPeakMemoryUsage() ) / 1024 / 1024; + + std::cout + << "|" << std::setw( 10 ) << size + << "|" << std::setw( 10 ) << train_time << "|" << std::setw( 10 ) << train_mem + << "|" << std::setw( 10 ) << inference_time << "|" << std::setw( 10 ) << inference_mem << "|\n"; + } + delete counters; +} + +} // namespace NeoMLTest + +//---------------------------------------------------------------------------------------------------------------------- + +TEST( CDnnHeadTest, DnnHeadAdapterLearnTest ) +{ + CRandom random( 0x17 ); + CDnn dnn( random, MathEngine() ); + createDnn( dnn, /*isNaive*/false, /*complexity*/1000, /*dropout*/0.f ); + + for( int i = 0; i < 200; ++i ) { + dnn.RunAndLearnOnce(); + } + + EXPECT_NEAR( CheckCast( dnn.GetLayer( "loss" ).Ptr() )->GetLastLoss(), 0, 1e-3f ); +} + +TEST( CDnnHeadTest, DnnHeadAdapterInferenceMatch ) +{ + auto runOnce = []( bool isNaive ) + { + CRandom random( 0x11 ); + CPtr init = new CDnnUniformInitializer( random, 0.05f, 0.05f ); + + CDnn dnn( random, MathEngine() ); + dnn.SetInitializer( init.Ptr() ); + createDnn( dnn, isNaive ); + + dnn.RunOnce(); + return CheckCast( dnn.GetLayer( "sink" ).Ptr() )->GetBlob(); + }; + + CPtr expected = runOnce( /*isNaive*/false ); + CPtr output = runOnce( /*isNaive*/true ); + + EXPECT_TRUE( CompareBlobs( *expected, *output ) ); +} + +TEST( CDnnHeadTest, DnnHeadAdapterLearningMatch ) +{ + CRandom random( 0x01 ); + CPtr init = new CDnnUniformInitializer( random, 0.05f, 0.05f ); + + CDnn dnnNoAdapters( random, MathEngine() ); + dnnNoAdapters.SetInitializer( init.Ptr() ); + createDnn( dnnNoAdapters, /*isNaive*/true, /*complexity*/1000, /*dropout*/0.f, /*freeTerm*/false ); + + CRandom randomWithAdapters( 0x01 ); + CDnn dnnWithAdapters( randomWithAdapters, MathEngine() ); + dnnWithAdapters.SetInitializer( init.Ptr() ); + createDnn( dnnWithAdapters, /*isNaive*/false, /*complexity*/1000, /*dropout*/0.f, /*freeTerm*/false ); + + CPtr expectedLoss = CheckCast( dnnNoAdapters.GetLayer( "loss" ).Ptr() ); + CPtr outputLoss = CheckCast( dnnWithAdapters.GetLayer( "loss" ).Ptr() ); + + for( int i = 0; i < 100; ++i ) { + dnnNoAdapters.RunAndLearnOnce(); + dnnWithAdapters.RunAndLearnOnce(); + EXPECT_NEAR( expectedLoss->GetLastLoss(), outputLoss->GetLastLoss(), 1e-3f ); + } +} + +TEST( CDnnHeadTest, DnnHeadAdapterSerializationTest ) +{ + CRandom random( 0 ); + CDnn dnn( random, MathEngine() ); + + createDnn( dnn, /*isNaive*/false ); + dnn.RunOnce(); + + CPtr expected = CheckCast( dnn.GetLayer( "sink" ).Ptr() )->GetBlob(); + { + CMemoryFile file; + { + CArchive archive( &file, CArchive::store ); + dnn.Serialize( archive ); + } + file.SeekToBegin(); + { + CArchive archive( &file, CArchive::load ); + dnn.Serialize( archive ); + } + } + initializeDnnBlobs( dnn ); + dnn.RunOnce(); + CPtr output = CheckCast( dnn.GetLayer( "sink" ).Ptr() )->GetBlob(); + EXPECT_TRUE( CompareBlobs( *expected, *output ) ); +} + 
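+// The disabled benchmark below builds the same network with the shared head ( isNaive = false ) and as a plain
+// unrolled network ( isNaive = true ), and reports train/inference time and peak memory for several layer sizes.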
+TEST( CDnnHeadTest, DISABLED_DnnHeadAdapterInferencePerformance )
+{
+	DeleteMathEngine();
+	testDnnAdapterPerformace( /*isNaive*/false, /*interations*/200 );
+
+	DeleteMathEngine();
+	testDnnAdapterPerformace( /*isNaive*/true, /*interations*/200 );
+}
diff --git a/NeoML/test/src/DnnLayersSerializationTest.cpp b/NeoML/test/src/DnnLayersSerializationTest.cpp
index 5b9d80b58..d0b3916fb 100644
--- a/NeoML/test/src/DnnLayersSerializationTest.cpp
+++ b/NeoML/test/src/DnnLayersSerializationTest.cpp
@@ -3410,3 +3410,79 @@ GTEST_TEST( SerializeFromFile, LoraFullyConnectedLayerSerialization )
 {
 	checkSerializeLayer( "NeoMLDnnLoraFullyConnectedLayer" );
 }
+
+// ====================================================================================================================
+
+// CDnnHeadAdapterLayer
+
+static CPtr<CDnnHeadAdapterLayer> createDnnHeadAdapterNet( CDnn& dnn )
+{
+	CPtr<CDnnHead> head = new CDnnHead( dnn.Random(), MathEngine(),
+		FullyConnected( 300 ),
+		Relu()
+	);
+
+	CPtr<CDnnHeadAdapterLayer> layerPtr = new CDnnHeadAdapterLayer( MathEngine() );
+	layerPtr->SetName( LayerName );
+	layerPtr->SetDnnHead( head );
+	dnn.AddLayer( *layerPtr );
+
+	return layerPtr;
+}
+
+#ifdef GENERATE_SERIALIZATION_FILES
+
+GTEST_TEST( SerializeToFile, DnnHeadAdapterLayerSerialization )
+{
+	CRandom random;
+	CDnn dnn( random, MathEngine() );
+
+	CPtr<CDnnHeadAdapterLayer> layerPtr = createDnnHeadAdapterNet( dnn );
+	setBaseParams( *layerPtr );
+
+	CArchiveFile file( getFileName( "NeoMLDnnHeadAdapterLayer" ), CArchive::store );
+	CArchive archive( &file, CArchive::store );
+	archive.Serialize( dnn );
+}
+
+#endif // GENERATE_SERIALIZATION_FILES
+
+template<>
+inline void checkSpecificParams( CDnnHeadAdapterLayer& layer )
+{
+	auto runOnce = []( CDnn& dnn, CDnnHeadAdapterLayer& adapter )
+	{
+		CSourceLayer* source = dnn.HasLayer( "source" )
+			? CheckCast<CSourceLayer>( dnn.GetLayer( "source" ).Ptr() )
+			: Source( dnn, "source" );
+
+		CPtr<CDnnBlob> blob = CDnnBlob::CreateVector( MathEngine(), CT_Float, 2 );
+		blob->Fill( TestFloatValue );
+
+		source->SetBlob( blob );
+		adapter.Connect( *source );
+
+		CSinkLayer* sink = dnn.HasLayer( "sink" )
+			? CheckCast<CSinkLayer>( dnn.GetLayer( "sink" ).Ptr() )
+			: Sink( &adapter, "sink" );
+
+		dnn.RunOnce();
+		return sink->GetBlob();
+	};
+
+	CRandom random;
+	CDnn dnn( random, MathEngine() );
+
+	CPtr<CDnnBlob> expected = runOnce( dnn, *createDnnHeadAdapterNet( dnn ) );
+	CPtr<CDnnBlob> output = runOnce( *layer.GetDnn(), layer );
+	EXPECT_TRUE( CompareBlobs( *expected, *output ) );
+	EXPECT_TRUE( layer.GetDnnHead() ); // found and initialized
+}
+
+GTEST_TEST( SerializeFromFile, DnnHeadAdapterLayerSerialization )
+{
+	checkSerializeLayer( "NeoMLDnnHeadAdapterLayer" );
+}
+
+// ====================================================================================================================
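For reference, a minimal usage sketch of the CDnnHead / DnnHeadAdapter API introduced in this change, mirroring the helper wrappers used in DnnHeadTest.cpp above (the layer sizes and layer names here are illustrative, not taken from the change itself):

CRandom random( 0 );
CDnn dnn( random, MathEngine() );

// Regular inputs of the outer network.
CPtr<CSourceLayer> source1 = Source( dnn, "source1" );
CPtr<CSourceLayer> source2 = Source( dnn, "source2" );

// One head owns a small internal CDnn built from the wrapped layers;
// every adapter connected to it shares the same trainable parameters.
CPtr<CDnnHead> head = new CDnnHead( dnn.Random(), dnn.GetMathEngine(),
	FullyConnected( 50 ),
	Relu(),
	FullyConnected( 1 ) );

CPtr<CDnnHeadAdapterLayer> head1 = DnnHeadAdapter( head )( "head1", source1.Ptr() );
CPtr<CDnnHeadAdapterLayer> head2 = DnnHeadAdapter( head )( "head2", source2.Ptr() );

// The adapters' outputs are used like any other layer outputs.
CPtr<CConcatChannelsLayer> concat = ConcatChannels()( "concat", head1.Ptr(), head2.Ptr() );
Sink( concat.Ptr(), "sink" );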