26 changes: 15 additions & 11 deletions NeoML/include/NeoML/Dnn/Dnn.h
@@ -98,20 +98,18 @@ struct CDnnLayerLink {
int OutputNumber;

// Default value for optional inputs.
CDnnLayerLink() : Layer( 0 ), OutputNumber( -1 ) {}
CDnnLayerLink( const CDnnLayerLink& other ) :
Layer( other.Layer ), OutputNumber( other.OutputNumber ) {}
CDnnLayerLink( CBaseLayer* layer, int outputNumber ) :
CDnnLayerLink() : Layer( nullptr ), OutputNumber( -1 ) {}
CDnnLayerLink( CBaseLayer* layer, int outputNumber = 0 ) :
Layer( layer ),
OutputNumber( outputNumber )
{
NeoAssert( Layer != 0 );
NeoAssert( Layer != nullptr );
NeoAssert( OutputNumber >= 0 );
}

// Converting constructor
CDnnLayerLink( CBaseLayer* layer ) :
Layer( layer ), OutputNumber( 0 ) {}
CDnnLayerLink( CDnnLayerLink&& ) = default;
CDnnLayerLink( const CDnnLayerLink& other ) :
Layer( other.Layer ), OutputNumber( other.OutputNumber )
{}

// Is this layer optional, i.e. created by the CLayerOutput() default constructor.
bool IsOptional() const { return Layer == 0 && OutputNumber == -1; }
@@ -149,7 +147,10 @@ class NEOML_API CBaseLayer : public virtual IObject {
//
// e.g. layer "InputHidden" inside of CLstmLayer named "LSTM", which is inside of CCompositeLayer named "Encoder"
// has path "Encoder/LSTM/InputHidden"
CString GetPath() const;
CString GetPath( const char* sep = "/" ) const;
// Path in a form suitable for dnn->GetLayer( CArray<CString>& path );
// Returns an empty array if the path cannot be constructed.
void GetPath( CArray<CString>& path ) const;

// Connects this layer's inputNumber input to the specified layer's outputNumber output
virtual void Connect( int inputNumber, const char* layer, int outputNumber = 0 );
@@ -390,7 +391,8 @@ class NEOML_API CBaseLayer : public virtual IObject {

// Set the 'dist' layer's paramBlobs to point to the data of this layer's paramBlobs
void transferParamsBlob(CBaseLayer& dist) const;

// Helper method implementing the recursion for GetPath( CArray<CString>& path )
void getPath( CArray<CString>& path ) const;
// Switches the specified blobs into sequence processing mode
void switchBlobsToSequentialMode(CObjectArray<CDnnBlob>& blobs, TBlobCacheType cacheType, bool storeParent);
void switchBlobsToNonSequentialMode(CObjectArray<CDnnBlob>& blobs, TBlobCacheType cacheType, bool clear);
Expand Down Expand Up @@ -432,6 +434,7 @@ class NEOML_API CBaseLayer : public virtual IObject {
friend class CDnnLayerGraph;
friend class CDnnSolver;
friend class CCompositeLayer;
friend class CDnnHeadAdapterLayer;
};

//------------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -684,6 +687,7 @@ class NEOML_API CDnn : public CDnnLayerGraph {
friend class CCompositeLayer;
friend class CRecurrentLayer;
friend class CDnnReferenceRegister;
friend class CDnnHeadAdapterLayer;
};

inline CArchive& operator<<( CArchive& archive, const CDnn& dnn)
23 changes: 20 additions & 3 deletions NeoML/include/NeoML/Dnn/Dnn.inl
@@ -1,4 +1,4 @@
/* Copyright © 2017-2023 ABBYY
/* Copyright © 2017-2024 ABBYY

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -56,9 +56,26 @@ inline bool CBaseLayer::IsBackwardNeeded() const
return isBackwardNeeded == BS_NeedsBackward;
}

inline CString CBaseLayer::GetPath() const
inline CString CBaseLayer::GetPath( const char* sep ) const
{
return dnn == nullptr || dnn->owner == nullptr ? name : dnn->owner->GetPath() + "/" + name;
return ( dnn == nullptr || dnn->owner == nullptr ) ? name : ( dnn->owner->GetPath( sep ) + sep + name );
}

inline void CBaseLayer::GetPath( CArray<CString>& path ) const
{
path.DeleteAll();
getPath( path );
}

inline void CBaseLayer::getPath( CArray<CString>& path ) const
{
if( dnn == nullptr ) {
return;
}
if( dnn->owner != nullptr ) {
dnn->owner->getPath( path );
}
path.Add( name );
}

inline void CBaseLayer::CheckLayerArchitecture( bool expr, const char* message ) const
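A minimal usage sketch of the two GetPath() overloads added above, assuming a fully built network; resolving the layer back via dnn.GetLayer( path ) follows the comment in Dnn.h, and the exact return type of that call is an assumption here.

```cpp
#include <NeoML/NeoML.h>

using namespace NeoML;

// Resolves a layer back from its own path; names like "Encoder/LSTM/InputHidden" are illustrative.
static CPtr<CBaseLayer> ResolveByOwnPath( CDnn& dnn, const CBaseLayer& layer )
{
	// String form: nested names joined with the separator, "/" by default.
	CString slashPath = layer.GetPath();        // e.g. "Encoder/LSTM/InputHidden"
	CString dottedPath = layer.GetPath( "." );  // e.g. "Encoder.LSTM.InputHidden"

	// Array form: { "Encoder", "LSTM", "InputHidden" }, suitable for dnn.GetLayer( path ).
	CArray<CString> path;
	layer.GetPath( path );
	if( path.IsEmpty() ) {
		return nullptr; // the layer is not attached to any dnn
	}
	return dnn.GetLayer( path );
}
```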
100 changes: 100 additions & 0 deletions NeoML/include/NeoML/Dnn/DnnHead.h
@@ -0,0 +1,100 @@
/* Copyright © 2024 ABBYY

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------------------------------------------------------------------------------------------------------------*/

#pragma once

#include <initializer_list>
#include <NeoML/NeoML.h>
#include <NeoML/Dnn/Dnn.h>
#include <NeoML/Dnn/Layers/CompositeLayer.h>

namespace NeoML {

template <typename T>
class CLayerWrapper;
class CDnnHeadAdapterLayer;
class CGraph;

namespace optimization {
int OptimizeDnnHeadAdapters( NeoML::CGraph& );
}

class CDnnHead : public IObject {
public:
CDnnHead() = default;

template <typename... Ts>
CDnnHead(CRandom& random, IMathEngine& mathEngine, CLayerWrapper<Ts>... linearWrappers)
{
CDnn* head(new CDnn(random, mathEngine));

CPtr<CCompositeSourceLayer> source = new CCompositeSourceLayer(head->GetMathEngine());
source->SetName("source");
head->AddLayer(*source);
CBaseLayer* inputLayer = source;

// chain connect wrapped layers
using TExpanding = CBaseLayer * [];
TExpanding{ inputLayer = linearWrappers(inputLayer)... };

CPtr<CCompositeSinkLayer> headSink = new CCompositeSinkLayer(head->GetMathEngine());
headSink->SetName("sink");
head->AddLayer(*headSink);
headSink->Connect(0, *(inputLayer));
dnn = head;
}

CDnn& GetDnn() { return *dnn; }

private:
~CDnnHead() override
{
if( dnn != nullptr ) {
delete dnn;
dnn = nullptr;
}
}

void increment()
{
if( ++headCounter == connections.Size() ) {
headCounter = 0;
firstAdapterNum = -1;
}
}

CDnn* dnn = nullptr;

// Stores all adapters using this head
CObjectArray<CDnnHeadAdapterLayer> connections;
// Layers for which input/output blobs are stored for Backward/Learn
CArray<CBaseLayer*> inputLayers;
CArray<CBaseLayer*> outputLayers;
// Pointers to source/sink layers of inner network
CCompositeSourceLayer* sourceLayer = nullptr;
CCompositeSinkLayer* sinkLayer = nullptr;
// Which of the blobs will be used during backward
int blobsForBackward = 0;
// Which of the blobs will be used during learn
int blobsForLearn = 0;

int headCounter = 0;
int firstAdapterNum = -1;

friend class CDnnHeadAdapterLayer;
friend int optimization::OptimizeDnnHeadAdapters( CGraph& );
};

} // namespace NeoML
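For orientation, a sketch of how a head might be assembled: the variadic constructor above chains the wrapped layers between the internal "source" and "sink" layers. FullyConnected() and Relu() are assumed to be the usual CLayerWrapper helpers; the sizes are illustrative.

```cpp
#include <NeoML/NeoML.h>

using namespace NeoML;

// Builds a small shared head: FC(64) -> ReLU -> FC(1) between the internal "source" and "sink".
static CPtr<CDnnHead> BuildHead( CRandom& random, IMathEngine& mathEngine )
{
	return new CDnnHead(
		random, mathEngine,
		FullyConnected( 64 ),
		Relu(),
		FullyConnected( 1 )
	);
}
```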
7 changes: 5 additions & 2 deletions NeoML/include/NeoML/Dnn/DnnOptimization.h
@@ -1,4 +1,4 @@
/* Copyright © 2017-2023 ABBYY
/* Copyright © 2017-2024 ABBYY

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -23,6 +23,8 @@ class CDnn;

// Struct which contains the details of optimization result
struct NEOML_API CDnnOptimizationReport final {
// Number of head adapter layers whose internal dnn was optimized
int OptimizedHeadAdapters = 0;
// Number of composite layers which were unpacked
// (unpack == content of the layer moved to the root CDnn, composite itself is removed)
int UnpackedCompositeLayers = 0;
@@ -51,7 +53,8 @@ struct NEOML_API CDnnOptimizationReport final {
// Checks whether any optimization succeeded
inline bool CDnnOptimizationReport::IsOptimized() const
{
return UnpackedCompositeLayers > 0
return OptimizedHeadAdapters > 0
|| UnpackedCompositeLayers > 0
|| RemovedTrivialLayers > 0
|| FusedBatchNormalizations > 0
|| ChannelwiseWith1x1NonResidual > 0
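A sketch of reading the new report field after optimization; OptimizeDnn() is assumed to be the existing entry point declared in this header.

```cpp
#include <NeoML/NeoML.h>

using namespace NeoML;

// Runs graph optimizations and reports how many head adapters had their internal dnn optimized.
static int CountOptimizedHeadAdapters( CDnn& dnn )
{
	CDnnOptimizationReport report = OptimizeDnn( dnn );
	return report.IsOptimized() ? report.OptimizedHeadAdapters : 0;
}
```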
29 changes: 16 additions & 13 deletions NeoML/include/NeoML/Dnn/DnnSolver.h
@@ -1,4 +1,4 @@
/* Copyright © 2017-2023 ABBYY
/* Copyright © 2017-2024 ABBYY

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ class NEOML_API CDnnSolver : virtual public IObject {
void SetMinMaxGradientClipping( float min, float max ) { clipGradientMin = min; clipGradientMax = max; }

// Serialize to archive
virtual void Serialize( CArchive& archive, CDnn& dnn );
virtual void Serialize( CArchive& archive, const CDnn& dnn );

protected:
explicit CDnnSolver( IMathEngine& mathEngine );
@@ -86,18 +86,17 @@ class NEOML_API CDnnSolver : virtual public IObject {
float clipGradientMax;

// The blobs sum
struct CDiffBlobSum {
CDiffBlobSum() : Count( 0 ) {}

CObjectArray<CDnnBlob> Sum; // the blobs sums
int Count; // the number of terms in each sum
struct CDiffBlobSum final {
const CBaseLayer* LayerOwner{}; // for the given layer
CObjectArray<CDnnBlob> Sum{}; // the blobs sums
int Count{}; // the number of terms in each sum
};

// The buffers used to add up the gradients from several AddDiff calls
CMap<CBaseLayer*, CDiffBlobSum> layerToParamDiffBlobsSum;
CMap<CString, CDiffBlobSum> layerToParamDiffBlobsSum;
// The buffers for storing gradients history and moment
// Used in the inheriting classes
CMap<CBaseLayer*, CObjectArray<CDnnBlob>> layerToGradientHistory;
CMap<CString, CObjectArray<CDnnBlob>> layerToGradientHistory;
// Layers which require reduction across distributed solver
CHashTable<CBaseLayer*> layersToReduce; // Fast check if layer is included already
CArray<CBaseLayer*> reduceOrder; // Correct order across all of the distributed nets
@@ -112,6 +111,10 @@

// Telling the compiler that we are intentionally using the two-parameter Serialize instead of the one declared in IObject
using IObject::Serialize;
// Convert maps from the previous serialization format
void loadPrevVersionDnnSolverMaps( CArchive& archive, const CDnn& dnn );

friend class CDnnHeadAdapterLayer;
};

////////////////////////////////////////////////////////////////////////////////////////////////
@@ -170,7 +173,7 @@ class NEOML_API CDnnSimpleGradientSolver : public CDnnSolver {
bool IsInCompatibilityMode() const { return isInCompatibilityMode; }
void SetCompatibilityMode( bool compatibilityMode ) { isInCompatibilityMode = compatibilityMode; }

void Serialize( CArchive& archive, CDnn& dnn ) override;
void Serialize( CArchive& archive, const CDnn& dnn ) override;

protected:
void TrainLayer( const CBaseLayer* layer, const CObjectArray<CDnnBlob>& paramBlobs,
@@ -234,7 +237,7 @@ class NEOML_API CDnnAdaptiveGradientSolver : public CDnnSolver {
// May be called only before training starts.
void EnableDecoupledWeightDecay( bool enable );

void Serialize( CArchive& archive, CDnn& dnn ) override;
void Serialize( CArchive& archive, const CDnn& dnn ) override;

protected:
// Resets to the initial state
@@ -335,7 +338,7 @@ class NEOML_API CDnnNesterovGradientSolver : public CDnnSolver {
// May be called only before training starts.
void EnableDecoupledWeightDecay( bool enable );

void Serialize( CArchive& archive, CDnn& dnn ) override;
void Serialize( CArchive& archive, const CDnn& dnn ) override;

protected:
// Resets to the initial state
@@ -482,7 +485,7 @@ class NEOML_API CDnnLambGradientSolver : public CDnnSolver {
bool GetUseNVLamb() const { return useNvLamb; }
void SetUseNVLamb( bool value ) { useNvLamb = value; }

void Serialize( CArchive& archive, CDnn& dnn ) override;
void Serialize( CArchive& archive, const CDnn& dnn ) override;

protected:
void TrainLayer( const CBaseLayer* layer, const CObjectArray<CDnnBlob>& paramBlobs,
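A minimal sketch of the changed serialization signature: the dnn is now passed as const, and the name-keyed accumulation maps (with loadPrevVersionDnnSolverMaps covering archives written in the old pointer-keyed format) are handled inside Serialize.

```cpp
#include <NeoML/NeoML.h>

using namespace NeoML;

// Stores or restores the solver state; direction is controlled by the archive itself.
static void SerializeSolverState( CArchive& archive, CDnnSolver& solver, const CDnn& dnn )
{
	solver.Serialize( archive, dnn );
}
```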
83 changes: 83 additions & 0 deletions NeoML/include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h
@@ -0,0 +1,83 @@
/* Copyright © 2024 ABBYY

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------------------------------------------------------------------------------------------------------------*/

#pragma once

#include <memory>
#include <NeoML/NeoMLDefs.h>
#include <NeoML/Dnn/Dnn.h>
#include <NeoML/Dnn/DnnHead.h>

namespace NeoML {

// CDnnHeadAdapterLayer passes data blobs between multiple external layers and a shared internal dnn (the head)
// Unlike CCompositeLayer, it allows connecting several external layers to the same head
class NEOML_API CDnnHeadAdapterLayer final : public CBaseLayer {
NEOML_DNN_LAYER( CDnnHeadAdapterLayer )
public:
explicit CDnnHeadAdapterLayer( IMathEngine& mathEngine, const char* name = nullptr )
: CBaseLayer( mathEngine, name == nullptr ? "CDnnHeadAdapterLayer" : name, /*isLearnable*/true )
{}

void Serialize( CArchive& archive ) override;

// Sets the internal dnn (head) shared between DnnHeadAdapters
void SetDnnHead( CPtr<CDnnHead> head );

// Get Dnn head
const CDnnHead* GetDnnHead() const { return head; };
CDnnHead* GetDnnHead() { return head; };

protected:
void Reshape() override;
void RunOnce() override;
void BackwardOnce() override;
void LearnOnce() override;
// Does not allocate outputBlobs in CBaseLayer::runOnce, because they are not used for inference.
// The output blobs for CDnnHeadAdapterLayer are the sinkLayer->GetBlob() results of its internal dnn.
void AllocateOutputBlobs() override {}
int BlobsForBackward() const override { return head->blobsForBackward; }
int BlobsForLearn() const override { return head->blobsForLearn; }

private:
// Pointer to the shared head with the inner dnn
CPtr<CDnnHead> head = nullptr;
// Name of the first adapter, used to connect to the correct head during serialization
CString firstAdapter;
Contributor comment on firstAdapter: What if the first adapter will be removed from the dnn?
// Stores the number of the layer connected to the internal network
int num = -1;
// Temporarily used to store layers during serialization
CObjectArray<CBaseLayer> layers;
// Stores the input/output blobs from last Inference
CObjectArray<CDnnBlob> innerInputBlobs;
CObjectArray<CDnnBlob> innerOutputBlobs;

void OnDnnChanged( CDnn* ) override;
void processBackwardOrLearn();
void configureAdapter();
void configureFromHead();
void saveBlobs();
void loadBlobs();
void configureForBackwardAndLearn();
};

inline NEOML_API CLayerWrapper<CDnnHeadAdapterLayer> DnnHeadAdapter( CDnnHead* head )
{
return CLayerWrapper<CDnnHeadAdapterLayer>( "DnnHeadAdapter", [=]( CDnnHeadAdapterLayer* result ) {
result->SetDnnHead( head );
} );
}

} // namespace NeoML
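A sketch of wiring two branches of one network to the same head via adapters; the source layers "data1"/"data2", the adapter names, and the previously built head are illustrative assumptions. The DnnHeadAdapter() wrapper above achieves the same in CLayerWrapper style.

```cpp
#include <NeoML/NeoML.h>

using namespace NeoML;

// Attaches two adapters sharing one head to the outputs of layers "data1" and "data2".
static void AttachSharedHead( CDnn& dnn, CDnnHead* head )
{
	CPtr<CDnnHeadAdapterLayer> first = new CDnnHeadAdapterLayer( dnn.GetMathEngine(), "headAdapter1" );
	first->SetDnnHead( head );
	dnn.AddLayer( *first );
	first->Connect( 0, "data1" );

	CPtr<CDnnHeadAdapterLayer> second = new CDnnHeadAdapterLayer( dnn.GetMathEngine(), "headAdapter2" );
	second->SetDnnHead( head );
	dnn.AddLayer( *second );
	second->Connect( 0, "data2" );
}
```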