diff --git a/NeoML/include/NeoML/Dnn/Dnn.h b/NeoML/include/NeoML/Dnn/Dnn.h index e461ae73a..c8d7f11ca 100644 --- a/NeoML/include/NeoML/Dnn/Dnn.h +++ b/NeoML/include/NeoML/Dnn/Dnn.h @@ -98,20 +98,18 @@ struct CDnnLayerLink { int OutputNumber; // Default value for optional inputs. - CDnnLayerLink() : Layer( 0 ), OutputNumber( -1 ) {} - CDnnLayerLink( const CDnnLayerLink& other ) : - Layer( other.Layer ), OutputNumber( other.OutputNumber ) {} - CDnnLayerLink( CBaseLayer* layer, int outputNumber ) : + CDnnLayerLink() : Layer( nullptr ), OutputNumber( -1 ) {} + CDnnLayerLink( CBaseLayer* layer, int outputNumber = 0 ) : Layer( layer ), OutputNumber( outputNumber ) { - NeoAssert( Layer != 0 ); + NeoAssert( Layer != nullptr ); NeoAssert( OutputNumber >= 0 ); } - - // Converting constructor - CDnnLayerLink( CBaseLayer* layer ) : - Layer( layer ), OutputNumber( 0 ) {} + CDnnLayerLink( CDnnLayerLink&& ) = default; + CDnnLayerLink( const CDnnLayerLink& other ) : + Layer( other.Layer ), OutputNumber( other.OutputNumber ) + {} // Is this layer optional, i.e. created by CLayerOutout() default constructor. bool IsOptional() const { return Layer == 0 && OutputNumber == -1; } @@ -149,7 +147,10 @@ class NEOML_API CBaseLayer : public virtual IObject { // // e.g. layer "InputHidden" inside of CLstmLayer named "LSTM", which is inside of CCompositeLayer named "Encoder" // has path "Encoder/LSTM/InputHidden" - CString GetPath() const; + CString GetPath( const char* sep = "/" ) const; + // Path in form suitable for dnn->GetLayer( CArray& path ); + // Returns an empty array if the path cannot be constructed. + void GetPath( CArray& path ) const; // Connects this layer's inputNumber input to the specified layer's outputNumber output virtual void Connect( int inputNumber, const char* layer, int outputNumber = 0 ); @@ -390,7 +391,8 @@ class NEOML_API CBaseLayer : public virtual IObject { // Set the 'dist' layer's paramBlobs to point to the data of this layer's paramBlobs void transferParamsBlob(CBaseLayer& dist) const; - + // Technical method for recursion in GetPath( CArray& path ) + void getPath( CArray& path ) const; // Switches the specified blobs into sequence processing mode void switchBlobsToSequentialMode(CObjectArray& blobs, TBlobCacheType cacheType, bool storeParent); void switchBlobsToNonSequentialMode(CObjectArray& blobs, TBlobCacheType cacheType, bool clear); @@ -432,6 +434,7 @@ class NEOML_API CBaseLayer : public virtual IObject { friend class CDnnLayerGraph; friend class CDnnSolver; friend class CCompositeLayer; + friend class CDnnHeadAdapterLayer; }; //------------------------------------------------------------------------------------------------------------ @@ -684,6 +687,7 @@ class NEOML_API CDnn : public CDnnLayerGraph { friend class CCompositeLayer; friend class CRecurrentLayer; friend class CDnnReferenceRegister; + friend class CDnnHeadAdapterLayer; }; inline CArchive& operator<<( CArchive& archive, const CDnn& dnn) diff --git a/NeoML/include/NeoML/Dnn/Dnn.inl b/NeoML/include/NeoML/Dnn/Dnn.inl index 497019ab7..81123b43a 100644 --- a/NeoML/include/NeoML/Dnn/Dnn.inl +++ b/NeoML/include/NeoML/Dnn/Dnn.inl @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -56,9 +56,26 @@ inline bool CBaseLayer::IsBackwardNeeded() const return isBackwardNeeded == BS_NeedsBackward; } -inline CString CBaseLayer::GetPath() const +inline CString CBaseLayer::GetPath( const char* sep ) const { - return dnn == nullptr || dnn->owner == nullptr ? name : dnn->owner->GetPath() + "/" + name; + return ( dnn == nullptr || dnn->owner == nullptr ) ? name : ( dnn->owner->GetPath( sep ) + sep + name ); +} + +inline void CBaseLayer::GetPath( CArray& path ) const +{ + path.DeleteAll(); + getPath( path ); +} + +inline void CBaseLayer::getPath( CArray& path ) const +{ + if( dnn == nullptr ) { + return; + } + if( dnn->owner != nullptr ) { + dnn->owner->getPath( path ); + } + path.Add( name ); } inline void CBaseLayer::CheckLayerArchitecture( bool expr, const char* message ) const diff --git a/NeoML/include/NeoML/Dnn/DnnHead.h b/NeoML/include/NeoML/Dnn/DnnHead.h new file mode 100644 index 000000000..05f4b5cfc --- /dev/null +++ b/NeoML/include/NeoML/Dnn/DnnHead.h @@ -0,0 +1,100 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--------------------------------------------------------------------------------------------------------------*/ + +#pragma once + +#include +#include +#include +#include + +namespace NeoML { + +template +class CLayerWrapper; +class CDnnHeadAdapterLayer; +class CGraph; + +namespace optimization { +int OptimizeDnnHeadAdapters( NeoML::CGraph& ); +} + +class CDnnHead : public IObject { +public: + CDnnHead() = default; + + template + CDnnHead(CRandom& random, IMathEngine& mathEngine, CLayerWrapper... linearWrappers) + { + CDnn* head(new CDnn(random, mathEngine)); + + CPtr source = new CCompositeSourceLayer(head->GetMathEngine()); + source->SetName("source"); + head->AddLayer(*source); + CBaseLayer* inputLayer = source; + + // chain connect wrapped layers + using TExpanding = CBaseLayer * []; + TExpanding{ inputLayer = linearWrappers(inputLayer)... 
}; + + CPtr headSink = new CCompositeSinkLayer(head->GetMathEngine()); + headSink->SetName("sink"); + head->AddLayer(*headSink); + headSink->Connect(0, *(inputLayer)); + dnn = head; + } + + CDnn& GetDnn() { return *dnn; } + +private: + ~CDnnHead() override + { + if( dnn != nullptr ) { + delete dnn; + dnn = nullptr; + } + } + + void increment() + { + if( ++headCounter == connections.Size() ) { + headCounter = 0; + firstAdapterNum = -1; + } + } + + CDnn* dnn = nullptr; + + // Stores all adapter using this head + CObjectArray connections; + // Layers for which input/output blobs are stored for Backward/Learn + CArray inputLayers; + CArray outputLayers; + // Pointers to source/sink layers of inner network + CCompositeSourceLayer* sourceLayer = nullptr; + CCompositeSinkLayer* sinkLayer = nullptr; + // Which of the blobs will be used during backward + int blobsForBackward = 0; + // Which of the blobs will be used during learn + int blobsForLearn = 0; + + int headCounter = 0; + int firstAdapterNum = -1; + + friend class CDnnHeadAdapterLayer; + friend int optimization::OptimizeDnnHeadAdapters( CGraph& ); +}; + +} // namespace NeoML diff --git a/NeoML/include/NeoML/Dnn/DnnOptimization.h b/NeoML/include/NeoML/Dnn/DnnOptimization.h index 7e7d157c0..aef0cf9a6 100644 --- a/NeoML/include/NeoML/Dnn/DnnOptimization.h +++ b/NeoML/include/NeoML/Dnn/DnnOptimization.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,6 +23,8 @@ class CDnn; // Struct which contains the details of optimization result struct NEOML_API CDnnOptimizationReport final { + // Number of HeadAdapters layers which internal dnn is optimized + int OptimizedHeadAdapters = 0; // Number of composite layers which where unpacked // (unpack == content of the layer moved to the root CDnn, composite itself is removed) int UnpackedCompositeLayers = 0; @@ -51,7 +53,8 @@ struct NEOML_API CDnnOptimizationReport final { // Check for is any optimization succeed inline bool CDnnOptimizationReport::IsOptimized() const { - return UnpackedCompositeLayers > 0 + return OptimizedHeadAdapters > 0 + || UnpackedCompositeLayers > 0 || RemovedTrivialLayers > 0 || FusedBatchNormalizations > 0 || ChannelwiseWith1x1NonResidual > 0 diff --git a/NeoML/include/NeoML/Dnn/DnnSolver.h b/NeoML/include/NeoML/Dnn/DnnSolver.h index 7a323f734..8d27bca4e 100644 --- a/NeoML/include/NeoML/Dnn/DnnSolver.h +++ b/NeoML/include/NeoML/Dnn/DnnSolver.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -55,7 +55,7 @@ class NEOML_API CDnnSolver : virtual public IObject { void SetMinMaxGradientClipping( float min, float max ) { clipGradientMin = min; clipGradientMax = max; } // Serialize to archive - virtual void Serialize( CArchive& archive, CDnn& dnn ); + virtual void Serialize( CArchive& archive, const CDnn& dnn ); protected: explicit CDnnSolver( IMathEngine& mathEngine ); @@ -86,18 +86,17 @@ class NEOML_API CDnnSolver : virtual public IObject { float clipGradientMax; // The blobs sum - struct CDiffBlobSum { - CDiffBlobSum() : Count( 0 ) {} - - CObjectArray Sum; // the blobs sums - int Count; // the number of terms in each sum + struct CDiffBlobSum final { + const CBaseLayer* LayerOwner{}; // for the given layer + CObjectArray Sum{}; // the blobs sums + int Count{}; // the number of terms in each sum }; // The buffers used to add up the gradients from several AddDiff calls - CMap layerToParamDiffBlobsSum; + CMap layerToParamDiffBlobsSum; // The buffers for storing gradients history and moment // Used in the inheriting classes - CMap> layerToGradientHistory; + CMap> layerToGradientHistory; // Layers which require reduction across distributed solver CHashTable layersToReduce; // Fast check if layer is included already CArray reduceOrder; // Correct order across all of the distributed nets @@ -112,6 +111,10 @@ class NEOML_API CDnnSolver : virtual public IObject { // Telling the compiler that we intentionally using two-parameter Serialize instead of one declared in IObject using IObject::Serialize; + // Convert maps from the previous serialization format + void loadPrevVersionDnnSolverMaps( CArchive& archive, const CDnn& dnn ); + + friend class CDnnHeadAdapterLayer; }; //////////////////////////////////////////////////////////////////////////////////////////////// @@ -170,7 +173,7 @@ class NEOML_API CDnnSimpleGradientSolver : public CDnnSolver { bool IsInCompatibilityMode() const { return isInCompatibilityMode; } void SetCompatibilityMode( bool compatibilityMode ) { isInCompatibilityMode = compatibilityMode; } - void Serialize( CArchive& archive, CDnn& dnn ) override; + void Serialize( CArchive& archive, const CDnn& dnn ) override; protected: void TrainLayer( const CBaseLayer* layer, const CObjectArray& paramBlobs, @@ -234,7 +237,7 @@ class NEOML_API CDnnAdaptiveGradientSolver : public CDnnSolver { // May be called only before training starts. void EnableDecoupledWeightDecay( bool enable ); - void Serialize( CArchive& archive, CDnn& dnn ) override; + void Serialize( CArchive& archive, const CDnn& dnn ) override; protected: // Resets to the initial state @@ -335,7 +338,7 @@ class NEOML_API CDnnNesterovGradientSolver : public CDnnSolver { // May be called only before training starts. 
void EnableDecoupledWeightDecay( bool enable ); - void Serialize( CArchive& archive, CDnn& dnn ) override; + void Serialize( CArchive& archive, const CDnn& dnn ) override; protected: // Resets to the initial state @@ -482,7 +485,7 @@ class NEOML_API CDnnLambGradientSolver : public CDnnSolver { bool GetUseNVLamb() const { return useNvLamb; } void SetUseNVLamb( bool value ) { useNvLamb = value; } - void Serialize( CArchive& archive, CDnn& dnn ) override; + void Serialize( CArchive& archive, const CDnn& dnn ) override; protected: void TrainLayer( const CBaseLayer* layer, const CObjectArray& paramBlobs, diff --git a/NeoML/include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h b/NeoML/include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h new file mode 100644 index 000000000..a4909dc21 --- /dev/null +++ b/NeoML/include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h @@ -0,0 +1,83 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--------------------------------------------------------------------------------------------------------------*/ + +#pragma once + +#include +#include +#include +#include + +namespace NeoML { + +// CDnnHeadAdapterLayer passes data blobs between multiple external layers and a shared internal DNN (head) +// Unlike CompositeLayer, it allows to connect several external layers to same head +class NEOML_API CDnnHeadAdapterLayer final : public CBaseLayer { + NEOML_DNN_LAYER( CDnnHeadAdapterLayer ) +public: + explicit CDnnHeadAdapterLayer( IMathEngine& mathEngine, const char* name = nullptr ) + : CBaseLayer( mathEngine, name == nullptr ? "CDnnHeadAdapterLayer" : name, /*isLearnable*/true ) + {} + + void Serialize( CArchive& archive ) override; + + // Internal shared Dnn between DnnHeadAdapters + void SetDnnHead( CPtr head ); + + // Get Dnn head + const CDnnHead* GetDnnHead() const { return head; }; + CDnnHead* GetDnnHead() { return head; }; + +protected: + void Reshape() override; + void RunOnce() override; + void BackwardOnce() override; + void LearnOnce() override; + // It does not allocate outputBlobs in CBaseLayer in runOnce, because they are not used for inference. + // The outputBlob for CDnnHeadAdapterLayer are sinkLayer->GetBlob() of its internalDnn. 
+ void AllocateOutputBlobs() override {} + int BlobsForBackward() const override { return head->blobsForBackward; } + int BlobsForLearn() const override { return head->blobsForLearn; } + +private: + // Pointer to HeadLayer with inner dnn + CPtr head = nullptr; + // Save first adapter name to connect to necessary head in serialization + CString firstAdapter; + // Stores the number of the layer connected to the internal network + int num = -1; + // Temporarily used to store layers during serialization + CObjectArray layers; + // Stores the input/output blobs from last Inference + CObjectArray innerInputBlobs; + CObjectArray innerOutputBlobs; + + void OnDnnChanged( CDnn* ) override; + void processBackwardOrLearn(); + void configureAdapter(); + void configureFromHead(); + void saveBlobs(); + void loadBlobs(); + void configureForBackwardAndLearn(); +}; + +inline NEOML_API CLayerWrapper DnnHeadAdapter( CDnnHead* head ) +{ + return CLayerWrapper( "DnnHeadAdapter", [=]( CDnnHeadAdapterLayer* result ) { + result->SetDnnHead( head ); + } ); +} + +} // namespace NeoML diff --git a/NeoML/include/NeoML/Dnn/Layers/DropoutLayer.h b/NeoML/include/NeoML/Dnn/Layers/DropoutLayer.h index d1c9e029e..4065f350a 100644 --- a/NeoML/include/NeoML/Dnn/Layers/DropoutLayer.h +++ b/NeoML/include/NeoML/Dnn/Layers/DropoutLayer.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -43,6 +43,9 @@ class NEOML_API CDropoutLayer : public CBaseInPlaceLayer { bool IsBatchwise() const { return isBatchwise; } void SetBatchwise( bool value ); + // Set number of connected HeadAdapterLayers + void SetHeadCounter(int num) { headConnections = num; } + protected: ~CDropoutLayer() override { destroyDropoutDesc(); } @@ -58,6 +61,9 @@ class NEOML_API CDropoutLayer : public CBaseInPlaceLayer { bool isSpatial; // the spatial mode (channel-wise) bool isBatchwise; // the batchwise mode + int headConnections = 1; // stores number of HeadAdapter's using the dropout + int headCount = 0; // counter for BackwardOnce(), if headCount == headConnections, delete desc + void initDropoutDesc(); void destroyDropoutDesc(); }; diff --git a/NeoML/src/CMakeLists.txt b/NeoML/src/CMakeLists.txt index ba17e7a1e..1986e2771 100644 --- a/NeoML/src/CMakeLists.txt +++ b/NeoML/src/CMakeLists.txt @@ -116,6 +116,7 @@ set(NeoML_SOURCES Dnn/Layers/CtcLayer.cpp Dnn/Layers/CumSumLayer.cpp Dnn/Layers/DepthToSpaceLayer.cpp + Dnn/Layers/DnnHeadAdapterLayer.cpp Dnn/Layers/DotProductLayer.cpp Dnn/Layers/EnumBinarizationLayer.cpp Dnn/Layers/FocalLossLayer.cpp @@ -266,6 +267,7 @@ set(NeoML_HEADERS_COMPACT ../include/NeoML/Dnn/Dnn.inl ../include/NeoML/Dnn/DnnBlob.h ../include/NeoML/Dnn/DnnInitializer.h + ../include/NeoML/Dnn/DnnHead.h ../include/NeoML/Dnn/DnnLambdaHolder.h ../include/NeoML/Dnn/DnnSolver.h ../include/NeoML/Dnn/DnnSparseMatrix.h @@ -282,6 +284,7 @@ set(NeoML_HEADERS_COMPACT ../include/NeoML/Dnn/Layers/ConcatLayer.h ../include/NeoML/Dnn/Layers/ConvLayer.h ../include/NeoML/Dnn/Layers/DataLayer.h + ../include/NeoML/Dnn/Layers/DnnHeadAdapterLayer.h ../include/NeoML/Dnn/Layers/DropoutLayer.h ../include/NeoML/Dnn/Layers/EltwiseLayer.h ../include/NeoML/Dnn/Layers/FullyConnectedLayer.h diff --git a/NeoML/src/Dnn/Dnn.cpp b/NeoML/src/Dnn/Dnn.cpp index 47855b52f..51f608db6 100644 --- a/NeoML/src/Dnn/Dnn.cpp +++ b/NeoML/src/Dnn/Dnn.cpp @@ -71,6 +71,7 @@ limitations under the License. 
#include #include #include +#include #include #include #include @@ -349,6 +350,7 @@ REGISTER_NEOML_LAYER( CCrfLayer, "FmlCnnCrfLayer" ) REGISTER_NEOML_LAYER( CCrfLossLayer, "FmlCnnCrfLossLayer" ) REGISTER_NEOML_LAYER( CCtcDecodingLayer, "FmlCnnCtcDecodingLayer" ) REGISTER_NEOML_LAYER( CCtcLossLayer, "FmlCnnCtcLossLayer" ) +REGISTER_NEOML_LAYER( CDnnHeadAdapterLayer, "NeoMLDnnHeadAdapterLayer" ) REGISTER_NEOML_LAYER( CDotProductLayer, "FmlCnnDotProductLayer" ) REGISTER_NEOML_LAYER( CEnumBinarizationLayer, "FmlCnnEnumBinarizationLayer" ) REGISTER_NEOML_LAYER( CGlobalMaxPoolingLayer, "FmlCnnGlobalMaxPoolingLayer" ) diff --git a/NeoML/src/Dnn/DnnOptimization.cpp b/NeoML/src/Dnn/DnnOptimization.cpp index 4ab59a52b..580965c95 100644 --- a/NeoML/src/Dnn/DnnOptimization.cpp +++ b/NeoML/src/Dnn/DnnOptimization.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ limitations under the License. #include "Optimization/MobileNetV3Optimizer.h" #include "Optimization/OptimizerFunctions.h" #include +#include #include namespace NeoML { @@ -33,6 +34,7 @@ CDnnOptimizationReport OptimizeDnn( CDnn& dnn, const CDnnOptimizationSettings& s CDnnOptimizationReport report; optimization::CGraph graph( dnn ); + report.OptimizedHeadAdapters = optimization::OptimizeDnnHeadAdapters( graph ); report.UnpackedCompositeLayers = optimization::UnpackComposites( graph ); report.RemovedTrivialLayers = optimization::RemoveTrivialLayers( graph ); optimization::CBatchNormFusionOptimizer( graph ).Apply( report ); diff --git a/NeoML/src/Dnn/DnnSolver.cpp b/NeoML/src/Dnn/DnnSolver.cpp index 3a2b09a50..99bebc9b2 100644 --- a/NeoML/src/Dnn/DnnSolver.cpp +++ b/NeoML/src/Dnn/DnnSolver.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -125,37 +125,8 @@ REGISTER_NEOML_SOLVER( CDnnLambGradientSolver, "NeoMLDnnLambGradientSolver" ) } //////////////////////////////////////////////////////////////////////////////////////////////////// -// Utility functions for serialization -void mapLayerIdToPtr( CDnnLayerGraph& dnn, CMap& result, const CString& prefix = "" ) -{ - CArray layerNames; - dnn.GetLayerList( layerNames ); - for( int layerIndex = 0; layerIndex < layerNames.Size(); ++layerIndex ) { - CPtr layer = dnn.GetLayer( layerNames[layerIndex] ); - result.Add( prefix + layer->GetName(), layer.Ptr() ); - CCompositeLayer* compositePtr = dynamic_cast( layer.Ptr() ); - if( compositePtr != nullptr ) { - mapLayerIdToPtr( *compositePtr, result, prefix + compositePtr->GetName() ); - } - } -} - -void mapLayerPtrToId( CDnnLayerGraph& dnn, CMap& result, const CString& prefix = "" ) -{ - CArray layerNames; - dnn.GetLayerList( layerNames ); - for( int layerIndex = 0; layerIndex < layerNames.Size(); ++layerIndex ) { - CPtr layer = dnn.GetLayer( layerNames[layerIndex] ); - result.Add( layer.Ptr(), prefix + layer->GetName() ); - CCompositeLayer* compositePtr = dynamic_cast( layer.Ptr() ); - if( compositePtr != nullptr ) { - mapLayerPtrToId( *compositePtr, result, prefix + compositePtr->GetName() ); - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// +static constexpr const char* const layerPathSeparator = "/"; CDnnSolver::CDnnSolver( IMathEngine& _mathEngine ) : mathEngine( _mathEngine ), @@ -179,8 +150,10 @@ void CDnnSolver::AddDiff( CBaseLayer* layer, const CObjectArray& param reduceOrder.Add( layer ); } - CDiffBlobSum& paramDiffBlobsSum = layerToParamDiffBlobsSum.GetOrCreateValue( layer ); - + CDiffBlobSum& paramDiffBlobsSum = layerToParamDiffBlobsSum.GetOrCreateValue( layer->GetPath( layerPathSeparator ) ); + if( paramDiffBlobsSum.LayerOwner == nullptr ) { + paramDiffBlobsSum.LayerOwner = layer; + } if( !sharedWeights ) { ++paramDiffBlobsSum.Count; } @@ -206,11 +179,13 @@ void CDnnSolver::Train( float distributedCoeff ) for( TMapPosition pos = layerToParamDiffBlobsSum.GetFirstPosition(); pos != NotFound; pos = layerToParamDiffBlobsSum.GetNextPosition( pos ) ) { - CBaseLayer* layer = layerToParamDiffBlobsSum.GetKey( pos ); + const CString layerPath = layerToParamDiffBlobsSum.GetKey( pos ); CDiffBlobSum& paramDiffBlobsSum = layerToParamDiffBlobsSum.GetValue( pos ); if( paramDiffBlobsSum.Sum.IsEmpty() ) { continue; } + const CBaseLayer* layer = paramDiffBlobsSum.LayerOwner; + NeoAssert( layer != nullptr ); NeoAssert( paramDiffBlobsSum.Count > 0 ); // Take the average of the gradients to simulate that the elements from all runs were in the same batch @@ -226,7 +201,7 @@ void CDnnSolver::Train( float distributedCoeff ) clipGradients( paramDiffBlobsSum.Sum ); // Train the layer based on the calculated diff data - TrainLayer( layer, layer->paramBlobs, paramDiffBlobsSum.Sum, layerToGradientHistory.GetOrCreateValue( layer ) ); + TrainLayer( layer, layer->paramBlobs, paramDiffBlobsSum.Sum, layerToGradientHistory.GetOrCreateValue( layerPath ) ); // Clear the diff data paramDiffBlobsSum.Sum.Empty(); @@ -322,57 +297,123 @@ void CDnnSolver::clipGradients(const CObjectArray& paramDiffBlobs) } } -static const int DnnSolverVersion = 1; +static CString concatLayerPath( const CArray& path ) +{ + CString layerPath = path[0]; + for( int i = 1; i < path.Size(); ++i ) { + layerPath += layerPathSeparator + path[i]; + } + return layerPath; +} + +void CDnnSolver::loadPrevVersionDnnSolverMaps( 
CArchive& archive, const CDnn& dnn ) +{ + CMap> layerPrevIdToPath; + auto mapLayerIdToPath = [&layerPrevIdToPath]( const CDnnLayerGraph& dnn, auto& mapLayerIdToPath ) -> void + { + CArray layerNames; + dnn.GetLayerList( layerNames ); + for( const char* layerName : layerNames ) { + const CBaseLayer* layer = dnn.GetLayer( layerName ); + const CString layerPath = layer->GetPath( "" ); + CArray& path = layerPrevIdToPath.GetOrCreateValue( layerPath ); + layer->GetPath( path ); + NeoAssert( path.Size() ); + const CCompositeLayer* composite = dynamic_cast( layer ); + if( composite != nullptr ) { + mapLayerIdToPath( *composite, mapLayerIdToPath ); + } + } + }; + mapLayerIdToPath( dnn, mapLayerIdToPath ); + + auto convertOldIdToLayerPath = [&]( const CBaseLayer** layer ) + { + CString layerId; + archive >> layerId; + const CArray& path = layerPrevIdToPath[layerId]; + if( layer != nullptr ) { + *layer = dnn.GetLayer( path ); + } + return concatLayerPath( path ); + }; + + int size; + archive >> size; + for( int i = 0; i < size; ++i ) { + const CBaseLayer* layerTemp = nullptr; + const CString layerPath = convertOldIdToLayerPath( &layerTemp ); + + CDiffBlobSum& blobSum = layerToParamDiffBlobsSum.GetOrCreateValue( layerPath ); + archive >> blobSum.Count; + SerializeBlobs( mathEngine, archive, blobSum.Sum ); + blobSum.LayerOwner = layerTemp; + } + + archive >> size; + for( int i = 0; i < size; ++i ) { + const CString layerPath = convertOldIdToLayerPath( nullptr ); + SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetOrCreateValue( layerPath ) ); + } +} + +static const int DnnSolverVersion = 2; -void CDnnSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnSolver::Serialize( CArchive& archive, const CDnn& dnn ) { const int version = archive.SerializeVersion( DnnSolverVersion ); if( archive.IsStoring() ) { - CMap layerPtrToId; - mapLayerPtrToId( dnn, layerPtrToId ); - archive << layerToParamDiffBlobsSum.Size(); for( int pos = layerToParamDiffBlobsSum.GetFirstPosition(); pos != NotFound; pos = layerToParamDiffBlobsSum.GetNextPosition( pos ) ) { - archive << layerPtrToId[layerToParamDiffBlobsSum.GetKey( pos )]; + CString layerPath = layerToParamDiffBlobsSum.GetKey( pos ); + const CBaseLayer* layer = layerToParamDiffBlobsSum.GetValue( pos ).LayerOwner; + NeoAssert( layer != nullptr ); + CArray path; + layer->GetPath( path ); + archive.Serialize( path ); + NeoAssert( path.Size() ); + archive << layerToParamDiffBlobsSum.GetValue( pos ).Count; SerializeBlobs( mathEngine, archive, layerToParamDiffBlobsSum.GetValue( pos ).Sum ); - } - archive << layerToGradientHistory.Size(); - for( int pos = layerToGradientHistory.GetFirstPosition(); pos != NotFound; - pos = layerToGradientHistory.GetNextPosition( pos ) ) - { - archive << layerPtrToId[layerToGradientHistory.GetKey( pos )]; - SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetValue( pos ) ); + const bool hasGradientHistory = layerToGradientHistory.Has( layerPath ); + archive << hasGradientHistory; + if( hasGradientHistory ) { + SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetValue( pos ) ); + } } archive << learningRate << regularizationL1 << regularizationL2 << maxGradientNorm; archive << clipGradientMin << clipGradientMax; } else { - CMap layerIdToPtr; - mapLayerIdToPtr( dnn, layerIdToPtr ); - layerToParamDiffBlobsSum.DeleteAll(); layerToGradientHistory.DeleteAll(); layersToReduce.DeleteAll(); reduceOrder.DeleteAll(); - int size; - archive >> size; - for( int i = 0; i < size; ++i ) { - CString layerId; - archive >> 
layerId; - CDiffBlobSum& blobSum = layerToParamDiffBlobsSum.GetOrCreateValue( layerIdToPtr[layerId] ); - archive >> blobSum.Count; - SerializeBlobs( mathEngine, archive, blobSum.Sum ); - } - - archive >> size; - for( int i = 0; i < size; ++i ) { - CString layerId; - archive >> layerId; - SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetOrCreateValue( layerIdToPtr[layerId] ) ); + if( version >= 2 ) { + int size; + archive >> size; + for( int i = 0; i < size; ++i ) { + CArray path; + archive.Serialize( path ); + NeoAssert( path.Size() ); + + const CString layerPath = concatLayerPath( path ); + CDiffBlobSum& blobSum = layerToParamDiffBlobsSum.GetOrCreateValue( layerPath ); + archive >> blobSum.Count; + SerializeBlobs( mathEngine, archive, blobSum.Sum ); + blobSum.LayerOwner = dnn.GetLayer( path ); + + bool hasGradientHistory; + archive >> hasGradientHistory; + if( hasGradientHistory ) { + SerializeBlobs( mathEngine, archive, layerToGradientHistory.GetOrCreateValue( layerPath ) ); + } + } + } else { + loadPrevVersionDnnSolverMaps( archive, dnn ); } archive >> learningRate >> regularizationL1 >> regularizationL2 >> maxGradientNorm; if( version >= 1 ) { @@ -396,7 +437,7 @@ CDnnSimpleGradientSolver::CDnnSimpleGradientSolver( IMathEngine& mathEngine ) : static const int DnnSimpleGradientSolverVersion = 0; -void CDnnSimpleGradientSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnSimpleGradientSolver::Serialize( CArchive& archive, const CDnn& dnn ) { archive.SerializeVersion( DnnSimpleGradientSolverVersion ); CDnnSolver::Serialize( archive, dnn ); @@ -491,7 +532,7 @@ void CDnnAdaptiveGradientSolver::EnableDecoupledWeightDecay( bool enable ) static const int DnnAdaptiveGradientSolver = 1; -void CDnnAdaptiveGradientSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnAdaptiveGradientSolver::Serialize( CArchive& archive, const CDnn& dnn ) { const int version = archive.SerializeVersion( DnnAdaptiveGradientSolver ); CDnnSolver::Serialize( archive, dnn ); @@ -665,7 +706,7 @@ void CDnnNesterovGradientSolver::EnableDecoupledWeightDecay( bool enable ) static const int DnnNesterovGradientSolverVersion = 1; -void CDnnNesterovGradientSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnNesterovGradientSolver::Serialize( CArchive& archive, const CDnn& dnn ) { const int version = archive.SerializeVersion( DnnNesterovGradientSolverVersion ); CDnnSolver::Serialize( archive, dnn ); @@ -850,7 +891,7 @@ void CDnnLambGradientSolver::ExcludeBiasParamLayers() static const int DnnLambGradientSolverVersion = 0; -void CDnnLambGradientSolver::Serialize( CArchive& archive, CDnn& dnn ) +void CDnnLambGradientSolver::Serialize( CArchive& archive, const CDnn& dnn ) { archive.SerializeVersion( DnnLambGradientSolverVersion ); CDnnSolver::Serialize( archive, dnn ); diff --git a/NeoML/src/Dnn/Layers/DnnHeadAdapterLayer.cpp b/NeoML/src/Dnn/Layers/DnnHeadAdapterLayer.cpp new file mode 100644 index 000000000..12684c9a1 --- /dev/null +++ b/NeoML/src/Dnn/Layers/DnnHeadAdapterLayer.cpp @@ -0,0 +1,274 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http ://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+-------------------------------------------------------------------------------------------------------------- */
+
+#include
+#pragma hdrstop
+
+#include
+#include
+#include
+#include
+
+namespace NeoML {
+
+static const int DnnHeadAdapterLayerVersion = 0;
+
+void CDnnHeadAdapterLayer::Serialize( CArchive& archive )
+{
+	archive.SerializeVersion( DnnHeadAdapterLayerVersion );
+	CBaseLayer::Serialize( archive );
+
+	bool existHead = ( head != nullptr );
+	archive.Serialize( existHead );
+	if( !existHead ) {
+		return;
+	}
+
+	if( archive.IsStoring() ) {
+		archive << head->headCounter;
+		if( head->headCounter > 0 ) {
+			CString name( head->connections[0]->GetName() );
+			archive << name;
+		} else {
+			NeoAssert( head->dnn != nullptr );
+			archive << head->dnn->layers.Size();
+			for( int i = 0; i < head->dnn->layers.Size(); i++ ) {
+				SerializeLayer( archive, MathEngine(), head->dnn->layers[i] );
+			}
+		}
+
+		head->increment();
+	} else if( archive.IsLoading() ) {
+		int num;
+		archive >> num;
+		if( num > 0 ) {
+			archive >> firstAdapter;
+		} else {
+			int layerSize;
+			archive >> layerSize;
+			layers.SetSize( layerSize );
+			for( int i = 0; i < layerSize; i++ ) {
+				SerializeLayer( archive, MathEngine(), layers[i] );
+			}
+		}
+	} else {
+		NeoAssert( false );
+	}
+}
+
+void CDnnHeadAdapterLayer::OnDnnChanged( CDnn* )
+{
+	// If this is the first adapter, create the head dnn and initialize its layers;
+	// otherwise, set the internal DNN head from the first connected adapter layer (after serialization)
+	if( head == nullptr ) {
+		if( !firstAdapter.IsEmpty() ) {
+			SetDnnHead( static_cast<CDnnHeadAdapterLayer*>( ( GetDnn()->GetLayer( firstAdapter ).Ptr() ) )->head );
+		} else if( !layers.IsEmpty() ) {
+			if( GetDnn() != 0 ) {
+				CDnn* internalDnn = FINE_DEBUG_NEW CDnn( GetDnn()->Random(), GetDnn()->GetMathEngine() );
+
+				for( int i = 0; i < layers.Size(); ++i ) {
+					internalDnn->AddLayer( *layers[i] );
+				}
+				head = new CDnnHead();
+				head->dnn = internalDnn;
+				SetDnnHead( head );
+				layers.DeleteAll();
+			}
+		}
+	}
+}
+
+void CDnnHeadAdapterLayer::Reshape()
+{
+	if( head->headCounter > 0 ) {
+		configureFromHead();
+		return;
+	}
+
+	configureAdapter();
+}
+
+void CDnnHeadAdapterLayer::RunOnce()
+{
+	NeoAssert( inputBlobs.Size() == 1 );
+	NeoAssert( head->dnn != nullptr );
+
+	head->sourceLayer->SetBlob( inputBlobs[0] );
+	head->dnn->isReuseMemoryMode = GetDnn()->isReuseMemoryMode;
+	head->dnn->runOnce( GetDnn()->GetCurrentSequencePos() );
+	outputBlobs[0] = head->sinkLayer->GetInputBlob()->GetCopy();
+
+	// save the blobs required for the next backward/learn
+	if( IsBackwardNeeded() || IsLearningEnabled() ) {
+		saveBlobs();
+	}
+}
+
+void CDnnHeadAdapterLayer::processBackwardOrLearn()
+{
+	NeoAssert( head->dnn->isBackwardPerformed == GetDnn()->isBackwardPerformed );
+
+	if( IsBackwardNeeded() ) {
+		head->sourceLayer->SetDiffBlob( inputDiffBlobs[0] );
+	}
+
+	head->sinkLayer->SetDiffBlob( outputDiffBlobs[0] );
+
+	// load the blobs for backward/learn saved on the last RunOnce
+	loadBlobs();
+
+	head->dnn->backwardRunAndLearnOnce( GetDnn()->GetCurrentSequencePos() );
+	innerInputBlobs.DeleteAll();
+	innerOutputBlobs.DeleteAll();
+
+	if( head->headCounter == head->connections.Size() - 1 ) {
+		for( const CBaseLayer* layer : head->dnn->layers ) {
+			if( layer->IsLearningPerformed() ) {
+				int& layerCount = GetDnn()->GetSolver()->layerToParamDiffBlobsSum.GetOrCreateValue( layer->GetPath() ).Count;
+				layerCount = layerCount - head->connections.Size() + 1;
+			}
+		}
+	}
+	head->increment();
+}
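+// Note ( inferred from the adjustment above ): each adapter calls AddDiff for the shared head layers, so the
+// solver's term count for those layers grows once per adapter per batch; after the last adapter's backward
+// pass the count is reduced by ( connections.Size() - 1 ), so the diffs summed over all adapters are treated
+// as a single contribution when the solver averages the accumulated gradients.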
+ +void CDnnHeadAdapterLayer::BackwardOnce() +{ + processBackwardOrLearn(); +} + +void CDnnHeadAdapterLayer::LearnOnce() +{ + if( !IsBackwardPerformed() ) { + processBackwardOrLearn(); + } +} + +void CDnnHeadAdapterLayer::SetDnnHead( CPtr _head ) +{ + head = _head; + num = head->connections.Size(); + head->connections.Add( this ); + ForceReshape(); +} + +void CDnnHeadAdapterLayer::configureAdapter() +{ + NeoAssert( head->dnn != nullptr ); + head->sinkLayer = CheckCast( head->dnn->GetLayer( "sink" ).Ptr() ); + head->sourceLayer = CheckCast( head->dnn->GetLayer( "source" ).Ptr() ); + if( head->sourceLayer->GetBackwardForced() != IsBackwardNeeded() ) { + head->sourceLayer->SetBackwardForced( IsBackwardNeeded() ); + } + head->sourceLayer->SetBlobDesc( inputDescs[0] ); + // If the backward pass requirements have changed, call reshape + bool forcedReshape = head->dnn->IsBackwardPerformed() != GetDnn()->IsBackwardPerformed(); + + // Set the internal network parameters from the external network parameters + head->dnn->setProcessingParams( GetDnn()->IsRecurrentMode(), GetDnn()->GetMaxSequenceLength(), + GetDnn()->IsReverseSequense(), GetDnn()->IsBackwardPerformed() ); + head->dnn->RequestReshape( forcedReshape ); + head->dnn->SetInitializer( GetDnn()->GetInitializer() ); + + head->dnn->SetSolver( GetDnn()->GetSolver() ); + head->dnn->reshape(); + configureForBackwardAndLearn(); + + outputDescs[0] = head->sinkLayer->inputDescs[0]; + head->firstAdapterNum = num; + head->increment(); +} + +void CDnnHeadAdapterLayer::configureFromHead() +{ + outputDescs[0] = head->sinkLayer->inputDescs[0]; + head->increment(); +} + +void CDnnHeadAdapterLayer::saveBlobs() +{ + for( int i = 0; i < head->inputLayers.Size(); ++i ) { + innerInputBlobs.Add( head->inputLayers[i]->inputBlobs[0]->GetCopy() ); + } + + for( int i = 0; i < head->outputLayers.Size(); ++i ) { + innerOutputBlobs.Add( head->outputLayers[i]->outputBlobs[0]->GetCopy() ); + } +} + +void CDnnHeadAdapterLayer::loadBlobs() +{ + for( int i = 0; i < head->inputLayers.Size(); ++i ) { + head->inputLayers[i]->inputBlobs[0] = innerInputBlobs[i]; + } + + for( int i = 0; i < head->outputLayers.Size(); ++i ) { + head->outputLayers[i]->outputBlobs[0] = innerOutputBlobs[i]; + } +} + +void CDnnHeadAdapterLayer::configureForBackwardAndLearn() +{ + head->blobsForBackward = 0; + head->blobsForLearn = 0; + const bool hasBackward = IsBackwardPerformed(); + bool hasLearn = IsLearningPerformed(); + + for( int i = 0; i < head->dnn->layers.Size(); ++i ) { + hasLearn |= head->dnn->layers[i]->IsLearningPerformed(); + auto layer = dynamic_cast( head->dnn->layers[i].Ptr() ); + if( layer != nullptr ) { + layer->SetHeadCounter( head->connections.Size() ); + } + } + + if( !hasBackward && !hasLearn ) { + return; + } + + for( int layerIndex = 0; layerIndex < head->dnn->layers.Size(); ++layerIndex ) { + const CBaseLayer& layer = *head->dnn->layers[layerIndex]; + if( layer.IsBackwardPerformed() && ( layer.BlobsForBackward() & TInputBlobs ) ) { + head->inputLayers.Add( head->dnn->layers[layerIndex] ); + } else if( layer.IsLearningPerformed() && ( layer.BlobsForLearn() & TInputBlobs ) ) { + head->inputLayers.Add( head->dnn->layers[layerIndex] ); + } + + if( layer.IsBackwardPerformed() && ( layer.BlobsForBackward() & TOutputBlobs ) ) { + head->outputLayers.Add( head->dnn->layers[layerIndex] ); + } else if( layer.IsLearningPerformed() && ( layer.BlobsForLearn() & TOutputBlobs ) ) { + head->outputLayers.Add( head->dnn->layers[layerIndex] ); + } + + if( ( !hasBackward || head->blobsForBackward 
!= 0 ) && ( !hasLearn || head->blobsForLearn != 0 ) ) { + break; + } + + for( int inputIndex = 0; inputIndex < layer.GetInputCount(); ++inputIndex ) { + if( dynamic_cast( layer.GetInputLayer( inputIndex ) ) != nullptr ) { + if( hasBackward && layer.IsBackwardPerformed() && ( layer.BlobsForBackward() & TInputBlobs ) != 0 ) { + head->blobsForBackward |= TInputBlobs; + } + if( hasLearn && layer.IsLearningPerformed() && ( layer.BlobsForLearn() & TInputBlobs ) != 0 ) { + head->blobsForLearn |= TInputBlobs; + } + break; + } + } + } +} + +} // namespace NeoML diff --git a/NeoML/src/Dnn/Layers/DropoutLayer.cpp b/NeoML/src/Dnn/Layers/DropoutLayer.cpp index b833451e7..2d6dd8aec 100644 --- a/NeoML/src/Dnn/Layers/DropoutLayer.cpp +++ b/NeoML/src/Dnn/Layers/DropoutLayer.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -103,7 +103,8 @@ void CDropoutLayer::BackwardOnce() MathEngine().Dropout( *desc, outputDiffBlobs[0]->GetData(), inputDiffBlobs[0]->GetData() ); - if( !GetDnn()->IsRecurrentMode() || GetDnn()->IsFirstSequencePos() ) { + if( ( !GetDnn()->IsRecurrentMode() || GetDnn()->IsFirstSequencePos() ) && ++headCount == headConnections ) { + headCount = 0; // Clear the memory after the whole sequence is processed destroyDropoutDesc(); } @@ -112,8 +113,8 @@ void CDropoutLayer::BackwardOnce() void CDropoutLayer::initDropoutDesc() { if( desc == 0 ) { - desc = MathEngine().InitDropout( dropoutRate, isSpatial, isBatchwise, inputBlobs[0]->GetDesc(), outputBlobs[0]->GetDesc(), - GetDnn()->Random().Next() ); + desc = MathEngine().InitDropout( dropoutRate, isSpatial, isBatchwise, inputBlobs[0]->GetDesc(), + outputBlobs[0]->GetDesc(), GetDnn()->Random().Next() ); } } diff --git a/NeoML/src/Dnn/Optimization/OptimizerFunctions.cpp b/NeoML/src/Dnn/Optimization/OptimizerFunctions.cpp index c27844b75..1f5b488e2 100644 --- a/NeoML/src/Dnn/Optimization/OptimizerFunctions.cpp +++ b/NeoML/src/Dnn/Optimization/OptimizerFunctions.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ limitations under the License. 
#include "OptimizerFunctions.h" #include #include +#include #include #include #include @@ -28,7 +29,7 @@ namespace NeoML { namespace optimization { // Returns copy of an original layer -CPtr copyLayer( CBaseLayer& original ) +static CPtr copyLayer( CBaseLayer& original ) { CMemoryFile file; { @@ -150,6 +151,27 @@ int UnpackComposites( CGraph& graph ) //--------------------------------------------------------------------------------------------------------------------- +int OptimizeDnnHeadAdapters( CGraph& graph ) +{ + CArray layers; + graph.GetLayers( layers ); + + int result = 0; + for( CBaseLayer* layer : layers ) { + CDnnHeadAdapterLayer* adapter = dynamic_cast( layer ); + if( adapter != nullptr ) { + CDnnHead* head = adapter->GetDnnHead(); + NeoAssert( head != nullptr ); + if( OptimizeDnn( head->GetDnn() ).IsOptimized() ) { + ++result; + } + } + } + return result; +} + +//--------------------------------------------------------------------------------------------------------------------- + int RemoveTrivialLayers( CGraph& graph ) { int trivialLayersRemoved = 0; @@ -176,6 +198,6 @@ int RemoveTrivialLayers( CGraph& graph ) return trivialLayersRemoved; } -} +} // namespace optimization -} +} // namespace NeoML diff --git a/NeoML/src/Dnn/Optimization/OptimizerFunctions.h b/NeoML/src/Dnn/Optimization/OptimizerFunctions.h index d9bebbcb5..ccbd66122 100644 --- a/NeoML/src/Dnn/Optimization/OptimizerFunctions.h +++ b/NeoML/src/Dnn/Optimization/OptimizerFunctions.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,6 +25,9 @@ class CGraph; // Returns the number of unpacked composites int UnpackComposites( CGraph& graph ); +// TODO: unpack if the only head adapter is using a head dnn +int OptimizeDnnHeadAdapters( CGraph& graph ); + // Removes trivial layers (dropouts, linear(1,0) etc.) // Returns the number of removed layers int RemoveTrivialLayers( CGraph& graph ); diff --git a/NeoML/test/data/LayersSerializationTestData/NeoMLDnnHeadAdapterLayer.arch b/NeoML/test/data/LayersSerializationTestData/NeoMLDnnHeadAdapterLayer.arch new file mode 100644 index 000000000..d4a7bd30b Binary files /dev/null and b/NeoML/test/data/LayersSerializationTestData/NeoMLDnnHeadAdapterLayer.arch differ diff --git a/NeoML/test/src/CMakeLists.txt b/NeoML/test/src/CMakeLists.txt index 7599ee194..2fc085a7d 100644 --- a/NeoML/test/src/CMakeLists.txt +++ b/NeoML/test/src/CMakeLists.txt @@ -13,6 +13,7 @@ target_sources(${PROJECT_NAME} INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CtcTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnBlobTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnDistributedTest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/DnnHeadTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnLayersSerializationTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnSerializationTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DnnSimpleTests.cpp diff --git a/NeoML/test/src/DnnHeadTest.cpp b/NeoML/test/src/DnnHeadTest.cpp new file mode 100644 index 000000000..8a8f67e3f --- /dev/null +++ b/NeoML/test/src/DnnHeadTest.cpp @@ -0,0 +1,301 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--------------------------------------------------------------------------------------------------------------*/ + +#include +#pragma hdrstop + +#include +#include +#include + +using namespace NeoML; +using namespace NeoMLTest; + +//---------------------------------------------------------------------------------------------------------------------- + +namespace NeoMLTest { + +static void initializeDnnBlobs( CDnn& dnn ) +{ + CRandom random( 0 ); + CDnnUniformInitializer init( random, -0.5, 0.5 ); + + CDnnBlob* source1Blob = CDnnBlob::CreateTensor( MathEngine(), CT_Float, { 1, 1, 1, 4, 2, 3, 10 } ); + init.InitializeLayerParams( *source1Blob, -1 ); + CheckCast( dnn.GetLayer( "source1" ).Ptr() )->SetBlob( source1Blob ); + + CDnnBlob* source2Blob = CDnnBlob::CreateTensor( MathEngine(), CT_Float, { 1, 1, 1, 4, 2, 3, 10 } ); + init.InitializeLayerParams( *source2Blob, -1 ); + CheckCast( dnn.GetLayer( "source2" ).Ptr() )->SetBlob( source2Blob ); + + CDnnBlob* source3Blob = CDnnBlob::CreateTensor( MathEngine(), CT_Float, { 1, 1, 1, 4, 2, 3, 10 } ); + init.InitializeLayerParams( *source3Blob, -1 ); + CheckCast( dnn.GetLayer( "source3" ).Ptr() )->SetBlob( source3Blob ); + + CDnnBlob* targetBlob = CDnnBlob::CreateTensor( MathEngine(), CT_Float, { 1, 1, 1, 1, 1, 1, 3 } ); + targetBlob->GetData().SetValueAt( 0, -1.5f ); + targetBlob->GetData().SetValueAt( 1, 2.4f ); + targetBlob->GetData().SetValueAt( 2, 4.8f ); + CheckCast( dnn.GetLayer( "target" ).Ptr() )->SetBlob( targetBlob ); +} + +static void createDnn( CDnn& dnn, bool isNaive, int complexity = 1000, float dropoutRate = 0.3f, bool freeTerm = false ) +{ + CPtr source1 = Source( dnn, "source1" ); + CPtr source2 = Source( dnn, "source2" ); + CPtr source3 = Source( dnn, "source3" ); + CPtr targets = Source( dnn, "target" ); + + CPtr fc01 = FullyConnected( complexity, freeTerm )( "fc01", source1.Ptr() ); + CPtr fc02 = FullyConnected( complexity, freeTerm )( "fc02", source2.Ptr() ); + CPtr fc03 = FullyConnected( complexity, freeTerm )( "fc03", source3.Ptr() ); + + CPtr concat; + + if( isNaive ) { + // Same architecture but without Head to compare [target] + // | + // [source1] --> [fc01] --> [ ]-->[gelu]-->[ ]-->[relu]-->[ ] --> [ ] v + // |fc1] |fc2| |fc3| |concat| --> [loss] + // [source2] --> [fc02] --> [ ]-->[gelu]-->[ ]-->[relu]-->[ ] --> [ ] + // + + CPtr fc1 = FullyConnected( complexity / 20, freeTerm )( "fc1", fc01.Ptr(), fc02.Ptr(), fc03.Ptr() ); + CPtr gelu1 = Gelu()( "gelu1", CDnnLayerLink{ fc1, 0 } ); + CPtr gelu2 = Gelu()( "gelu2", CDnnLayerLink{ fc1, 1 } ); + CPtr gelu3 = Gelu()( "gelu3", CDnnLayerLink{ fc1, 2 } ); + + CPtr fc2 = FullyConnected( complexity / 60, freeTerm )( "fc2", gelu1.Ptr(), gelu2.Ptr(), gelu3.Ptr() ); + CPtr relu1 = Relu()( "relu1", CDnnLayerLink{ fc2, 0 } ); + CPtr relu2 = Relu()( "relu2", CDnnLayerLink{ fc2, 1 } ); + CPtr relu3 = Relu()( "relu3", CDnnLayerLink{ fc2, 2 } ); + + CPtr dropout1 = Dropout( dropoutRate )( "dp1", relu1.Ptr() ); + CPtr dropout2 = Dropout( dropoutRate )( "dp2", relu2.Ptr() ); + CPtr dropout3 = Dropout( dropoutRate )( "dp3", relu3.Ptr() ); + CPtr fc3 = FullyConnected( 1 )( "fc3", dropout1.Ptr(), 
dropout2.Ptr(), dropout3.Ptr() ); + + concat = ConcatChannels()( "concat", + CDnnLayerLink{ fc3, 0 }, CDnnLayerLink{ fc3, 1 }, CDnnLayerLink{ fc3, 2 } ); + + } else { + // +-----[fc01]- ---+ + // | | +-----------+ [target] + // | v | | | + // [source1] |-----------------------------------------| v v + // |[fc1]->[gelu]->[fc2]->[relu]->[dp]->[fc3]| [concat]->[loss] + // [source2] |-----------------------------------------| ^ + // | ^ | | + // | | +-----------+ + // +-----[fc02]-----+ + + CPtr head = new CDnnHead( + dnn.Random(), dnn.GetMathEngine(), + FullyConnected( complexity / 20, freeTerm ), // "fc1" + Gelu(), + FullyConnected( complexity / 60, freeTerm ), // "fc2" + Relu(), + Dropout( dropoutRate ), + FullyConnected( 1 ) // "fc3", + ); + + CPtr head1 = DnnHeadAdapter( head )( "head1", fc01.Ptr() ); + CPtr head2 = DnnHeadAdapter( head )( "head2", fc02.Ptr() ); + CPtr head3 = DnnHeadAdapter( head )( "head3", fc03.Ptr() ); + + concat = ConcatChannels()( "concat", head1.Ptr(), head2.Ptr(), head3.Ptr() ); + } + + CPtr loss = EuclideanLoss()( "loss", concat.Ptr(), targets.Ptr() ); + CPtr sink = Sink( concat.Ptr(), "sink" ); + + CPtr solver = new CDnnAdaptiveGradientSolver( MathEngine() ); + solver->SetLearningRate( /*learningRate*/1e-3f ); + dnn.SetSolver( solver.Ptr() ); + + initializeDnnBlobs( dnn ); +} + +static void testDnnAdapterPerformace( bool isNaive, int interations = 1000, bool train = true ) +{ + IPerformanceCounters* counters = MathEngine().CreatePerformanceCounters(); + const char* fileName = "DnnAdapter.cnnarch"; + + GTEST_LOG_( INFO ) << "\n interations = " << interations << " is_naive = " << isNaive << "\n" + << "|" << std::setw( 10 ) << "size " + << "|" << std::setw( 21 ) << "Train " << "|" << std::setw( 21 ) << "Inference " << "|\n" + << "|" << std::setw( 10 ) << "" + << "|" << std::setw( 10 ) << "time (ms) " << "|" << std::setw( 10 ) << "mem (MB) " + << "|" << std::setw( 10 ) << "time (ms) " << "|" << std::setw( 10 ) << "mem (MB) " << "|\n"; + + const int complexity = 1000; + for( int size = 1 * complexity; size <= 4 * complexity; size += complexity ) { + { + CRandom random( 0 ); + CDnn dnn( random, MathEngine() ); + + createDnn( dnn, isNaive, size ); + OptimizeDnn( dnn ); + + dnn.CleanUp( /*force*/true ); + initializeDnnBlobs( dnn ); + + MathEngine().CleanUp(); + MathEngine().ResetPeakMemoryUsage(); + + if( train ) { + dnn.RunAndLearnOnce(); + counters->Synchronise(); + for( int i = 0; i < interations; ++i ) { + dnn.RunAndLearnOnce(); + } + counters->Synchronise(); + } + CArchiveFile file( fileName, CArchive::store, GetPlatformEnv() ); + CArchive archive( &file, CArchive::store ); + archive << dnn; + } + double train_time = train ? ( double( ( *counters )[0].Value ) / 1000000 ) : 0.; + double train_mem = train ? 
( double( MathEngine().GetPeakMemoryUsage() ) / 1024 / 1024 ) : 0.; + + { + CRandom random( 0 ); + CDnn dnn( random, MathEngine() ); + + CArchiveFile file( fileName, CArchive::load, GetPlatformEnv() ); + CArchive archive( &file, CArchive::load ); + archive >> dnn; + + dnn.CleanUp( /*force*/true ); + initializeDnnBlobs( dnn ); + + MathEngine().CleanUp(); + MathEngine().ResetPeakMemoryUsage(); + + dnn.RunOnce(); + counters->Synchronise(); + for( int i = 0; i < interations; ++i ) { + dnn.RunOnce(); + } + counters->Synchronise(); + } + double inference_time = double( ( *counters )[0].Value ) / 1000000; + double inference_mem = double( MathEngine().GetPeakMemoryUsage() ) / 1024 / 1024; + + std::cout + << "|" << std::setw( 10 ) << size + << "|" << std::setw( 10 ) << train_time << "|" << std::setw( 10 ) << train_mem + << "|" << std::setw( 10 ) << inference_time << "|" << std::setw( 10 ) << inference_mem << "|\n"; + } + delete counters; +} + +} // namespace NeoMLTest + +//---------------------------------------------------------------------------------------------------------------------- + +TEST( CDnnHeadTest, DnnHeadAdapterLearnTest ) +{ + CRandom random( 0x17 ); + CDnn dnn( random, MathEngine() ); + createDnn( dnn, /*isNaive*/false, /*complexity*/1000, /*dropout*/0.f ); + + for( int i = 0; i < 200; ++i ) { + dnn.RunAndLearnOnce(); + } + + EXPECT_NEAR( CheckCast( dnn.GetLayer( "loss" ).Ptr() )->GetLastLoss(), 0, 1e-3f ); +} + +TEST( CDnnHeadTest, DnnHeadAdapterInferenceMatch ) +{ + auto runOnce = []( bool isNaive ) + { + CRandom random( 0x11 ); + CPtr init = new CDnnUniformInitializer( random, 0.05f, 0.05f ); + + CDnn dnn( random, MathEngine() ); + dnn.SetInitializer( init.Ptr() ); + createDnn( dnn, isNaive ); + + dnn.RunOnce(); + return CheckCast( dnn.GetLayer( "sink" ).Ptr() )->GetBlob(); + }; + + CPtr expected = runOnce( /*isNaive*/false ); + CPtr output = runOnce( /*isNaive*/true ); + + EXPECT_TRUE( CompareBlobs( *expected, *output ) ); +} + +TEST( CDnnHeadTest, DnnHeadAdapterLearningMatch ) +{ + CRandom random( 0x01 ); + CPtr init = new CDnnUniformInitializer( random, 0.05f, 0.05f ); + + CDnn dnnNoAdapters( random, MathEngine() ); + dnnNoAdapters.SetInitializer( init.Ptr() ); + createDnn( dnnNoAdapters, /*isNaive*/true, /*complexity*/1000, /*dropout*/0.f, /*freeTerm*/false ); + + CRandom randomWithAdapters( 0x01 ); + CDnn dnnWithAdapters( randomWithAdapters, MathEngine() ); + dnnWithAdapters.SetInitializer( init.Ptr() ); + createDnn( dnnWithAdapters, /*isNaive*/false, /*complexity*/1000, /*dropout*/0.f, /*freeTerm*/false ); + + CPtr expectedLoss = CheckCast( dnnNoAdapters.GetLayer( "loss" ).Ptr() ); + CPtr outputLoss = CheckCast( dnnWithAdapters.GetLayer( "loss" ).Ptr() ); + + for( int i = 0; i < 100; ++i ) { + dnnNoAdapters.RunAndLearnOnce(); + dnnWithAdapters.RunAndLearnOnce(); + EXPECT_NEAR( expectedLoss->GetLastLoss(), outputLoss->GetLastLoss(), 1e-3f ); + } +} + +TEST( CDnnHeadTest, DnnHeadAdapterSerializationTest ) +{ + CRandom random( 0 ); + CDnn dnn( random, MathEngine() ); + + createDnn( dnn, /*isNaive*/false ); + dnn.RunOnce(); + + CPtr expected = CheckCast( dnn.GetLayer( "sink" ).Ptr() )->GetBlob(); + { + CMemoryFile file; + { + CArchive archive( &file, CArchive::store ); + dnn.Serialize( archive ); + } + file.SeekToBegin(); + { + CArchive archive( &file, CArchive::load ); + dnn.Serialize( archive ); + } + } + initializeDnnBlobs( dnn ); + dnn.RunOnce(); + CPtr output = CheckCast( dnn.GetLayer( "sink" ).Ptr() )->GetBlob(); + EXPECT_TRUE( CompareBlobs( *expected, *output ) ); +} + 
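+// The disabled benchmark below builds the same network with the shared head ( isNaive = false ) and as a plain
+// unrolled network ( isNaive = true ), and reports train/inference time and peak memory for several layer sizes.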
+TEST( CDnnHeadTest, DISABLED_DnnHeadAdapterInferencePerformance )
+{
+	DeleteMathEngine();
+	testDnnAdapterPerformace( /*isNaive*/false, /*interations*/200 );
+
+	DeleteMathEngine();
+	testDnnAdapterPerformace( /*isNaive*/true, /*interations*/200 );
+}
diff --git a/NeoML/test/src/DnnLayersSerializationTest.cpp b/NeoML/test/src/DnnLayersSerializationTest.cpp
index 5b9d80b58..d0b3916fb 100644
--- a/NeoML/test/src/DnnLayersSerializationTest.cpp
+++ b/NeoML/test/src/DnnLayersSerializationTest.cpp
@@ -3410,3 +3410,79 @@ GTEST_TEST( SerializeFromFile, LoraFullyConnectedLayerSerialization )
 {
 	checkSerializeLayer( "NeoMLDnnLoraFullyConnectedLayer" );
 }
+
+// ====================================================================================================================
+
+// CDnnHeadAdapterLayer
+
+static CPtr<CDnnHeadAdapterLayer> createDnnHeadAdapterNet( CDnn& dnn )
+{
+	CPtr<CDnnHead> head = new CDnnHead( dnn.Random(), MathEngine(),
+		FullyConnected( 300 ),
+		Relu()
+	);
+
+	CPtr<CDnnHeadAdapterLayer> layerPtr = new CDnnHeadAdapterLayer( MathEngine() );
+	layerPtr->SetName( LayerName );
+	layerPtr->SetDnnHead( head );
+	dnn.AddLayer( *layerPtr );
+
+	return layerPtr;
+}
+
+#ifdef GENERATE_SERIALIZATION_FILES
+
+GTEST_TEST( SerializeToFile, DnnHeadAdapterLayerSerialization )
+{
+	CRandom random;
+	CDnn dnn( random, MathEngine() );
+
+	CPtr<CDnnHeadAdapterLayer> layerPtr = createDnnHeadAdapterNet( dnn );
+	setBaseParams( *layerPtr );
+
+	CArchiveFile file( getFileName( "NeoMLDnnHeadAdapterLayer" ), CArchive::store );
+	CArchive archive( &file, CArchive::store );
+	archive.Serialize( dnn );
+}
+
+#endif // GENERATE_SERIALIZATION_FILES
+
+template<>
+inline void checkSpecificParams( CDnnHeadAdapterLayer& layer )
+{
+	auto runOnce = []( CDnn& dnn, CDnnHeadAdapterLayer& adapter )
+	{
+		CSourceLayer* source = dnn.HasLayer( "source" )
+			? CheckCast<CSourceLayer>( dnn.GetLayer( "source" ).Ptr() )
+			: Source( dnn, "source" );
+
+		CPtr<CDnnBlob> blob = CDnnBlob::CreateVector( MathEngine(), CT_Float, 2 );
+		blob->Fill( TestFloatValue );
+
+		source->SetBlob( blob );
+		adapter.Connect( *source );
+
+		CSinkLayer* sink = dnn.HasLayer( "sink" )
+			? CheckCast<CSinkLayer>( dnn.GetLayer( "sink" ).Ptr() )
+			: Sink( &adapter, "sink" );
+
+		dnn.RunOnce();
+		return sink->GetBlob();
+	};
+
+	CRandom random;
+	CDnn dnn( random, MathEngine() );
+
+	CPtr<CDnnBlob> expected = runOnce( dnn, *createDnnHeadAdapterNet( dnn ) );
+	CPtr<CDnnBlob> output = runOnce( *layer.GetDnn(), layer );
+	EXPECT_TRUE( CompareBlobs( *expected, *output ) );
+	EXPECT_TRUE( layer.GetDnnHead() ); // found and initialized
+}
+
+GTEST_TEST( SerializeFromFile, DnnHeadAdapterLayerSerialization )
+{
+	checkSerializeLayer( "NeoMLDnnHeadAdapterLayer" );
+}
+
+// ====================================================================================================================
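For reference, a minimal usage sketch of the CDnnHead / DnnHeadAdapter API introduced in this change, mirroring the helper wrappers used in DnnHeadTest.cpp above (the layer sizes and layer names here are illustrative, not taken from the change itself):

CRandom random( 0 );
CDnn dnn( random, MathEngine() );

// Regular inputs of the outer network.
CPtr<CSourceLayer> source1 = Source( dnn, "source1" );
CPtr<CSourceLayer> source2 = Source( dnn, "source2" );

// One head owns a small internal CDnn built from the wrapped layers;
// every adapter connected to it shares the same trainable parameters.
CPtr<CDnnHead> head = new CDnnHead( dnn.Random(), dnn.GetMathEngine(),
	FullyConnected( 50 ),
	Relu(),
	FullyConnected( 1 ) );

CPtr<CDnnHeadAdapterLayer> head1 = DnnHeadAdapter( head )( "head1", source1.Ptr() );
CPtr<CDnnHeadAdapterLayer> head2 = DnnHeadAdapter( head )( "head2", source2.Ptr() );

// The adapters' outputs are used like any other layer outputs.
CPtr<CConcatChannelsLayer> concat = ConcatChannels()( "concat", head1.Ptr(), head2.Ptr() );
Sink( concat.Ptr(), "sink" );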