Skip to content

Commit 86bbd5c

Browse files
committed
[NeoML] Remove excess CUDA syncs in layers (neoml-lib#1070)
Signed-off-by: Kirill Golikov <[email protected]>
1 parent 6910163 commit 86bbd5c

File tree

81 files changed

+2080
-2600
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

81 files changed

+2080
-2600
lines changed

NeoML/include/NeoML/Dnn/DnnSolver.h

Lines changed: 31 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,13 @@ class CDnn;
2828
class NEOML_API CDnnSolver : virtual public IObject {
2929
public:
3030
// Stores the calculated values of layer parameters gradients for further use in Train method
31-
// forSharedWeightsLayer=true should only be used within layers that share weights with other layers.
31+
// sharedWeights=true should only be used within layers that share weights with other layers
3232
void AddDiff( CBaseLayer* layer, const CObjectArray<CDnnBlob>& paramDiffBlobs,
3333
bool sharedWeights = false );
3434

3535
// Modifies the trainable parameters of the network layers,
3636
// using the accumulated gradients and previous steps' history (moment, etc.)
3737
void Train( float distributedCoeff = 1.f );
38-
3938
// Resets to the initial state
4039
void Reset();
4140

@@ -62,11 +61,17 @@ class NEOML_API CDnnSolver : virtual public IObject {
6261

6362
// Gets the reference to the math engine
6463
IMathEngine& MathEngine() const { return mathEngine; }
64+
// Get the intermediate result storing blob
65+
const CDnnBlob& TempBlob() const { return *temporaryBlob; }
66+
// Intermediate result storing blob
67+
// hide it to private, its allocated size may > actual
68+
CFloatHandle TempData();
69+
// Reinitialize the intermediate result storing blob
70+
bool ReInitTempBlob( int dataSize );
6571

6672
// Called once on Reset method call
6773
// Resets the stats in the inheriting instances to the initial state
6874
virtual void OnReset() {}
69-
7075
// On each training step the method is called once, before the call to TrainLayer for all layers
7176
virtual void OnTrain() {}
7277

@@ -78,13 +83,20 @@ class NEOML_API CDnnSolver : virtual public IObject {
7883

7984
private:
8085
IMathEngine& mathEngine;
86+
CPtr<CDnnBlob> gradParams;
87+
88+
// MathEngine memory stored variables for calculations
8189
float learningRate;
8290
float regularizationL2;
8391
float regularizationL1;
8492
float maxGradientNorm;
8593
float clipGradientMin;
8694
float clipGradientMax;
8795

96+
// Intermediate result storing
97+
// hide it to private, its allocated size may > actual
98+
CPtr<CDnnBlob> temporaryBlob;
99+
88100
// The blobs sum
89101
struct CDiffBlobSum final {
90102
const CBaseLayer* LayerOwner{}; // for the given layer
@@ -141,7 +153,7 @@ void NEOML_API SerializeSolver( CArchive& archive, CDnn& dnn, CPtr<CDnnSolver>&
141153
//---------------------------------------------------------------------------------------------------------------------
142154

143155
template<class T>
144-
class CSolverClassRegistrar {
156+
class CSolverClassRegistrar final {
145157
public:
146158
explicit CSolverClassRegistrar( const char* solverName );
147159
~CSolverClassRegistrar();
@@ -168,40 +180,27 @@ inline CSolverClassRegistrar<T>::~CSolverClassRegistrar()
168180
class NEOML_API CDnnSimpleGradientSolver : public CDnnSolver {
169181
NEOML_DNN_SOLVER( CDnnSimpleGradientSolver )
170182
public:
171-
CDnnSimpleGradientSolver( IMathEngine& mathEngine );
183+
explicit CDnnSimpleGradientSolver( IMathEngine& mathEngine );
172184

173185
// Moment decay rate (moment is a weighted sum of previous gradients)
174186
float GetMomentDecayRate() const { return momentDecayRate; }
175187
void SetMomentDecayRate(float decayRate) { momentDecayRate = decayRate; }
176-
188+
// Backward compatibility mode
177189
bool IsInCompatibilityMode() const { return isInCompatibilityMode; }
178190
void SetCompatibilityMode( bool compatibilityMode ) { isInCompatibilityMode = compatibilityMode; }
179191

180192
void Serialize( CArchive& archive, const CDnn& dnn ) override;
181193

182194
protected:
195+
// Updates the trainable weights of the layer
183196
void TrainLayer( const CBaseLayer* layer, const CObjectArray<CDnnBlob>& paramBlobs,
184197
const CObjectArray<CDnnBlob>& paramDiffBlobs, CObjectArray<CDnnBlob>& gradientHistory ) override;
185198

186199
private:
187200
// Moment decay rate (moment is a weighted sum of previous gradients)
188201
float momentDecayRate;
189-
190202
// Backward compatibility mode
191203
bool isInCompatibilityMode;
192-
193-
// Temporary variables of Handle type, used for calculations
194-
enum TTempVariable {
195-
TV_MomentDecayRateVar = 0,
196-
TV_OpMomentDecayRateVar,
197-
TV_OpRegL2MomentDecayRateVar,
198-
TV_RateVar,
199-
TV_L1Threshold,
200-
TV_L1Mult,
201-
TV_Count
202-
};
203-
204-
CPtr<CDnnBlob> tempVariables;
205204
};
206205

207206
//---------------------------------------------------------------------------------------------------------------------
@@ -210,7 +209,7 @@ class NEOML_API CDnnSimpleGradientSolver : public CDnnSolver {
210209
class NEOML_API CDnnAdaptiveGradientSolver : public CDnnSolver {
211210
NEOML_DNN_SOLVER( CDnnAdaptiveGradientSolver )
212211
public:
213-
CDnnAdaptiveGradientSolver( IMathEngine& mathEngine );
212+
explicit CDnnAdaptiveGradientSolver( IMathEngine& mathEngine );
214213

215214
// Retrieves and sets the moment decay rate (moment is a weighted sum of previous gradients)
216215
float GetMomentDecayRate() const { return momentDecayRate; }
@@ -222,7 +221,7 @@ class NEOML_API CDnnAdaptiveGradientSolver : public CDnnSolver {
222221
// Retrieves and sets the espilon used to avoid division by zero when calculating second moment
223222
float GetEpsilon() const { return epsilon; }
224223
void SetEpsilon( float newEpsilon ) { epsilon = newEpsilon; }
225-
224+
// Backward compatibility mode
226225
bool IsInCompatibilityMode() const { return isInCompatibilityMode; }
227226
void SetCompatibilityMode( bool compatibilityMode ) { isInCompatibilityMode = compatibilityMode; }
228227

@@ -249,7 +248,7 @@ class NEOML_API CDnnAdaptiveGradientSolver : public CDnnSolver {
249248
// Prepares for the next training step
250249
void OnTrain() override;
251250
// Updates the trainable weights of the layer
252-
virtual void TrainLayer( const CBaseLayer* layer, const CObjectArray<CDnnBlob>& paramBlobs,
251+
void TrainLayer( const CBaseLayer* layer, const CObjectArray<CDnnBlob>& paramBlobs,
253252
const CObjectArray<CDnnBlob>& paramDiffBlobs, CObjectArray<CDnnBlob>& gradientHistory ) override;
254253

255254
private:
@@ -284,27 +283,8 @@ class NEOML_API CDnnAdaptiveGradientSolver : public CDnnSolver {
284283
bool isAmsGradEnabled;
285284
// Perform weight decay after calculating the moving averages
286285
bool isDecoupledWeightDecay;
287-
288286
// Backward compatibility mode
289287
bool isInCompatibilityMode;
290-
291-
enum TTempVariable {
292-
TV_MomentDecayRateVar = 0,
293-
TV_SecondMomentDecayRateVar,
294-
TV_RegL2Var,
295-
TV_OpMomentDecayRateVar,
296-
TV_OpSecondMomentDecayRateVar,
297-
TV_RateVar,
298-
TV_L1Threshold,
299-
TV_L1Mult,
300-
TV_EpsilonVar,
301-
TV_Count
302-
};
303-
304-
// Temporary Handle variables for calculations
305-
CPtr<CDnnBlob> tempVariables;
306-
307-
CPtr<CDnnBlob> temporaryBlob;
308288
};
309289

310290
//---------------------------------------------------------------------------------------------------------------------
@@ -389,26 +369,6 @@ class NEOML_API CDnnNesterovGradientSolver : public CDnnSolver {
389369
float muTPlusOne; // the mu coefficient for the next step
390370
float productMuT; // the product of mu coefficient over all steps including the current one
391371

392-
enum TTempVariable {
393-
TV_MomentDecayRateVar = 0,
394-
TV_SecondMomentDecayRateVar,
395-
TV_RegL2Var,
396-
TV_OpMomentDecayRateVar,
397-
TV_OpSecondMomentDecayRateVar,
398-
TV_RateVar,
399-
TV_L1Threshold,
400-
TV_L1Mult,
401-
TV_EpsilonVar,
402-
TV_InvOpSecondMomentDecayRateNVar, // 1 / (1 - secondMomentDecay ^ N)
403-
TV_MBarGradMultVar, // the gradient coefficient in the total sum
404-
TV_MBarMomentMultVar, // the moment coefficient in the total sum
405-
TV_Count
406-
};
407-
408-
// Temporary blobs for calculations
409-
CPtr<CDnnBlob> tempVariables;
410-
411-
CPtr<CDnnBlob> temporaryBlob;
412372
// m with a stroke (from the paper referred to)
413373
// It is a weighted sum of the gradient and the first moment
414374
CPtr<CDnnBlob> mBarBlob;
@@ -492,11 +452,12 @@ class NEOML_API CDnnLambGradientSolver : public CDnnSolver {
492452
void Serialize( CArchive& archive, const CDnn& dnn ) override;
493453

494454
protected:
455+
// Prepares for the next training step
456+
void OnTrain() override;
457+
// Updates the trainable weights of the layer
495458
void TrainLayer( const CBaseLayer* layer, const CObjectArray<CDnnBlob>& paramBlobs,
496459
const CObjectArray<CDnnBlob>& paramDiffBlobs, CObjectArray<CDnnBlob>& gradientHistory ) override;
497460

498-
void OnTrain() override;
499-
500461
private:
501462
// The gradientHistory array stores the previous values of gradients of different types
502463
enum TGradientHistoryType {
@@ -519,48 +480,28 @@ class NEOML_API CDnnLambGradientSolver : public CDnnSolver {
519480
// Is NVLamb modification used
520481
bool useNvLamb;
521482

522-
enum TTempVariable {
523-
TV_MomentDecayRateVar,
524-
TV_SecondMomentDecayRateVar,
525-
TV_OpMomentDecayRateVar,
526-
TV_OpSecondMomentDecayRateVar,
527-
TV_RateVar,
528-
TV_EpsilonVar,
529-
TV_WeightDecayVar,
530-
TV_ClipMultiplierVar,
531-
TV_LayerNormVar,
532-
TV_TrustRatioVar,
533-
TV_L2NormVar,
534-
535-
TV_Count
536-
};
537-
538-
CPtr<CDnnBlob> tempVariables;
539-
540-
CPtr<CDnnBlob> tempBlob;
541-
483+
CPtr<CDnnBlob> normL2Var;
542484
CArray<float> layersGradientNormSquare;
543485
float totalGradientNorm;
544486

545487
// Layer excluded from optimization
546-
struct CExcludedLayer {
488+
struct CExcludedLayer final {
547489
// Layer name (or substring)
548490
CString LayerName;
549491
// Match type (exact or substring)
550-
TExcludeLayerNameMatchType MatchType;
492+
TExcludeLayerNameMatchType MatchType{ ELNMT_Exact };
551493
// Parameter number
552494
// -1 if all parameters
553-
int ParamIndex;
554-
555-
CExcludedLayer() : MatchType( ELNMT_Exact ), ParamIndex( NotFound ) {}
495+
int ParamIndex{ NotFound };
556496
};
557497
// Layers excluded from weight decay
558498
CArray<CExcludedLayer> excludedLayers;
499+
mutable CPtr<CDnnBlob> tempNormBlob;
559500

560501
float calcL2NormAverage( const CConstFloatHandle& data, int dataSize ) const;
561502
void getWeightDecayIndices( const CBaseLayer& layer, int paramsCount, CHashTable<int>& indexes ) const;
562503

563-
void calcNormalizeMultiplier( const CDnnBlob& weights, const CDnnBlob& update, const CFloatHandle& multiplier ) const;
504+
float calcNormalizeMultiplier( const CDnnBlob& weights, const CDnnBlob& update ) const;
564505
};
565506

566507
template<typename TLayer>

NeoML/include/NeoML/Dnn/Layers/ActivationLayers.h

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ class CActivationDesc;
2828
class NEOML_API CLinearLayer : public CBaseInPlaceLayer, public IActivationLayer {
2929
NEOML_DNN_LAYER( CLinearLayer )
3030
public:
31-
enum TParam { TP_Multiplier, TP_FreeTerm, /*...*/ TP_Count };
3231
using CParam = CLinearActivationParam;
3332
static constexpr float DefaultMultiplier = CParam::DefaultMultiplier;
3433
static constexpr float DefaultFreeTerm = CParam::DefaultFreeTerm;
@@ -38,23 +37,21 @@ class NEOML_API CLinearLayer : public CBaseInPlaceLayer, public IActivationLayer
3837
void Serialize( CArchive& archive ) override;
3938

4039
float GetMultiplier() const { return multiplier; }
41-
void SetMultiplier( float _multiplier ) { multiplier = _multiplier; ForceReshape(); }
40+
void SetMultiplier( float _multiplier ) { multiplier = _multiplier; }
4241
float GetFreeTerm() const { return freeTerm; }
43-
void SetFreeTerm( float _freeTerm ) { freeTerm = _freeTerm; ForceReshape(); }
42+
void SetFreeTerm( float _freeTerm ) { freeTerm = _freeTerm; }
4443

4544
void ApplyParam( CParam param ) { SetMultiplier( param.Multiplier ); SetFreeTerm( param.FreeTerm ); }
4645
CActivationDesc GetDesc() const override;
4746

4847
protected:
49-
void OnReshaped() override;
5048
void RunOnce() override;
5149
void BackwardOnce() override;
5250
int BlobsForBackward() const override { return 0; }
5351

5452
private:
5553
float multiplier = DefaultMultiplier;
5654
float freeTerm = DefaultFreeTerm;
57-
CPtr<CDnnBlob> vars;
5855
};
5956

6057
NEOML_API CLayerWrapper<CLinearLayer> Linear( float multiplier, float freeTerm );
@@ -84,6 +81,9 @@ class NEOML_API CELULayer : public CBaseInPlaceLayer, public IActivationLayer {
8481
void RunOnce() override;
8582
void BackwardOnce() override;
8683
int BlobsForBackward() const override { return TOutputBlobs; }
84+
85+
private:
86+
float alpha = DefaultAlpha;
8787
};
8888

8989
NEOML_API CLayerWrapper<CELULayer> Elu( float alpha = CELULayer::DefaultAlpha );
@@ -104,8 +104,8 @@ class NEOML_API CReLULayer : public CBaseInPlaceLayer, public IActivationLayer {
104104
// The upper cutoff for the function value. If you set it to a value > 0,
105105
// the function will be ReLU(x) = Upper_Threshold for x > Upper_Threshold
106106
// The default value is 0: no cutoff
107-
float GetUpperThreshold() const { return upperThreshold->GetData().GetValue(); }
108-
void SetUpperThreshold( float threshold ) { upperThreshold->GetData().SetValue( threshold ); }
107+
float GetUpperThreshold() const { return upperThreshold; }
108+
void SetUpperThreshold( float threshold ) { upperThreshold = threshold; }
109109

110110
void ApplyParam( CParam param ) { SetUpperThreshold( param.UpperThreshold ); }
111111
CActivationDesc GetDesc() const override;
@@ -116,7 +116,7 @@ class NEOML_API CReLULayer : public CBaseInPlaceLayer, public IActivationLayer {
116116
int BlobsForBackward() const override { return TOutputBlobs; }
117117

118118
private:
119-
CPtr<CDnnBlob> upperThreshold;
119+
float upperThreshold = DefaultUpperThreshold;
120120
};
121121

122122
NEOML_API CLayerWrapper<CReLULayer> Relu( float threshold = CReLULayer::DefaultUpperThreshold );
@@ -146,6 +146,9 @@ class NEOML_API CLeakyReLULayer : public CBaseInPlaceLayer, public IActivationLa
146146
void RunOnce() override;
147147
void BackwardOnce() override;
148148
int BlobsForBackward() const override { return TOutputBlobs; }
149+
150+
private:
151+
float alpha = DefaultAlpha;
149152
};
150153

151154
NEOML_API CLayerWrapper<CLeakyReLULayer> LeakyRelu( float alpha = CLeakyReLULayer::DefaultAlpha );
@@ -268,10 +271,10 @@ class NEOML_API CHardSigmoidLayer : public CBaseInPlaceLayer, public IActivation
268271

269272
void Serialize( CArchive& archive ) override;
270273

271-
float GetSlope() const { return paramBlobs[0]->GetData().GetValue(); }
272-
void SetSlope( float slope ) { paramBlobs[0]->GetData().SetValue( slope ); }
273-
float GetBias() const { return paramBlobs[1]->GetData().GetValue(); }
274-
void SetBias( float bias ) { paramBlobs[1]->GetData().SetValue( bias ); }
274+
float GetSlope() const { return slope; }
275+
void SetSlope( float _slope ) { slope = _slope; }
276+
float GetBias() const { return bias; }
277+
void SetBias( float _bias ) { bias = _bias; }
275278

276279
void ApplyParam( CParam param ) { SetSlope( param.Slope ); SetBias( param.Bias ); }
277280
CActivationDesc GetDesc() const override;
@@ -282,7 +285,8 @@ class NEOML_API CHardSigmoidLayer : public CBaseInPlaceLayer, public IActivation
282285
int BlobsForBackward() const override { return TOutputBlobs; }
283286

284287
private:
285-
void setDefaultParamBlobs( IMathEngine& mathEngine );
288+
float slope = DefaultSlope;
289+
float bias = DefaultBias;
286290
};
287291

288292
NEOML_API CLayerWrapper<CHardSigmoidLayer> HardSigmoid( float slope, float bias );
@@ -373,9 +377,6 @@ class NEOML_API CErfLayer : public CBaseLayer, public IActivationLayer {
373377
void RunOnce() override;
374378
void BackwardOnce() override;
375379
int BlobsForBackward() const override { return TInputBlobs; }
376-
377-
private:
378-
CPtr<CDnnBlob> mult;
379380
};
380381

381382
NEOML_API CLayerWrapper<CErfLayer> Erf();
@@ -395,7 +396,9 @@ class NEOML_API CGELULayer : public CBaseLayer, public IActivationLayer {
395396
static const TCalculationMode CM_Precise = CParam::TCalculationMode::CM_Precise;
396397
static const TCalculationMode CM_SigmoidApproximate = CParam::TCalculationMode::CM_SigmoidApproximate;
397398

398-
explicit CGELULayer( IMathEngine& mathEngine );
399+
explicit CGELULayer( IMathEngine& mathEngine ) :
400+
CBaseLayer( mathEngine, "CGELULayer", false )
401+
{}
399402

400403
void Serialize( CArchive& archive ) override;
401404

@@ -415,18 +418,6 @@ class NEOML_API CGELULayer : public CBaseLayer, public IActivationLayer {
415418

416419
private:
417420
TCalculationMode mode = DefaultCalculationMode;
418-
419-
// 1
420-
CFloatHandleVar oneVar;
421-
// 0.5
422-
CFloatHandleVar halfVar;
423-
// 1/sqrt(2)
424-
CFloatHandleVar sqrt2InvVar;
425-
// 1/sqrt(2pi)
426-
CFloatHandleVar sqrt2PiInvVar;
427-
// 1.702f
428-
CFloatHandleVar approxScaleVar;
429-
430421
CPtr<CDnnBlob> erfMemoization;
431422

432423
void runPrecise();

0 commit comments

Comments (0)