diff --git a/api/handlers/exporter/decided.go b/api/handlers/exporter/decided.go index 840c5db3cd..cec19c9d5a 100644 --- a/api/handlers/exporter/decided.go +++ b/api/handlers/exporter/decided.go @@ -12,8 +12,8 @@ import ( spectypes "github.com/ssvlabs/ssv-spec/types" "github.com/ssvlabs/ssv/api" + "github.com/ssvlabs/ssv/ibft/storage" dutytracer "github.com/ssvlabs/ssv/operator/dutytracer" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" ) // TraceDecideds godoc @@ -132,7 +132,7 @@ func (e *Exporter) Decideds(w http.ResponseWriter, r *http.Request) error { role := spectypes.BeaconRole(r) store := e.participantStores.Get(role) - var participantsRange []qbftstorage.ParticipantsRangeEntry + var participantsRange []storage.ParticipantsRangeEntry if len(pubkeys) == 0 { var err error @@ -216,12 +216,12 @@ func (e *Exporter) getValidatorDecidedsForRole(slot phase0.Slot, indices []phase // toParticipantsRangeEntry converts an index-based entry into a ParticipantsRangeEntry // by resolving the validator's pubkey from the registry store. -func (e *Exporter) toParticipantsRangeEntry(ent dutytracer.ParticipantsRangeIndexEntry) (qbftstorage.ParticipantsRangeEntry, error) { +func (e *Exporter) toParticipantsRangeEntry(ent dutytracer.ParticipantsRangeIndexEntry) (storage.ParticipantsRangeEntry, error) { pk, found := e.validators.ValidatorPubkey(ent.Index) if !found { - return qbftstorage.ParticipantsRangeEntry{}, fmt.Errorf("validator not found by index: %d", ent.Index) + return storage.ParticipantsRangeEntry{}, fmt.Errorf("validator not found by index: %d", ent.Index) } - return qbftstorage.ParticipantsRangeEntry{ + return storage.ParticipantsRangeEntry{ Slot: ent.Slot, PubKey: pk, Signers: ent.Signers, diff --git a/api/handlers/exporter/decided_model.go b/api/handlers/exporter/decided_model.go index da9dd9b592..7889326c9d 100644 --- a/api/handlers/exporter/decided_model.go +++ b/api/handlers/exporter/decided_model.go @@ -6,7 +6,7 @@ import ( spectypes "github.com/ssvlabs/ssv-spec/types" "github.com/ssvlabs/ssv/api" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" + "github.com/ssvlabs/ssv/ibft/storage" ) // DecidedParticipant describes a decided duty participant entry. 
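For orientation, the relocation above only changes where ParticipantsRangeEntry lives (ibft/storage instead of protocol/v2/qbft/storage), not its shape. Below is a minimal illustrative sketch, not part of the patch itself, of constructing the entry from its new import path; only the Slot, PubKey and Signers fields are taken from the diff, everything else (package main, the printed output) is assumed for the example.

package main

import (
	"fmt"

	"github.com/attestantio/go-eth2-client/spec/phase0"
	spectypes "github.com/ssvlabs/ssv-spec/types"

	"github.com/ssvlabs/ssv/ibft/storage"
)

func main() {
	// Zero-valued pubkey, used purely for illustration.
	var pk spectypes.ValidatorPK

	// Same three fields the handlers and tests in this patch populate.
	entry := storage.ParticipantsRangeEntry{
		Slot:    phase0.Slot(123),
		PubKey:  pk,
		Signers: []uint64{1, 2, 3, 4},
	}

	fmt.Println(uint64(entry.Slot), entry.Signers)
}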
@@ -72,7 +72,7 @@ func (r *DecidedsRequest) hasFilters() bool { return len(r.PubKeys) > 0 || len(r.Indices) > 0 } -func toParticipantResponse(role spectypes.BeaconRole, entry qbftstorage.ParticipantsRangeEntry) *DecidedParticipant { +func toParticipantResponse(role spectypes.BeaconRole, entry storage.ParticipantsRangeEntry) *DecidedParticipant { response := &DecidedParticipant{ Role: role.String(), Slot: uint64(entry.Slot), diff --git a/api/handlers/exporter/exporter_test.go b/api/handlers/exporter/exporter_test.go index 4d0d3ac5eb..38473411c9 100644 --- a/api/handlers/exporter/exporter_test.go +++ b/api/handlers/exporter/exporter_test.go @@ -15,12 +15,11 @@ import ( "github.com/attestantio/go-eth2-client/spec/phase0" "github.com/ethereum/go-ethereum/common" "github.com/hashicorp/go-multierror" + spectypes "github.com/ssvlabs/ssv-spec/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/zap" - spectypes "github.com/ssvlabs/ssv-spec/types" - "github.com/ssvlabs/ssv/api" "github.com/ssvlabs/ssv/exporter" "github.com/ssvlabs/ssv/exporter/rolemask" @@ -28,20 +27,19 @@ import ( ibftstorage "github.com/ssvlabs/ssv/ibft/storage" dutytracer "github.com/ssvlabs/ssv/operator/dutytracer" "github.com/ssvlabs/ssv/operator/slotticker" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" ssvtypes "github.com/ssvlabs/ssv/protocol/v2/types" "github.com/ssvlabs/ssv/registry/storage" ) -// mockParticipantStore is a basic mock for qbftstorage.ParticipantStore. +// mockParticipantStore is a basic mock for ibftstorage.ParticipantStore. type mockParticipantStore struct { - participantsRangeEntries map[string][]qbftstorage.ParticipantsRangeEntry + participantsRangeEntries map[string][]ibftstorage.ParticipantsRangeEntry } // newMockParticipantStore creates a new instance of mockParticipantStore. func newMockParticipantStore() *mockParticipantStore { return &mockParticipantStore{ - participantsRangeEntries: make(map[string][]qbftstorage.ParticipantsRangeEntry), + participantsRangeEntries: make(map[string][]ibftstorage.ParticipantsRangeEntry), } } @@ -54,8 +52,8 @@ func (m *mockParticipantStore) SaveParticipants(spectypes.ValidatorPK, phase0.Sl } // GetAllParticipantsInRange returns all participant entries within the given slot range. -func (m *mockParticipantStore) GetAllParticipantsInRange(from, to phase0.Slot) ([]qbftstorage.ParticipantsRangeEntry, error) { - var result []qbftstorage.ParticipantsRangeEntry +func (m *mockParticipantStore) GetAllParticipantsInRange(from, to phase0.Slot) ([]ibftstorage.ParticipantsRangeEntry, error) { + var result []ibftstorage.ParticipantsRangeEntry for _, entries := range m.participantsRangeEntries { for _, entry := range entries { if entry.Slot >= from && entry.Slot <= to { @@ -67,9 +65,9 @@ func (m *mockParticipantStore) GetAllParticipantsInRange(from, to phase0.Slot) ( } // GetParticipantsInRange returns participant entries for a given public key and slot range. 
-func (m *mockParticipantStore) GetParticipantsInRange(pk spectypes.ValidatorPK, from, to phase0.Slot) ([]qbftstorage.ParticipantsRangeEntry, error) { +func (m *mockParticipantStore) GetParticipantsInRange(pk spectypes.ValidatorPK, from, to phase0.Slot) ([]ibftstorage.ParticipantsRangeEntry, error) { key := hex.EncodeToString(pk[:]) - var result []qbftstorage.ParticipantsRangeEntry + var result []ibftstorage.ParticipantsRangeEntry entries, ok := m.participantsRangeEntries[key] if !ok { return result, nil @@ -90,14 +88,14 @@ func (m *mockParticipantStore) Prune(context.Context, phase0.Slot) { // no-op. } -func (m *mockParticipantStore) PruneContinously(context.Context, slotticker.Provider, phase0.Slot) { +func (m *mockParticipantStore) PruneContinuously(context.Context, slotticker.Provider, phase0.Slot) { // no-op. } // AddEntry adds an entry to the mock store. func (m *mockParticipantStore) AddEntry(pk spectypes.ValidatorPK, slot phase0.Slot, signers []uint64) { key := hex.EncodeToString(pk[:]) - entry := qbftstorage.ParticipantsRangeEntry{ + entry := ibftstorage.ParticipantsRangeEntry{ Slot: slot, PubKey: pk, Signers: signers, @@ -110,7 +108,7 @@ type errorAllRangeMockStore struct { *mockParticipantStore } -func (m *errorAllRangeMockStore) GetAllParticipantsInRange(phase0.Slot, phase0.Slot) ([]qbftstorage.ParticipantsRangeEntry, error) { +func (m *errorAllRangeMockStore) GetAllParticipantsInRange(phase0.Slot, phase0.Slot) ([]ibftstorage.ParticipantsRangeEntry, error) { return nil, fmt.Errorf("forced error on GetAllParticipantsInRange") } @@ -119,7 +117,7 @@ type errorByPKMockStore struct { *mockParticipantStore } -func (m *errorByPKMockStore) GetParticipantsInRange(spectypes.ValidatorPK, phase0.Slot, phase0.Slot) ([]qbftstorage.ParticipantsRangeEntry, error) { +func (m *errorByPKMockStore) GetParticipantsInRange(spectypes.ValidatorPK, phase0.Slot, phase0.Slot) ([]ibftstorage.ParticipantsRangeEntry, error) { return nil, fmt.Errorf("forced error on GetParticipantsInRange") } @@ -133,7 +131,7 @@ func TestTransformToParticipantResponse(t *testing.T) { var pk spectypes.ValidatorPK copy(pk[:], pkBytes) - entry := qbftstorage.ParticipantsRangeEntry{ + entry := ibftstorage.ParticipantsRangeEntry{ Slot: phase0.Slot(123), PubKey: pk, Signers: []uint64{1, 2, 3, 4}, @@ -752,7 +750,7 @@ func TestExporterTraceDecideds(t *testing.T) { "pubkeys": []string{"b24454393691331ee6eba4ffa2dbb2600b9859f908c3e648b6c6de9e1dea3e9329866015d08355c8d451427762b913d1"}, }, setupMock: func(store *mockTraceStore) { - entry := qbftstorage.ParticipantsRangeEntry{Signers: []uint64{}} + entry := ibftstorage.ParticipantsRangeEntry{Signers: []uint64{}} store.AddValidatorDecided(spectypes.BNRoleProposer, phase0.Slot(150), entry.Signers) }, expectedStatus: http.StatusOK, @@ -772,7 +770,7 @@ func TestExporterTraceDecideds(t *testing.T) { "pubkeys": []string{"b24454393691331ee6eba4ffa2dbb2600b9859f908c3e648b6c6de9e1dea3e9329866015d08355c8d451427762b913d1"}, }, setupMock: func(store *mockTraceStore) { - entry := qbftstorage.ParticipantsRangeEntry{Signers: []uint64{}} + entry := ibftstorage.ParticipantsRangeEntry{Signers: []uint64{}} store.AddCommitteeDecided(phase0.Slot(150), entry.Signers) }, expectedStatus: http.StatusOK, diff --git a/beacon/goclient/aggregator.go b/beacon/goclient/aggregator.go index e2b0bf7cd3..2e4248dce9 100644 --- a/beacon/goclient/aggregator.go +++ b/beacon/goclient/aggregator.go @@ -25,7 +25,7 @@ func (gc *GoClient) SubmitAggregateSelectionProof( // As specified in spec, an aggregator should wait until two 
thirds of the way through slot // to broadcast the best aggregate to the global aggregate channel. // https://github.com/ethereum/consensus-specs/blob/v0.9.3/specs/validator/0_beacon-chain-validator.md#broadcast-aggregate - if err := gc.waitToSlotTwoThirds(ctx, slot); err != nil { + if err := gc.waitTwoThirdsIntoSlot(ctx, slot); err != nil { return nil, 0, fmt.Errorf("wait for 2/3 of slot: %w", err) } @@ -153,8 +153,8 @@ func (gc *GoClient) SubmitSignedAggregateSelectionProof( return nil } -// waitToSlotTwoThirds waits until two-third of the slot has transpired (SECONDS_PER_SLOT * 2 / 3 seconds after the start of slot) -func (gc *GoClient) waitToSlotTwoThirds(ctx context.Context, slot phase0.Slot) error { +// waitTwoThirdsIntoSlot waits until two-third of the slot has transpired (SECONDS_PER_SLOT * 2 / 3 seconds after the start of slot) +func (gc *GoClient) waitTwoThirdsIntoSlot(ctx context.Context, slot phase0.Slot) error { config := gc.getBeaconConfig() oneInterval := config.IntervalDuration() finalTime := config.SlotStartTime(slot).Add(2 * oneInterval) diff --git a/beacon/goclient/sync_committee_contribution.go b/beacon/goclient/sync_committee_contribution.go index fe965d514a..b2595389de 100644 --- a/beacon/goclient/sync_committee_contribution.go +++ b/beacon/goclient/sync_committee_contribution.go @@ -51,7 +51,7 @@ func (gc *GoClient) GetSyncCommitteeContribution( return nil, DataVersionNil, fmt.Errorf("mismatching number of selection proofs and subnet IDs") } - gc.waitForOneThirdSlotDuration(ctx, slot) + gc.waitOneThirdIntoSlot(ctx, slot) scDataReqStart := time.Now() beaconBlockRootResp, err := gc.multiClient.BeaconBlockRoot(ctx, &api.BeaconBlockRootOpts{ @@ -70,7 +70,7 @@ func (gc *GoClient) GetSyncCommitteeContribution( blockRoot := beaconBlockRootResp.Data - if err := gc.waitToSlotTwoThirds(ctx, slot); err != nil { + if err := gc.waitTwoThirdsIntoSlot(ctx, slot); err != nil { return nil, 0, fmt.Errorf("wait for 2/3 of slot: %w", err) } @@ -129,8 +129,8 @@ func (gc *GoClient) SubmitSignedContributionAndProof( return nil } -// waitForOneThirdSlotDuration waits until one-third of the slot has transpired (SECONDS_PER_SLOT / 3 seconds after slot start time) -func (gc *GoClient) waitForOneThirdSlotDuration(ctx context.Context, slot phase0.Slot) { +// waitOneThirdIntoSlot waits until one-third of the slot has transpired (SECONDS_PER_SLOT / 3 seconds after slot start time) +func (gc *GoClient) waitOneThirdIntoSlot(ctx context.Context, slot phase0.Slot) { config := gc.getBeaconConfig() delay := config.IntervalDuration() finalTime := config.SlotStartTime(slot).Add(delay) diff --git a/beacon/goclient/validator.go b/beacon/goclient/validator.go index 888a8e835a..bae5221f35 100644 --- a/beacon/goclient/validator.go +++ b/beacon/goclient/validator.go @@ -48,7 +48,7 @@ func (gc *GoClient) SubmitValidatorRegistrations(ctx context.Context, registrati if err != nil { return errMultiClient(fmt.Errorf("submit validator registrations (chunk size = %d): %w", len(chunk), err), "SubmitValidatorRegistrations") } - gc.log.Info("submitted validator registrations", fields.Count(len(chunk)), fields.Duration(reqStart)) + gc.log.Info("submitted validator registrations", fields.Count(len(chunk)), fields.Took(time.Since(reqStart))) } return nil diff --git a/cli/operator/node.go b/cli/operator/node.go index fa09c07ae4..476015f70c 100644 --- a/cli/operator/node.go +++ b/cli/operator/node.go @@ -71,7 +71,6 @@ import ( "github.com/ssvlabs/ssv/operator/validator" "github.com/ssvlabs/ssv/operator/validator/metadata" 
"github.com/ssvlabs/ssv/operator/validators" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" "github.com/ssvlabs/ssv/protocol/v2/types" registrystorage "github.com/ssvlabs/ssv/registry/storage" @@ -889,18 +888,19 @@ func applyMigrations( // If migrations were applied, we run a full garbage collection cycle // to reclaim any space that may have been freed up. - start := time.Now() + + logger.Debug("running full GC cycle...") ctx, cancel := context.WithTimeout(cfg.DBOptions.Ctx, 6*time.Minute) defer cancel() - logger.Debug("running full GC cycle...", fields.Duration(start)) + start := time.Now() if err := db.FullGC(ctx); err != nil { return fmt.Errorf("failed to collect garbage: %w", err) } - logger.Debug("post-migrations garbage collection completed", fields.Duration(start)) + logger.Debug("post-migrations garbage collection completed", fields.Took(time.Since(start))) return nil } @@ -1203,7 +1203,7 @@ func initSlotPruning(ctx context.Context, stores *ibftstorage.ParticipantStores, threshold := slot - phase0.Slot(retain) // async perform initial slot gc - _ = stores.Each(func(_ spectypes.BeaconRole, store qbftstorage.ParticipantStore) error { + _ = stores.Each(func(_ spectypes.BeaconRole, store ibftstorage.ParticipantStore) error { wg.Add(1) go func() { defer wg.Done() @@ -1215,8 +1215,8 @@ func initSlotPruning(ctx context.Context, stores *ibftstorage.ParticipantStores, wg.Wait() // start background job for removing old slots on every tick - _ = stores.Each(func(_ spectypes.BeaconRole, store qbftstorage.ParticipantStore) error { - go store.PruneContinously(ctx, slotTickerProvider, phase0.Slot(retain)) + _ = stores.Each(func(_ spectypes.BeaconRole, store ibftstorage.ParticipantStore) error { + go store.PruneContinuously(ctx, slotTickerProvider, phase0.Slot(retain)) return nil }) } diff --git a/exporter/api/decided/stream.go b/exporter/api/decided/stream.go index 6d9671a401..2bbd9a1b8c 100644 --- a/exporter/api/decided/stream.go +++ b/exporter/api/decided/stream.go @@ -5,15 +5,14 @@ import ( "time" "github.com/patrickmn/go-cache" - "go.uber.org/zap" - spectypes "github.com/ssvlabs/ssv-spec/types" + "go.uber.org/zap" "github.com/ssvlabs/ssv/exporter/api" + qbftstorage "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/observability/log/fields" dutytracer "github.com/ssvlabs/ssv/operator/dutytracer" "github.com/ssvlabs/ssv/protocol/v2/qbft/controller" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" registrystorage "github.com/ssvlabs/ssv/registry/storage" ) diff --git a/exporter/api/msg.go b/exporter/api/msg.go index 0ceb5d175d..482029ed3b 100644 --- a/exporter/api/msg.go +++ b/exporter/api/msg.go @@ -9,7 +9,7 @@ import ( specqbft "github.com/ssvlabs/ssv-spec/qbft" spectypes "github.com/ssvlabs/ssv-spec/types" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" + qbftstorage "github.com/ssvlabs/ssv/ibft/storage" ) // Message represents an exporter message diff --git a/exporter/api/query_handlers.go b/exporter/api/query_handlers.go index 02b4390551..0c46adb2c3 100644 --- a/exporter/api/query_handlers.go +++ b/exporter/api/query_handlers.go @@ -11,7 +11,6 @@ import ( "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/observability/log/fields" "github.com/ssvlabs/ssv/protocol/v2/message" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" ) const ( @@ -114,10 +113,10 @@ func (h *Handler) HandleParticipantsQuery(store *storage.ParticipantStores, nm * nm.Msg = res } -func 
toParticipations(role spectypes.BeaconRole, pk spectypes.ValidatorPK, ee []qbftstorage.ParticipantsRangeEntry) []qbftstorage.Participation { - out := make([]qbftstorage.Participation, 0, len(ee)) +func toParticipations(role spectypes.BeaconRole, pk spectypes.ValidatorPK, ee []storage.ParticipantsRangeEntry) []storage.Participation { + out := make([]storage.Participation, 0, len(ee)) for _, e := range ee { - p := qbftstorage.Participation{ + p := storage.Participation{ ParticipantsRangeEntry: e, Role: role, PubKey: pk, diff --git a/exporter/api/query_handlers_test.go b/exporter/api/query_handlers_test.go index b4c00d65d2..8eae922d72 100644 --- a/exporter/api/query_handlers_test.go +++ b/exporter/api/query_handlers_test.go @@ -20,7 +20,6 @@ import ( "github.com/ssvlabs/ssv/networkconfig" "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/operator/storage" - protocoltesting "github.com/ssvlabs/ssv/protocol/v2/testing" kv "github.com/ssvlabs/ssv/storage/badger" "github.com/ssvlabs/ssv/storage/basedb" ) @@ -108,7 +107,7 @@ func TestHandleDecidedQuery(t *testing.T) { for _, role := range roles { pk := sks[1].GetPublicKey() ssvConfig := networkconfig.TestNetwork.SSV - decided250Seq, err := protocoltesting.CreateMultipleStoredInstances(rsaKeys, specqbft.Height(0), specqbft.Height(250), func(height specqbft.Height) ([]spectypes.OperatorID, *specqbft.Message) { + decided250Seq, err := qbftstorage.CreateMultipleStoredInstances(rsaKeys, specqbft.Height(0), specqbft.Height(250), func(height specqbft.Height) ([]spectypes.OperatorID, *specqbft.Message) { return oids, &specqbft.Message{ MsgType: specqbft.CommitMsgType, Height: height, diff --git a/protocol/v2/qbft/storage/participant_store.go b/ibft/storage/participant_store.go similarity index 93% rename from protocol/v2/qbft/storage/participant_store.go rename to ibft/storage/participant_store.go index 0ecacc3dfb..27c1ed2cb8 100644 --- a/protocol/v2/qbft/storage/participant_store.go +++ b/ibft/storage/participant_store.go @@ -1,4 +1,4 @@ -package qbftstorage +package storage import ( "context" @@ -50,5 +50,5 @@ type ParticipantStore interface { Prune(ctx context.Context, below phase0.Slot) // SlotGC continuously removes old slots - PruneContinously(ctx context.Context, slotTickerProvider slotticker.Provider, retain phase0.Slot) + PruneContinuously(ctx context.Context, slotTickerProvider slotticker.Provider, retain phase0.Slot) } diff --git a/ibft/storage/store.go b/ibft/storage/store.go index c9f45b8f1a..eadb86eadf 100644 --- a/ibft/storage/store.go +++ b/ibft/storage/store.go @@ -10,13 +10,11 @@ import ( "github.com/attestantio/go-eth2-client/spec/phase0" "github.com/pkg/errors" - "go.uber.org/zap" - spectypes "github.com/ssvlabs/ssv-spec/types" + "go.uber.org/zap" "github.com/ssvlabs/ssv/observability/log/fields" "github.com/ssvlabs/ssv/operator/slotticker" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" "github.com/ssvlabs/ssv/storage/basedb" ) @@ -26,9 +24,9 @@ const ( participantsKey = "pt" ) -// participantStorage struct +// ParticipantStorage struct // instanceType is what separates different iBFT eth2 duty types (attestation, proposal and aggregation) -type participantStorage struct { +type ParticipantStorage struct { logger *zap.Logger prefix []byte oldPrefix string // kept back for cleanup @@ -37,9 +35,9 @@ type participantStorage struct { } // New create new participant store -func New(logger *zap.Logger, db basedb.Database, prefix spectypes.BeaconRole) qbftstorage.ParticipantStore { +func New(logger 
*zap.Logger, db basedb.Database, prefix spectypes.BeaconRole) *ParticipantStorage { role := byte(prefix & 0xff) - return &participantStorage{ + return &ParticipantStorage{ logger: logger, prefix: []byte{role}, oldPrefix: prefix.String(), @@ -48,7 +46,7 @@ func New(logger *zap.Logger, db basedb.Database, prefix spectypes.BeaconRole) qb } // Prune waits for the initial tick and then removes all slots below the tickSlot - retain -func (i *participantStorage) Prune(ctx context.Context, threshold phase0.Slot) { +func (i *ParticipantStorage) Prune(ctx context.Context, threshold phase0.Slot) { i.logger.Info("start initial stale slot cleanup", zap.String("store", i.ID()), fields.Slot(threshold)) // remove ALL slots below the threshold @@ -58,8 +56,8 @@ func (i *participantStorage) Prune(ctx context.Context, threshold phase0.Slot) { i.logger.Info("removed stale slot entries", zap.String("store", i.ID()), fields.Slot(threshold), zap.Int("count", count), zap.Duration("took", time.Since(start))) } -// PruneContinously on every tick looks up and removes the slots that fall below the retain threshold -func (i *participantStorage) PruneContinously(ctx context.Context, slotTickerProvider slotticker.Provider, retain phase0.Slot) { +// PruneContinuously on every tick looks up and removes the slots that fall below the retain-threshold +func (i *ParticipantStorage) PruneContinuously(ctx context.Context, slotTickerProvider slotticker.Provider, retain phase0.Slot) { ticker := slotTickerProvider() i.logger.Info("start stale slot cleanup loop", zap.String("store", i.ID())) for { @@ -79,7 +77,7 @@ func (i *participantStorage) PruneContinously(ctx context.Context, slotTickerPro } // removes ALL entries that have given slot in their prefix -func (i *participantStorage) removeSlotAt(slot phase0.Slot) (int, error) { +func (i *ParticipantStorage) removeSlotAt(slot phase0.Slot) (int, error) { var keySet [][]byte prefix := i.makePrefix(slotToByteSlice(slot)) @@ -117,7 +115,7 @@ func (i *participantStorage) removeSlotAt(slot phase0.Slot) (int, error) { var dropPrefixMu sync.Mutex // removes ALL entries for any slots older or equal to given slot -func (i *participantStorage) removeSlotsOlderThan(slot phase0.Slot) int { +func (i *ParticipantStorage) removeSlotsOlderThan(slot phase0.Slot) int { var total int for { slot-- // slots are incremental @@ -157,7 +155,7 @@ func (i *participantStorage) removeSlotsOlderThan(slot phase0.Slot) int { } // CleanAllInstances removes all records in old format. 
-func (i *participantStorage) CleanAllInstances() error { +func (i *ParticipantStorage) CleanAllInstances() error { if err := i.db.DropPrefix([]byte(i.oldPrefix)); err != nil { return errors.Wrap(err, "failed to drop all records") } @@ -165,7 +163,7 @@ func (i *participantStorage) CleanAllInstances() error { return nil } -func (i *participantStorage) SaveParticipants(pk spectypes.ValidatorPK, slot phase0.Slot, newParticipants []spectypes.OperatorID) (updated bool, err error) { +func (i *ParticipantStorage) SaveParticipants(pk spectypes.ValidatorPK, slot phase0.Slot, newParticipants []spectypes.OperatorID) (updated bool, err error) { start := time.Now() defer func() { dur := time.Since(start) @@ -199,13 +197,13 @@ func (i *participantStorage) SaveParticipants(pk spectypes.ValidatorPK, slot pha return true, nil } -func (i *participantStorage) GetAllParticipantsInRange(from, to phase0.Slot) ([]qbftstorage.ParticipantsRangeEntry, error) { - var ee []qbftstorage.ParticipantsRangeEntry +func (i *ParticipantStorage) GetAllParticipantsInRange(from, to phase0.Slot) ([]ParticipantsRangeEntry, error) { + var ee []ParticipantsRangeEntry for slot := from; slot <= to; slot++ { slotBytes := slotToByteSlice(slot) prefix := i.makePrefix(slotBytes) err := i.db.GetAll(prefix, func(_ int, o basedb.Obj) error { - re := qbftstorage.ParticipantsRangeEntry{ + re := ParticipantsRangeEntry{ Slot: slot, PubKey: spectypes.ValidatorPK(o.Key), Signers: decodeOperators(o.Value), @@ -222,8 +220,8 @@ func (i *participantStorage) GetAllParticipantsInRange(from, to phase0.Slot) ([] return ee, nil } -func (i *participantStorage) GetParticipantsInRange(pk spectypes.ValidatorPK, from, to phase0.Slot) ([]qbftstorage.ParticipantsRangeEntry, error) { - participantsRange := make([]qbftstorage.ParticipantsRangeEntry, 0) +func (i *ParticipantStorage) GetParticipantsInRange(pk spectypes.ValidatorPK, from, to phase0.Slot) ([]ParticipantsRangeEntry, error) { + participantsRange := make([]ParticipantsRangeEntry, 0) for slot := from; slot <= to; slot++ { participants, err := i.GetParticipants(pk, slot) @@ -235,7 +233,7 @@ func (i *participantStorage) GetParticipantsInRange(pk spectypes.ValidatorPK, fr continue } - participantsRange = append(participantsRange, qbftstorage.ParticipantsRangeEntry{ + participantsRange = append(participantsRange, ParticipantsRangeEntry{ Slot: slot, PubKey: pk, Signers: participants, @@ -245,11 +243,11 @@ func (i *participantStorage) GetParticipantsInRange(pk spectypes.ValidatorPK, fr return participantsRange, nil } -func (i *participantStorage) GetParticipants(pk spectypes.ValidatorPK, slot phase0.Slot) ([]spectypes.OperatorID, error) { +func (i *ParticipantStorage) GetParticipants(pk spectypes.ValidatorPK, slot phase0.Slot) ([]spectypes.OperatorID, error) { return i.getParticipants(nil, pk, slot) } -func (i *participantStorage) getParticipants(txn basedb.ReadWriter, pk spectypes.ValidatorPK, slot phase0.Slot) ([]spectypes.OperatorID, error) { +func (i *ParticipantStorage) getParticipants(txn basedb.ReadWriter, pk spectypes.ValidatorPK, slot phase0.Slot) ([]spectypes.OperatorID, error) { val, found, err := i.get(txn, pk[:], slotToByteSlice(slot)) if err != nil { return nil, err @@ -262,7 +260,7 @@ func (i *participantStorage) getParticipants(txn basedb.ReadWriter, pk spectypes return operators, nil } -func (i *participantStorage) saveParticipants(txn basedb.ReadWriter, pk spectypes.ValidatorPK, slot phase0.Slot, operators []spectypes.OperatorID) error { +func (i *ParticipantStorage) saveParticipants(txn 
basedb.ReadWriter, pk spectypes.ValidatorPK, slot phase0.Slot, operators []spectypes.OperatorID) error { bytes, err := encodeOperators(operators) if err != nil { return fmt.Errorf("encode operators: %w", err) @@ -280,12 +278,12 @@ func mergeParticipants(existingParticipants, newParticipants []spectypes.Operato return slices.Compact(allParticipants) } -func (i *participantStorage) save(txn basedb.ReadWriter, value []byte, pk, slot []byte) error { +func (i *ParticipantStorage) save(txn basedb.ReadWriter, value []byte, pk, slot []byte) error { prefix := i.makePrefix(slot) return i.db.Using(txn).Set(prefix, pk, value) } -func (i *participantStorage) get(txn basedb.ReadWriter, pk, slot []byte) ([]byte, bool, error) { +func (i *ParticipantStorage) get(txn basedb.ReadWriter, pk, slot []byte) ([]byte, bool, error) { prefix := i.makePrefix(slot) obj, found, err := i.db.Using(txn).Get(prefix, pk) if err != nil { @@ -297,12 +295,12 @@ func (i *participantStorage) get(txn basedb.ReadWriter, pk, slot []byte) ([]byte return obj.Value, found, nil } -func (i *participantStorage) ID() string { +func (i *ParticipantStorage) ID() string { bnr := spectypes.BeaconRole(uint64(i.prefix[0])) return bnr.String() } -func (i *participantStorage) makePrefix(slot []byte) []byte { +func (i *ParticipantStorage) makePrefix(slot []byte) []byte { prefix := make([]byte, 0, len(participantsKey)+1+len(slot)) prefix = append(prefix, participantsKey...) prefix = append(prefix, i.prefix...) diff --git a/ibft/storage/store_test.go b/ibft/storage/store_test.go index d79c85ab82..0abeda512a 100644 --- a/ibft/storage/store_test.go +++ b/ibft/storage/store_test.go @@ -19,11 +19,10 @@ import ( specqbft "github.com/ssvlabs/ssv-spec/qbft" spectypes "github.com/ssvlabs/ssv-spec/types" + "github.com/ssvlabs/ssv/ssvsigner/keys/rsaencryption" + "github.com/ssvlabs/ssv/operator/slotticker" mockslotticker "github.com/ssvlabs/ssv/operator/slotticker/mocks" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" - protocoltesting "github.com/ssvlabs/ssv/protocol/v2/testing" - "github.com/ssvlabs/ssv/ssvsigner/keys/rsaencryption" kv "github.com/ssvlabs/ssv/storage/badger" "github.com/ssvlabs/ssv/storage/basedb" ) @@ -47,7 +46,7 @@ func TestRemoveSlot(t *testing.T) { } pk := sks[1].GetPublicKey() - decided250Seq, err := protocoltesting.CreateMultipleStoredInstances(rsaKeys, specqbft.Height(0), specqbft.Height(250), func(height specqbft.Height) ([]spectypes.OperatorID, *specqbft.Message) { + decided250Seq, err := CreateMultipleStoredInstances(rsaKeys, specqbft.Height(0), specqbft.Height(250), func(height specqbft.Height) ([]spectypes.OperatorID, *specqbft.Message) { return oids, &specqbft.Message{ MsgType: specqbft.CommitMsgType, Height: height, @@ -58,7 +57,7 @@ func TestRemoveSlot(t *testing.T) { }) require.NoError(t, err) - storage := ibftStorage.Get(role).(*participantStorage) + storage := ibftStorage.Get(role).(*ParticipantStorage) // save participants for _, d := range decided250Seq { @@ -86,7 +85,7 @@ func TestRemoveSlot(t *testing.T) { require.Nil(t, err) require.Equal(t, 151, len(pp)) // seq 0 - 150 - found := slices.ContainsFunc(pp, func(e qbftstorage.ParticipantsRangeEntry) bool { + found := slices.ContainsFunc(pp, func(e ParticipantsRangeEntry) bool { return e.Slot < threshold }) @@ -129,7 +128,7 @@ func TestSlotCleanupJob(t *testing.T) { // pk 1 pk := sks[1].GetPublicKey() - decided10Seq, err := protocoltesting.CreateMultipleStoredInstances(rsaKeys, specqbft.Height(0), specqbft.Height(9), func(height specqbft.Height) 
([]spectypes.OperatorID, *specqbft.Message) { + decided10Seq, err := CreateMultipleStoredInstances(rsaKeys, specqbft.Height(0), specqbft.Height(9), func(height specqbft.Height) ([]spectypes.OperatorID, *specqbft.Message) { return oids, &specqbft.Message{ MsgType: specqbft.CommitMsgType, Height: height, @@ -140,7 +139,7 @@ func TestSlotCleanupJob(t *testing.T) { }) require.NoError(t, err) - storage := ibftStorage.Get(role).(*participantStorage) + storage := ibftStorage.Get(role).(*ParticipantStorage) // save participants for _, d := range decided10Seq { @@ -154,7 +153,7 @@ func TestSlotCleanupJob(t *testing.T) { // pk 2 pk = sks[2].GetPublicKey() - decided10Seq, err = protocoltesting.CreateMultipleStoredInstances(rsaKeys, specqbft.Height(0), specqbft.Height(9), func(height specqbft.Height) ([]spectypes.OperatorID, *specqbft.Message) { + decided10Seq, err = CreateMultipleStoredInstances(rsaKeys, specqbft.Height(0), specqbft.Height(9), func(height specqbft.Height) ([]spectypes.OperatorID, *specqbft.Message) { return oids, &specqbft.Message{ MsgType: specqbft.CommitMsgType, Height: height, @@ -215,7 +214,7 @@ func TestSlotCleanupJob(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - storage.PruneContinously(ctx, tickerProv, 1) + storage.PruneContinuously(ctx, tickerProv, 1) }() mockTimeChan <- time.Now() diff --git a/ibft/storage/stores.go b/ibft/storage/stores.go index 4480940c59..dcc9adc5f7 100644 --- a/ibft/storage/stores.go +++ b/ibft/storage/stores.go @@ -3,23 +3,22 @@ package storage import ( spectypes "github.com/ssvlabs/ssv-spec/types" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" "github.com/ssvlabs/ssv/utils/hashmap" ) // ParticipantStores wraps sync map with cast functions to qbft store type ParticipantStores struct { - m *hashmap.Map[spectypes.BeaconRole, qbftstorage.ParticipantStore] + m *hashmap.Map[spectypes.BeaconRole, ParticipantStore] } func NewStores() *ParticipantStores { return &ParticipantStores{ - m: hashmap.New[spectypes.BeaconRole, qbftstorage.ParticipantStore](), + m: hashmap.New[spectypes.BeaconRole, ParticipantStore](), } } // Get store from sync map by role type -func (qs *ParticipantStores) Get(role spectypes.BeaconRole) qbftstorage.ParticipantStore { +func (qs *ParticipantStores) Get(role spectypes.BeaconRole) ParticipantStore { s, ok := qs.m.Get(role) if !ok { return nil @@ -28,14 +27,14 @@ func (qs *ParticipantStores) Get(role spectypes.BeaconRole) qbftstorage.Particip } // Add store to sync map by role as a key -func (qs *ParticipantStores) Add(role spectypes.BeaconRole, store qbftstorage.ParticipantStore) { +func (qs *ParticipantStores) Add(role spectypes.BeaconRole, store ParticipantStore) { qs.m.Set(role, store) } // Each iterates over all stores and calls the given function -func (qs *ParticipantStores) Each(f func(role spectypes.BeaconRole, store qbftstorage.ParticipantStore) error) error { +func (qs *ParticipantStores) Each(f func(role spectypes.BeaconRole, store ParticipantStore) error) error { var err error - qs.m.Range(func(role spectypes.BeaconRole, store qbftstorage.ParticipantStore) bool { + qs.m.Range(func(role spectypes.BeaconRole, store ParticipantStore) bool { err = f(role, store) return err == nil }) diff --git a/protocol/v2/testing/testutils.go b/ibft/storage/testutils.go similarity index 96% rename from protocol/v2/testing/testutils.go rename to ibft/storage/testutils.go index 64d77d5c32..e40b9bdf4a 100644 --- a/protocol/v2/testing/testutils.go +++ b/ibft/storage/testutils.go @@ -1,4 +1,4 @@ -package testing +package 
storage import ( "crypto/rsa" @@ -17,7 +17,6 @@ import ( "golang.org/x/mod/modfile" "golang.org/x/mod/module" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" "github.com/ssvlabs/ssv/ssvsigner/keys/rsaencryption" ) @@ -62,8 +61,8 @@ func CreateMultipleStoredInstances( start specqbft.Height, end specqbft.Height, generator MsgGenerator, -) ([]*qbftstorage.StoredInstance, error) { - results := make([]*qbftstorage.StoredInstance, 0) +) ([]*StoredInstance, error) { + results := make([]*StoredInstance, 0) for i := start; i <= end; i++ { signers, msg := generator(i) if msg == nil { @@ -76,7 +75,7 @@ func CreateMultipleStoredInstances( return nil, err } - results = append(results, &qbftstorage.StoredInstance{ + results = append(results, &StoredInstance{ State: &specqbft.State{ ID: qbftMsg.Identifier, Round: qbftMsg.Round, diff --git a/migrations/migrations.go b/migrations/migrations.go index b36da14cd1..a934a8e5c3 100644 --- a/migrations/migrations.go +++ b/migrations/migrations.go @@ -8,12 +8,13 @@ import ( "github.com/pkg/errors" "go.uber.org/zap" + "github.com/ssvlabs/ssv/ssvsigner/ekm" + "github.com/ssvlabs/ssv/ssvsigner/keys" + "github.com/ssvlabs/ssv/networkconfig" "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/observability/log/fields" operatorstorage "github.com/ssvlabs/ssv/operator/storage" - "github.com/ssvlabs/ssv/ssvsigner/ekm" - "github.com/ssvlabs/ssv/ssvsigner/keys" "github.com/ssvlabs/ssv/storage/basedb" ) @@ -103,7 +104,7 @@ func (m Migrations) Run(ctx context.Context, logger *zap.Logger, opt Options) (a } applied++ - logger.Debug("migration applied successfully", fields.Name(migration.Name), fields.Duration(start)) + logger.Debug("migration applied successfully", fields.Name(migration.Name), fields.Took(time.Since(start))) } logger.Info("applied migrations successfully", fields.Count(applied)) diff --git a/network/discovery/dv5_service.go b/network/discovery/dv5_service.go index 8a685c923b..5be97880b5 100644 --- a/network/discovery/dv5_service.go +++ b/network/discovery/dv5_service.go @@ -439,10 +439,11 @@ func (dvs *DiscV5Service) PublishENR() { // Log metrics. 
dvs.logger.Debug("done publishing ENR", - fields.Duration(start), + fields.Took(time.Since(start)), zap.Int("unique_peers", len(peerIDs)), zap.Int("pings", pings), - zap.Int("errors", errs)) + zap.Int("errors", errs), + ) } func (dvs *DiscV5Service) createLocalNode(discOpts *Options, ipAddr net.IP) (*enode.LocalNode, error) { diff --git a/observability/log/fields/fields.go b/observability/log/fields/fields.go index c386f29492..c3d33244f0 100644 --- a/observability/log/fields/fields.go +++ b/observability/log/fields/fields.go @@ -39,6 +39,7 @@ const ( FieldPreConsensusTime = "pre_consensus_time" FieldPostConsensusTime = "post_consensus_time" FieldConsensusTime = "consensus_time" + FieldConsensusRounds = "consensus_rounds" FieldBlockTime = "block_time" FieldCount = "count" FieldEstimatedCurrentEpoch = "estimated_current_epoch" @@ -157,10 +158,6 @@ func BindIP(val net.IP) zapcore.Field { return zap.Stringer(FieldBindIP, val) } -func Duration(val time.Time) zapcore.Field { - return zap.Stringer(FieldDuration, stringer.Float64Stringer{Val: time.Since(val).Seconds()}) -} - func BlockCacheMetrics(metrics *ristretto.Metrics) zapcore.Field { return zap.Stringer(FieldBlockCacheMetrics, metrics) } @@ -234,31 +231,31 @@ func Topic(val string) zap.Field { } func PreConsensusTime(val time.Duration) zap.Field { - return zap.String(FieldPreConsensusTime, utils.FormatDuration(val)) + return zap.String(FieldPreConsensusTime, durationToSecondsStr(val)) } func ConsensusTime(val time.Duration) zap.Field { - return zap.String(FieldConsensusTime, strconv.FormatFloat(val.Seconds(), 'f', 5, 64)) + return zap.String(FieldConsensusTime, durationToSecondsStr(val)) } -func PostConsensusTime(val time.Duration) zap.Field { - return zap.String(FieldPostConsensusTime, utils.FormatDuration(val)) +func ConsensusRounds(val uint64) zap.Field { + return zap.Uint64(FieldConsensusRounds, val) } -func BlockTime(val time.Duration) zap.Field { - return zap.String(FieldBlockTime, utils.FormatDuration(val)) +func PostConsensusTime(val time.Duration) zap.Field { + return zap.String(FieldPostConsensusTime, durationToSecondsStr(val)) } -func SubmissionTime(val time.Duration) zap.Field { - return zap.String(FieldSubmissionTime, utils.FormatDuration(val)) +func BlockTime(val time.Duration) zap.Field { + return zap.String(FieldBlockTime, durationToSecondsStr(val)) } func TotalConsensusTime(val time.Duration) zap.Field { - return zap.String(FieldTotalConsensusTime, utils.FormatDuration(val)) + return zap.String(FieldTotalConsensusTime, durationToSecondsStr(val)) } func TotalDutyTime(val time.Duration) zap.Field { - return zap.String(FieldTotalDutyTime, utils.FormatDuration(val)) + return zap.String(FieldTotalDutyTime, durationToSecondsStr(val)) } func DutyID(val string) zap.Field { @@ -317,6 +314,7 @@ func Duties(epoch phase0.Epoch, duties []*spectypes.ValidatorDuty, truncateAfter func Root(r [32]byte) zap.Field { return zap.String("root", hex.EncodeToString(r[:])) } + func BlockRoot(r [32]byte) zap.Field { return zap.String("block_root", hex.EncodeToString(r[:])) } @@ -336,3 +334,8 @@ func Owner(addr common.Address) zap.Field { func Type(v any) zapcore.Field { return zap.String(FieldType, fmt.Sprintf("%T", v)) } + +func durationToSecondsStr(val time.Duration) string { + valStr := strconv.FormatFloat(val.Seconds(), 'f', 5, 64) + return valStr + "s" +} diff --git a/observability/utils/format.go b/observability/utils/format.go index e6f8e884d6..8e5f8424b4 100644 --- a/observability/utils/format.go +++ b/observability/utils/format.go @@ -2,9 
+2,7 @@ package utils import ( "fmt" - "strconv" "strings" - "time" spectypes "github.com/ssvlabs/ssv-spec/types" ) @@ -20,7 +18,3 @@ func FormatCommittee(operators []spectypes.OperatorID) string { } return strings.Join(opids, "_") } - -func FormatDuration(val time.Duration) string { - return strconv.FormatFloat(val.Seconds(), 'f', 5, 64) -} diff --git a/operator/duties/attester.go b/operator/duties/attester.go index a5ec8b9d9f..830b7672d2 100644 --- a/operator/duties/attester.go +++ b/operator/duties/attester.go @@ -298,7 +298,8 @@ func (h *AttesterHandler) fetchAndProcessDuties(ctx context.Context, epoch phase fields.Count(len(duties)), fields.Epoch(epoch), fields.Duties(epoch, specDuties, truncate), - fields.Duration(start)) + fields.Took(time.Since(start)), + ) // Further processing is not needed in exporter mode, terminate early // avoiding CL subscriptions saves some CPU & Network resources diff --git a/operator/duties/observability.go b/operator/duties/observability.go index 32e5f61a42..b76f1a53ec 100644 --- a/operator/duties/observability.go +++ b/operator/duties/observability.go @@ -28,15 +28,15 @@ var ( metric.WithDescription("delay of the slot ticker in seconds"), metric.WithExplicitBucketBoundaries(metrics.SecondsHistogramBuckets...))) - dutiesExecutedCounter = metrics.New( + dutiesScheduledCounter = metrics.New( meter.Int64Counter( observability.InstrumentName(observabilityNamespace, "scheduler.executions"), metric.WithUnit("{duty}"), - metric.WithDescription("total number of duties executed by scheduler"))) + metric.WithDescription("total number of duties scheduled for execution"))) ) func recordDutyScheduled(ctx context.Context, role types.RunnerRole, slotDelay time.Duration) { runnerRoleAttr := metric.WithAttributes(observability.RunnerRoleAttribute(role)) - dutiesExecutedCounter.Add(ctx, 1, runnerRoleAttr) + dutiesScheduledCounter.Add(ctx, 1, runnerRoleAttr) slotDelayHistogram.Record(ctx, slotDelay.Seconds(), runnerRoleAttr) } diff --git a/operator/duties/proposer.go b/operator/duties/proposer.go index 00a61636d2..fab69f07da 100644 --- a/operator/duties/proposer.go +++ b/operator/duties/proposer.go @@ -246,7 +246,8 @@ func (h *ProposerHandler) fetchAndProcessDuties(ctx context.Context, epoch phase fields.Count(len(duties)), fields.Epoch(epoch), fields.Duties(epoch, specDuties, truncate), - fields.Duration(start)) + fields.Took(time.Since(start)), + ) span.SetStatus(codes.Ok, "") return nil diff --git a/operator/duties/scheduler.go b/operator/duties/scheduler.go index b2af816656..ec94d1ff54 100644 --- a/operator/duties/scheduler.go +++ b/operator/duties/scheduler.go @@ -409,7 +409,7 @@ func (s *Scheduler) HandleHeadEvent() func(ctx context.Context, event *eth2apiv1 } } -// ExecuteDuties tries to execute the given duties +// ExecuteDuties tries to execute the provided validator duties func (s *Scheduler) ExecuteDuties(ctx context.Context, duties []*spectypes.ValidatorDuty) { if s.exporterMode { // We never execute duties in exporter mode. The handler should skip calling this method. 
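The logging change that repeats across this patch, fields.Duration(start) becoming fields.Took(time.Since(start)), is a shift of where the elapsed time is computed: at the call site, as a ready-made time.Duration, instead of inside the field helper from a start timestamp. A small self-contained sketch of that pattern follows; the took helper below is a stand-in with an assumed shape, not the repo's actual fields.Took.

package main

import (
	"time"

	"go.uber.org/zap"
)

// took is a stand-in for a duration-valued log field; the real fields.Took
// helper in the repo may format the value differently.
func took(d time.Duration) zap.Field {
	return zap.Duration("took", d)
}

func main() {
	logger := zap.NewExample()

	start := time.Now()
	time.Sleep(10 * time.Millisecond) // placeholder for the measured work

	// Elapsed time is measured once, where the work finished, and passed
	// to the logger as a time.Duration.
	logger.Debug("work completed", took(time.Since(start)))
}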
@@ -427,6 +427,10 @@ func (s *Scheduler) ExecuteDuties(ctx context.Context, duties []*spectypes.Valid for _, duty := range duties { logger := s.loggerWithDutyContext(duty) + const eventMsg = "🔧 executing validator duty" + logger.Debug(eventMsg) + span.AddEvent(eventMsg) + slotDelay := time.Since(s.beaconConfig.SlotStartTime(duty.Slot)) if slotDelay >= 100*time.Millisecond { const eventMsg = "⚠️ late duty execution" @@ -455,7 +459,7 @@ func (s *Scheduler) ExecuteDuties(ctx context.Context, duties []*spectypes.Valid span.SetStatus(codes.Ok, "") } -// ExecuteCommitteeDuties tries to execute the given committee duties +// ExecuteCommitteeDuties tries to execute the provided committee duties func (s *Scheduler) ExecuteCommitteeDuties(ctx context.Context, duties committeeDutiesMap) { if s.exporterMode { // We never execute duties in exporter mode. The handler should skip calling this method. @@ -469,6 +473,7 @@ func (s *Scheduler) ExecuteCommitteeDuties(ctx context.Context, duties committee for _, committee := range duties { duty := committee.duty + logger := s.loggerWithCommitteeDutyContext(committee) const eventMsg = "🔧 executing committee duty" @@ -499,7 +504,7 @@ func (s *Scheduler) ExecuteCommitteeDuties(ctx context.Context, duties committee logger.Warn("parent-context has no deadline set") } - s.waitOneThirdOrValidBlock(duty.Slot) + s.waitOneThirdIntoSlotOrValidBlock(duty.Slot) s.dutyExecutor.ExecuteCommitteeDuty(dutyCtx, logger, committee.id, duty) }() } @@ -541,19 +546,33 @@ func (s *Scheduler) loggerWithCommitteeDutyContext(committeeDuty *committeeDuty) // advanceHeadSlot will set s.headSlot to the provided slot (but only if the provided slot is higher, // meaning s.headSlot value can never decrease) and notify the go-routines waiting for it to happen. func (s *Scheduler) advanceHeadSlot(slot phase0.Slot) { + s.logger.Debug("advancing head slot (maybe)") + defer s.logger.Debug("advancing head slot (done)") + s.waitCond.L.Lock() if slot > s.headSlot { + s.logger.Debug("advancing head slot", + zap.Uint64("prev_head_slot", uint64(s.headSlot)), + zap.Uint64("slot", uint64(slot)), + ) s.headSlot = slot s.waitCond.Broadcast() } s.waitCond.L.Unlock() } -// waitOneThirdOrValidBlock waits until one-third of the slot has passed (SECONDS_PER_SLOT / 3 seconds after -// slot start time), or for head block event that might come in even sooner than one-third of the slot passes. -func (s *Scheduler) waitOneThirdOrValidBlock(slot phase0.Slot) { +// waitOneThirdIntoSlotOrValidBlock waits until one-third of the slot has passed (SECONDS_PER_SLOT / 3 seconds after +// slot start time), or for a head block event that might come in even sooner than one-third of the slot passes. 
+func (s *Scheduler) waitOneThirdIntoSlotOrValidBlock(slot phase0.Slot) { + s.logger.Debug("waiting 1/3 into slot (maybe)") + defer s.logger.Debug("waiting 1/3 into slot (done)") + s.waitCond.L.Lock() for s.headSlot < slot { + s.logger.Debug("waiting 1/3 into slot", + zap.Uint64("current_head_slot", uint64(s.headSlot)), + zap.Uint64("slot", uint64(slot)), + ) s.waitCond.Wait() } s.waitCond.L.Unlock() diff --git a/operator/duties/sync_committee.go b/operator/duties/sync_committee.go index e45eb8c4f2..e78f338df8 100644 --- a/operator/duties/sync_committee.go +++ b/operator/duties/sync_committee.go @@ -339,7 +339,8 @@ func (h *SyncCommitteeHandler) prepareDutiesResultLog(period uint64, duties []*e fields.Count(len(duties)), zap.String("period", fmt.Sprintf("p%v", period)), zap.Any("duties", b.String()), - fields.Duration(start)) + fields.Took(time.Since(start)), + ) } func (h *SyncCommitteeHandler) toSpecDuty(duty *eth2apiv1.SyncCommitteeDuty, slot phase0.Slot, role spectypes.BeaconRole) *spectypes.ValidatorDuty { diff --git a/operator/dutytracer/collector.go b/operator/dutytracer/collector.go index d2c3953e52..e452a8765c 100644 --- a/operator/dutytracer/collector.go +++ b/operator/dutytracer/collector.go @@ -27,6 +27,7 @@ import ( "github.com/ssvlabs/ssv/operator/duties/dutystore" "github.com/ssvlabs/ssv/operator/slotticker" "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" + ssvtypes "github.com/ssvlabs/ssv/protocol/v2/types" registrystorage "github.com/ssvlabs/ssv/registry/storage" "github.com/ssvlabs/ssv/utils/hashmap" ) @@ -393,7 +394,7 @@ func (c *Collector) processPartialSigCommittee(receivedAt uint64, msg *spectypes trace.OperatorIDs = cmt.Operators } - signer := msg.Messages[0].Signer + signer := ssvtypes.PartialSigMsgSigner(msg) var attIdxs []phase0.ValidatorIndex var scIdxs []phase0.ValidatorIndex @@ -776,7 +777,7 @@ func (c *Collector) collect(ctx context.Context, msg *queue.SSVMessage, verifySi tr := &exporter.PartialSigTrace{ Type: pSigMessages.Type, BeaconRoot: pSigMessages.Messages[0].SigningRoot, - Signer: pSigMessages.Messages[0].Signer, + Signer: ssvtypes.PartialSigMsgSigner(pSigMessages), ReceivedTime: startTime, } diff --git a/operator/dutytracer/store.go b/operator/dutytracer/store.go index 93a7b6b8f0..c7dfcbcb5c 100644 --- a/operator/dutytracer/store.go +++ b/operator/dutytracer/store.go @@ -7,7 +7,6 @@ import ( "github.com/attestantio/go-eth2-client/spec/phase0" "github.com/hashicorp/go-multierror" "github.com/pkg/errors" - spectypes "github.com/ssvlabs/ssv-spec/types" "github.com/ssvlabs/ssv/exporter" diff --git a/protocol/v2/qbft/controller/controller.go b/protocol/v2/qbft/controller/controller.go index e6d9c94b11..6fc1329429 100644 --- a/protocol/v2/qbft/controller/controller.go +++ b/protocol/v2/qbft/controller/controller.go @@ -8,18 +8,17 @@ import ( "github.com/attestantio/go-eth2-client/spec/phase0" "github.com/pkg/errors" + specqbft "github.com/ssvlabs/ssv-spec/qbft" + spectypes "github.com/ssvlabs/ssv-spec/types" "go.opentelemetry.io/otel/codes" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" - specqbft "github.com/ssvlabs/ssv-spec/qbft" - spectypes "github.com/ssvlabs/ssv-spec/types" - + qbftstorage "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/observability" "github.com/ssvlabs/ssv/observability/traces" "github.com/ssvlabs/ssv/protocol/v2/qbft" "github.com/ssvlabs/ssv/protocol/v2/qbft/instance" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" "github.com/ssvlabs/ssv/protocol/v2/ssv" ssvtypes 
"github.com/ssvlabs/ssv/protocol/v2/types" ) @@ -59,41 +58,41 @@ func NewController( } } -// StartNewInstance will start a new QBFT instance, if can't will return error +// StartNewInstance will attempt to start a new QBFT instance. func (c *Controller) StartNewInstance( ctx context.Context, logger *zap.Logger, height specqbft.Height, value []byte, valueChecker ssv.ValueChecker, -) error { +) (*instance.Instance, error) { ctx, span := tracer.Start(ctx, observability.InstrumentName(observabilityNamespace, "qbft.controller.start"), trace.WithAttributes(observability.BeaconSlotAttribute(phase0.Slot(height)))) defer span.End() if err := valueChecker.CheckValue(value); err != nil { - return traces.Errorf(span, "value invalid: %w", err) + return nil, traces.Errorf(span, "value invalid: %w", err) } if height < c.Height { - return spectypes.WrapError(spectypes.StartInstanceErrorCode, traces.Errorf(span, "attempting to start an instance with a past height")) + return nil, spectypes.WrapError(spectypes.StartInstanceErrorCode, traces.Errorf(span, "attempting to start an instance with a past height")) } if c.StoredInstances.FindInstance(height) != nil { - return spectypes.WrapError(spectypes.InstanceAlreadyRunningErrorCode, traces.Errorf(span, "instance already running")) + return nil, spectypes.WrapError(spectypes.InstanceAlreadyRunningErrorCode, traces.Errorf(span, "instance already running")) } c.Height = height - newInstance := c.addAndStoreNewInstance() - - span.AddEvent("start new instance") + newInstance := instance.NewInstance(c.GetConfig(), c.CommitteeMember, c.Identifier, c.Height, c.OperatorSigner) + c.StoredInstances.addNewInstance(newInstance) newInstance.Start(ctx, logger, value, height, valueChecker) c.forceStopAllInstanceExceptCurrent() span.SetStatus(codes.Ok, "") - return nil + + return newInstance, nil } func (c *Controller) forceStopAllInstanceExceptCurrent() { @@ -195,13 +194,6 @@ func (c *Controller) isFutureMessage(msg *specqbft.ProcessingMessage) bool { return false } -// addAndStoreNewInstance returns creates a new QBFT instance, stores it in an array and returns it -func (c *Controller) addAndStoreNewInstance() *instance.Instance { - i := instance.NewInstance(c.GetConfig(), c.CommitteeMember, c.Identifier, c.Height, c.OperatorSigner) - c.StoredInstances.addNewInstance(i) - return i -} - // GetRoot returns the state's deterministic root func (c *Controller) GetRoot() ([32]byte, error) { marshaledRoot, err := json.Marshal(c) diff --git a/protocol/v2/qbft/controller/controller_test.go b/protocol/v2/qbft/controller/controller_test.go index 4649791975..89fef4f9df 100644 --- a/protocol/v2/qbft/controller/controller_test.go +++ b/protocol/v2/qbft/controller/controller_test.go @@ -2,19 +2,19 @@ package controller import ( "context" - "encoding/json" "testing" specqbft "github.com/ssvlabs/ssv-spec/qbft" spectestingutils "github.com/ssvlabs/ssv-spec/types/testingutils" "github.com/stretchr/testify/require" + "github.com/ssvlabs/ssv/ssvsigner/ekm" + "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/protocol/v2/qbft" "github.com/ssvlabs/ssv/protocol/v2/qbft/instance" "github.com/ssvlabs/ssv/protocol/v2/qbft/roundtimer" "github.com/ssvlabs/ssv/protocol/v2/types" - "github.com/ssvlabs/ssv/ssvsigner/ekm" ) func TestController_Marshaling(t *testing.T) { @@ -66,25 +66,17 @@ func TestController_OnTimeoutWithRoundCheck(t *testing.T) { contr := &Controller{} // Initialize EventMsg for the test - timeoutData := types.TimeoutData{ + timeoutData := &types.TimeoutData{ Height: 
specqbft.FirstHeight, Round: specqbft.FirstRound, } - data, err := json.Marshal(timeoutData) - require.NoError(t, err) - - msg := &types.EventMsg{ - Type: types.Timeout, - Data: data, - } - // Simulate a scenario where the instance is at a higher round inst.State.Round = specqbft.Round(2) contr.StoredInstances.addNewInstance(inst) // Call OnTimeout and capture the error - err = contr.OnTimeout(context.TODO(), logger, *msg) + err := contr.OnTimeout(context.TODO(), logger, timeoutData) // Assert that the error is nil and the round did not bump require.NoError(t, err) @@ -94,7 +86,7 @@ func TestController_OnTimeoutWithRoundCheck(t *testing.T) { inst.State.Round = specqbft.FirstRound // Call OnTimeout and capture the error - err = contr.OnTimeout(context.TODO(), logger, *msg) + err = contr.OnTimeout(context.TODO(), logger, timeoutData) // Assert that the error is nil and the round did bump require.NoError(t, err) diff --git a/protocol/v2/qbft/controller/timer.go b/protocol/v2/qbft/controller/timer.go index 226189fdc1..213883cb69 100644 --- a/protocol/v2/qbft/controller/timer.go +++ b/protocol/v2/qbft/controller/timer.go @@ -13,16 +13,10 @@ import ( ) // OnTimeout is trigger upon timeout for the given height -func (c *Controller) OnTimeout(ctx context.Context, logger *zap.Logger, msg types.EventMsg) error { - // TODO add validation +func (c *Controller) OnTimeout(ctx context.Context, logger *zap.Logger, timeoutData *types.TimeoutData) error { ctx, span := tracer.Start(ctx, observability.InstrumentName(observabilityNamespace, "on_timeout")) defer span.End() - timeoutData, err := msg.GetTimeoutData() - if err != nil { - return traces.Errorf(span, "failed to get timeout data: %w", err) - } - span.SetAttributes( observability.DutyRoundAttribute(timeoutData.Round), observability.BeaconSlotAttribute(phase0.Slot(timeoutData.Height)), @@ -42,7 +36,7 @@ func (c *Controller) OnTimeout(ctx context.Context, logger *zap.Logger, msg type } if decided, _ := instance.IsDecided(); decided { - span.AddEvent("instance is decided") + span.AddEvent("QBFT instance is already decided") span.SetStatus(codes.Ok, "") return nil } diff --git a/protocol/v2/qbft/controller/types.go b/protocol/v2/qbft/controller/types.go index f4b3c18ff4..c1f9c8146d 100644 --- a/protocol/v2/qbft/controller/types.go +++ b/protocol/v2/qbft/controller/types.go @@ -13,7 +13,6 @@ import ( const ( // InstanceContainerDefaultCapacity is the default capacity for InstanceContainer. InstanceContainerDefaultCapacity int = 2 - // InstanceContainerTestCapacity is the capacity for InstanceContainer used in tests. InstanceContainerTestCapacity int = 1024 ) @@ -21,8 +20,8 @@ const ( // InstanceContainer is a fixed-capacity container for instances. 
type InstanceContainer []*instance.Instance -func (i InstanceContainer) FindInstance(height specqbft.Height) *instance.Instance { - for _, inst := range i { +func (ic *InstanceContainer) FindInstance(height specqbft.Height) *instance.Instance { + for _, inst := range *ic { if inst != nil { if inst.GetHeight() == height { return inst @@ -32,64 +31,65 @@ func (i InstanceContainer) FindInstance(height specqbft.Height) *instance.Instan return nil } -// addNewInstance will add the new instance at index 0, pushing all other stored InstanceContainer one index up (ejecting last one if existing) -func (i *InstanceContainer) addNewInstance(instance *instance.Instance) { - if cap(*i) == 0 { - *i = make(InstanceContainer, 0, InstanceContainerDefaultCapacity) +// addNewInstance will add the new instance at index 0, pushing all other stored InstanceContainer one index up +// (ejecting the last one if necessary) +func (ic *InstanceContainer) addNewInstance(instance *instance.Instance) { + if cap(*ic) == 0 { + *ic = make(InstanceContainer, 0, InstanceContainerDefaultCapacity) } - indexToInsert := len(*i) - for index, existingInstance := range *i { + indexToInsert := len(*ic) + for index, existingInstance := range *ic { if existingInstance.GetHeight() < instance.GetHeight() { indexToInsert = index break } } - if indexToInsert == len(*i) { + if indexToInsert == len(*ic) { // Append the new instance, only if there's free space for it. - if len(*i) < cap(*i) { - *i = append(*i, instance) + if len(*ic) < cap(*ic) { + *ic = append(*ic, instance) } } else { - if len(*i) == cap(*i) { + if len(*ic) == cap(*ic) { // Shift the instances to the right and insert the new instance. - copy((*i)[indexToInsert+1:], (*i)[indexToInsert:]) - (*i)[indexToInsert] = instance + copy((*ic)[indexToInsert+1:], (*ic)[indexToInsert:]) + (*ic)[indexToInsert] = instance } else { // Shift the instances to the right and append the new instance. - *i = append(*i, nil) - copy((*i)[indexToInsert+1:], (*i)[indexToInsert:]) - (*i)[indexToInsert] = instance + *ic = append(*ic, nil) + copy((*ic)[indexToInsert+1:], (*ic)[indexToInsert:]) + (*ic)[indexToInsert] = instance } } } // String returns a human-readable representation of the instances. Useful for debugging. -func (i *InstanceContainer) String() string { - heights := make([]string, len(*i)) - for index, inst := range *i { +func (ic *InstanceContainer) String() string { + heights := make([]string, len(*ic)) + for index, inst := range *ic { heights[index] = fmt.Sprint(inst.GetHeight()) } - return fmt.Sprintf("Instances(len=%d, cap=%d, heights=(%s))", len(*i), cap(*i), strings.Join(heights, ", ")) + return fmt.Sprintf("Instances(len=%d, cap=%d, heights=(%s))", len(*ic), cap(*ic), strings.Join(heights, ", ")) } // UnmarshalJSON implements the json.Unmarshaler interface for InstanceContainer -func (c *InstanceContainer) UnmarshalJSON(data []byte) error { +func (ic *InstanceContainer) UnmarshalJSON(data []byte) error { // InstanceContainer must always have correct capacity on initialization // because addition to instance container doesn't grow beyond cap removing values that don't fit. // Therefore, we need to initialize it properly on unmarshaling // to allow spec tests grow StoredInstances as much as they need to. 
instances := make([]*instance.Instance, 0, InstanceContainerTestCapacity) - if cap(*c) != 0 { - instances = *c + if cap(*ic) != 0 { + instances = *ic } if err := json.Unmarshal(data, &instances); err != nil { return err } - *c = instances + *ic = instances return nil } diff --git a/protocol/v2/qbft/instance/commit.go b/protocol/v2/qbft/instance/commit.go index aa7122f010..68a3f222e4 100644 --- a/protocol/v2/qbft/instance/commit.go +++ b/protocol/v2/qbft/instance/commit.go @@ -18,10 +18,9 @@ import ( // UponCommit returns true if a quorum of commit messages was received. // Assumes commit message is valid! func (i *Instance) UponCommit(ctx context.Context, logger *zap.Logger, msg *specqbft.ProcessingMessage) (bool, []byte, *spectypes.SignedSSVMessage, error) { - logger.Debug("📬 got commit message", - fields.QBFTRound(i.State.Round), - zap.Any("commit_signers", msg.SignedMessage.OperatorIDs), - fields.Root(msg.QBFTMessage.Root)) + logger = logger.With(fields.Root(msg.QBFTMessage.Root)) + + logger.Debug("📬 got commit message", zap.Any("commit_signers", msg.SignedMessage.OperatorIDs)) addMsg, err := i.State.CommitContainer.AddFirstMsgForSignerAndRound(msg) if err != nil { @@ -45,10 +44,7 @@ func (i *Instance) UponCommit(ctx context.Context, logger *zap.Logger, msg *spec return false, nil, nil, errors.Wrap(err, "could not aggregate commit msgs") } - logger.Debug("🎯 got commit quorum", - fields.QBFTRound(i.State.Round), - zap.Any("agg_signers", agg.OperatorIDs), - fields.Root(msg.QBFTMessage.Root)) + logger.Debug("🎯 got commit quorum", zap.Any("agg_signers", agg.OperatorIDs)) i.metrics.EndStage(ctx, i.State.Round, stageCommit) diff --git a/protocol/v2/qbft/instance/instance.go b/protocol/v2/qbft/instance/instance.go index e40c09f641..a8ddd5402c 100644 --- a/protocol/v2/qbft/instance/instance.go +++ b/protocol/v2/qbft/instance/instance.go @@ -4,7 +4,6 @@ import ( "context" "encoding/hex" "encoding/json" - "sync" "github.com/attestantio/go-eth2-client/spec/phase0" "github.com/pkg/errors" @@ -30,7 +29,6 @@ type Instance struct { signer ssvtypes.OperatorSigner processMsgF *spectypes.ThreadSafeF - startOnce sync.Once forceStop bool StartValue []byte @@ -82,58 +80,57 @@ func (i *Instance) Start( height specqbft.Height, valueChecker ssv.ValueChecker, ) { - i.startOnce.Do(func() { - _, span := tracer.Start(ctx, - observability.InstrumentName(observabilityNamespace, "qbft.instance.start"), - trace.WithAttributes(observability.BeaconSlotAttribute(phase0.Slot(height)))) - defer span.End() - - i.StartValue = value - i.bumpToRound(specqbft.FirstRound) - i.State.Height = height - i.ValueChecker = valueChecker - i.metrics.Start() - i.config.GetTimer().TimeoutForRound(height, specqbft.FirstRound) - - logger = logger.With( - fields.QBFTRound(i.State.Round), - fields.QBFTHeight(i.State.Height)) - - proposerID := i.ProposerForRound(specqbft.FirstRound) - const eventMsg = "ℹ️ starting QBFT instance" - logger.Debug(eventMsg, zap.Uint64("leader", proposerID)) - span.AddEvent(eventMsg, trace.WithAttributes(observability.ValidatorProposerAttribute(proposerID))) - - // propose if this node is the proposer - if proposerID == i.State.CommitteeMember.OperatorID { - proposal, err := i.CreateProposal(i.StartValue, nil, nil) - if err != nil { - logger.Warn("❗ failed to create proposal", zap.Error(err)) - span.SetStatus(codes.Error, err.Error()) - return - // TODO align spec to add else to avoid broadcast errored proposal - } + _, span := tracer.Start(ctx, + observability.InstrumentName(observabilityNamespace, 
"qbft.instance.start"), + trace.WithAttributes(observability.BeaconSlotAttribute(phase0.Slot(height)))) + defer span.End() + + proposerID := i.ProposerForRound(specqbft.FirstRound) + + const startingQBFTInstanceEvent = "ℹ️ starting QBFT instance" + logger.Debug( + startingQBFTInstanceEvent, + zap.Uint64("us", i.State.CommitteeMember.OperatorID), + zap.Uint64("leader", proposerID), + ) + span.AddEvent(startingQBFTInstanceEvent, trace.WithAttributes(observability.ValidatorProposerAttribute(proposerID))) + + i.StartValue = value + i.bumpToRound(specqbft.FirstRound) + i.State.Height = height + i.ValueChecker = valueChecker + i.metrics.Start() + i.config.GetTimer().TimeoutForRound(height, specqbft.FirstRound) + + // propose if this node is the proposer + if proposerID == i.State.CommitteeMember.OperatorID { + proposal, err := i.CreateProposal(i.StartValue, nil, nil) + if err != nil { + logger.Warn("❗ failed to create proposal", zap.Error(err)) + span.SetStatus(codes.Error, err.Error()) + return + // TODO align spec to add else to avoid broadcast errored proposal + } - r, err := specqbft.HashDataRoot(i.StartValue) // TODO (better than decoding?) - if err != nil { - logger.Warn("❗ failed to hash input data", zap.Error(err)) - span.SetStatus(codes.Error, err.Error()) - return - } + r, err := specqbft.HashDataRoot(i.StartValue) // TODO (better than decoding?) + if err != nil { + logger.Warn("❗ failed to hash input data", zap.Error(err)) + span.SetStatus(codes.Error, err.Error()) + return + } - logger = logger.With(fields.Root(r)) - const eventMsg = "📢 leader broadcasting proposal message" - logger.Debug(eventMsg) - span.AddEvent(eventMsg, trace.WithAttributes(attribute.String("root", hex.EncodeToString(r[:])))) + logger = logger.With(fields.Root(r)) + const eventMsg = "📢 leader broadcasting proposal message" + logger.Debug(eventMsg) + span.AddEvent(eventMsg, trace.WithAttributes(attribute.String("root", hex.EncodeToString(r[:])))) - if err := i.Broadcast(proposal); err != nil { - logger.Warn("❌ failed to broadcast proposal", zap.Error(err)) - span.RecordError(err) - } + if err := i.Broadcast(proposal); err != nil { + logger.Warn("❌ failed to broadcast proposal", zap.Error(err)) + span.RecordError(err) } + } - span.SetStatus(codes.Ok, "") - }) + span.SetStatus(codes.Ok, "") } func (i *Instance) Broadcast(msg *spectypes.SignedSSVMessage) error { diff --git a/protocol/v2/qbft/instance/prepare.go b/protocol/v2/qbft/instance/prepare.go index 8eb1d9a610..d03ed762a0 100644 --- a/protocol/v2/qbft/instance/prepare.go +++ b/protocol/v2/qbft/instance/prepare.go @@ -28,10 +28,10 @@ func (i *Instance) uponPrepare(ctx context.Context, logger *zap.Logger, msg *spe } proposedRoot := i.State.ProposalAcceptedForCurrentRound.QBFTMessage.Root - logger.Debug("📬 got prepare message", - fields.QBFTRound(i.State.Round), - zap.Any("prepare_signers", msg.SignedMessage.OperatorIDs), - fields.Root(proposedRoot)) + + logger = logger.With(fields.Root(proposedRoot)) + + logger.Debug("📬 got prepare message", zap.Any("prepare_signers", msg.SignedMessage.OperatorIDs)) if hasQuorumBefore { return nil // already moved to commit stage @@ -47,18 +47,15 @@ func (i *Instance) uponPrepare(ctx context.Context, logger *zap.Logger, msg *spe i.metrics.EndStage(ctx, i.State.Round, stagePrepare) logger.Debug("🎯 got prepare quorum", - fields.QBFTRound(i.State.Round), - zap.Any("prepare_signers", allSigners(i.State.PrepareContainer.MessagesForRound(i.State.Round)))) + zap.Any("prepare_signers", 
allSigners(i.State.PrepareContainer.MessagesForRound(i.State.Round))), + ) commitMsg, err := i.CreateCommit(proposedRoot) if err != nil { return errors.Wrap(err, "could not create commit msg") } - logger.Debug("📢 broadcasting commit message", - fields.QBFTRound(i.State.Round), - zap.Any("commit_signers", commitMsg.OperatorIDs), - fields.Root(proposedRoot)) + logger.Debug("📢 broadcasting commit message", zap.Any("commit_signers", commitMsg.OperatorIDs)) if err := i.Broadcast(commitMsg); err != nil { return errors.Wrap(err, "failed to broadcast commit message") diff --git a/protocol/v2/qbft/instance/proposal.go b/protocol/v2/qbft/instance/proposal.go index ff0529e549..24d4d99166 100644 --- a/protocol/v2/qbft/instance/proposal.go +++ b/protocol/v2/qbft/instance/proposal.go @@ -10,7 +10,6 @@ import ( spectypes "github.com/ssvlabs/ssv-spec/types" "go.uber.org/zap" - "github.com/ssvlabs/ssv/observability/log/fields" ssvtypes "github.com/ssvlabs/ssv/protocol/v2/types" ) @@ -25,16 +24,17 @@ func (i *Instance) uponProposal(ctx context.Context, logger *zap.Logger, msg *sp return nil // uponProposal was already called } - logger.Debug("📬 got proposal message", - fields.QBFTRound(i.State.Round), - zap.Any("proposal_signers", msg.SignedMessage.OperatorIDs)) + logger = logger.With(zap.Any("proposal_signers", msg.SignedMessage.OperatorIDs)) + + logger.Debug("📬 got proposal message") - newRound := msg.QBFTMessage.Round i.State.ProposalAcceptedForCurrentRound = msg + newRound := msg.QBFTMessage.Round + // A future justified proposal should bump us into future round and reset timer - if msg.QBFTMessage.Round > i.State.Round { - i.config.GetTimer().TimeoutForRound(msg.QBFTMessage.Height, msg.QBFTMessage.Round) + if newRound > i.State.Round { + i.config.GetTimer().TimeoutForRound(msg.QBFTMessage.Height, newRound) } i.bumpToRound(newRound) @@ -51,10 +51,7 @@ func (i *Instance) uponProposal(ctx context.Context, logger *zap.Logger, msg *sp return errors.Wrap(err, "could not create prepare msg") } - logger.Debug("📢 got proposal, broadcasting prepare message", - fields.QBFTRound(i.State.Round), - zap.Any("proposal_signers", msg.SignedMessage.OperatorIDs), - zap.Any("prepare_signers", prepare.OperatorIDs)) + logger.Debug("📢 got proposal, broadcasting prepare message", zap.Any("prepare_signers", prepare.OperatorIDs)) if err := i.Broadcast(prepare); err != nil { return errors.Wrap(err, "failed to broadcast prepare message") diff --git a/protocol/v2/qbft/instance/round_change.go b/protocol/v2/qbft/instance/round_change.go index f8658681ad..7684be4d8e 100644 --- a/protocol/v2/qbft/instance/round_change.go +++ b/protocol/v2/qbft/instance/round_change.go @@ -36,14 +36,14 @@ func (i *Instance) uponRoundChange( } logger = logger.With( - fields.QBFTRound(i.State.Round), - fields.QBFTHeight(i.State.Height), - zap.Uint64("msg_round", uint64(msg.QBFTMessage.Round)), + zap.Uint64("qbft_instance_round", uint64(i.State.Round)), + zap.Uint64("qbft_instance_height", uint64(i.State.Height)), ) logger.Debug("🔄 got round change", fields.Root(msg.QBFTMessage.Root), - zap.Any("round_change_signers", msg.SignedMessage.OperatorIDs)) + zap.Any("round_change_signers", msg.SignedMessage.OperatorIDs), + ) justifiedRoundChangeMsg, valueToPropose, err := i.hasReceivedProposalJustificationForLeadingRound(msg) if err != nil { @@ -76,9 +76,9 @@ func (i *Instance) uponRoundChange( i.metrics.RecordRoundChange(ctx, msg.QBFTMessage.Round, reasonJustified) logger.Debug("🔄 got justified round change, broadcasting proposal message", - 
fields.QBFTRound(i.State.Round), zap.Any("round_change_signers", allSigners(i.State.RoundChangeContainer.MessagesForRound(i.State.Round))), - fields.Root(r)) + fields.Root(r), + ) if err := i.Broadcast(proposal); err != nil { return errors.Wrap(err, "failed to broadcast proposal message") @@ -115,12 +115,11 @@ func (i *Instance) uponChangeRoundPartialQuorum(logger *zap.Logger, newRound spe return errors.Wrap(err, "failed to hash instance start value") } - logger.Debug("📢 got partial quorum, broadcasting round change message", - fields.QBFTRound(i.State.Round), + logger.Debug("📢 broadcasting round change message (got partial quorum)", + zap.Uint64("qbft_new_round", uint64(newRound)), fields.Root(root), zap.Any("round_change_signers", roundChange.OperatorIDs), - fields.QBFTHeight(i.State.Height), - zap.String("reason", "partial-quorum")) + ) if err := i.Broadcast(roundChange); err != nil { return errors.Wrap(err, "failed to broadcast round change message") diff --git a/protocol/v2/qbft/instance/timeout.go b/protocol/v2/qbft/instance/timeout.go index 6f2d0fe61e..c1d6f86b94 100644 --- a/protocol/v2/qbft/instance/timeout.go +++ b/protocol/v2/qbft/instance/timeout.go @@ -21,18 +21,25 @@ func (i *Instance) UponRoundTimeout(ctx context.Context, logger *zap.Logger) err return types.WrapError(types.TimeoutInstanceErrorCode, traces.Errorf(span, "instance stopped processing timeouts")) } + root, err := specqbft.HashDataRoot(i.StartValue) + if err != nil { + return traces.Errorf(span, "could not calculate root for round change: %w", err) + } + + logger = logger.With(fields.Root(root)) + + logger.Debug("⌛ round timed out") + prevRound := i.State.Round newRound := prevRound + 1 - logger.Debug("⌛ round timed out", fields.QBFTRound(prevRound)) - // TODO: previously this was done outside of a defer, which caused the // round to be bumped before the round change message was created & broadcasted. // Remember to track the impact of this change and revert/modify if necessary. 
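// The defer below is what produces the ordering described in the comment above: the round-change
// message for the new round is created and broadcast while the instance is still at the old round,
// and only afterwards does the deferred block bump the round and reset the timer. A minimal
// standalone sketch of that ordering (illustrative, not part of this change):

package main

import "fmt"

func onRoundTimeout() {
	round := 1
	newRound := round + 1
	// Runs only after the function body below has finished, i.e. after the "broadcast".
	defer func() {
		round = newRound
		fmt.Println("bumped to round", round, "and reset timer")
	}()
	fmt.Println("broadcasting round change while still at round", round)
}

func main() { onRoundTimeout() }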
defer func() { i.bumpToRound(newRound) i.State.ProposalAcceptedForCurrentRound = nil - i.config.GetTimer().TimeoutForRound(i.State.Height, i.State.Round) + i.config.GetTimer().TimeoutForRound(i.State.Height, newRound) }() roundChange, err := i.CreateRoundChange(newRound) @@ -40,26 +47,16 @@ func (i *Instance) UponRoundTimeout(ctx context.Context, logger *zap.Logger) err return traces.Errorf(span, "could not generate round change msg: %w", err) } - root, err := specqbft.HashDataRoot(i.StartValue) - if err != nil { - return traces.Errorf(span, "could not calculate root for round change: %w", err) - } - i.metrics.RecordRoundChange(ctx, prevRound, reasonTimeout) - const eventMsg = "📢 broadcasting round change message" - span.AddEvent(eventMsg, - trace.WithAttributes( - observability.BeaconBlockRootAttribute(root), - observability.DutyRoundAttribute(i.State.Round), - )) - - logger.Debug(eventMsg, - fields.QBFTRound(i.State.Round), + const eventMsg = "📢 broadcasting round change message (this round timed out)" + span.AddEvent(eventMsg, trace.WithAttributes(observability.BeaconBlockRootAttribute(root), observability.DutyRoundAttribute(prevRound))) + logger.Debug( + eventMsg, + zap.Uint64("qbft_new_round", uint64(newRound)), fields.Root(root), zap.Any("round_change_signers", roundChange.OperatorIDs), - fields.QBFTHeight(i.State.Height), - zap.String("reason", "timeout")) + ) if err := i.Broadcast(roundChange); err != nil { return traces.Errorf(span, "failed to broadcast round change message: %w", err) diff --git a/protocol/v2/qbft/spectest/controller_type.go b/protocol/v2/qbft/spectest/controller_type.go index 23fd3494c2..94965b07aa 100644 --- a/protocol/v2/qbft/spectest/controller_type.go +++ b/protocol/v2/qbft/spectest/controller_type.go @@ -19,11 +19,11 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/zap" + "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/protocol/v2/qbft" "github.com/ssvlabs/ssv/protocol/v2/qbft/controller" "github.com/ssvlabs/ssv/protocol/v2/qbft/roundtimer" - qbfttesting "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" protocoltesting "github.com/ssvlabs/ssv/protocol/v2/testing" ) @@ -54,8 +54,8 @@ func RunControllerSpecTest(t *testing.T, test *spectests.ControllerSpecTest) { func generateController(logger *zap.Logger) *controller.Controller { identifier := spectestingutils.TestingIdentifier - config := qbfttesting.TestingConfig(logger, spectestingutils.Testing4SharesSet()) - return qbfttesting.NewTestingQBFTController( + config := protocoltesting.TestingConfig(logger, spectestingutils.Testing4SharesSet()) + return protocoltesting.NewTestingQBFTController( spectestingutils.Testing4SharesSet(), identifier[:], spectestingutils.TestingCommitteeMember(spectestingutils.Testing4SharesSet()), @@ -148,8 +148,8 @@ func runInstanceWithData( contr *controller.Controller, runData *spectests.RunInstanceData, ) error { - err := contr.StartNewInstance(context.TODO(), logger, height, runData.InputValue, qbfttesting.TestingValueChecker{}) var lastErr error + _, err := contr.StartNewInstance(context.TODO(), logger, height, runData.InputValue, protocoltesting.TestingValueChecker{}) if err != nil { lastErr = err } @@ -171,7 +171,7 @@ func runInstanceWithData( } func overrideStateComparisonForControllerSpecTest(t *testing.T, test *spectests.ControllerSpecTest) { - specDir, err := protocoltesting.GetSpecDir("", filepath.Join("qbft", "spectest")) + specDir, err := storage.GetSpecDir("", filepath.Join("qbft", "spectest")) 
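// As the import hunks above show, these spec-test helpers are now resolved from different packages:
// GetSpecDir is called via the newly imported github.com/ssvlabs/ssv/ibft/storage package (package
// name storage) instead of protocoltesting, while TestingConfig, NewTestingQBFTController and
// TestingValueChecker are taken from github.com/ssvlabs/ssv/protocol/v2/testing (protocoltesting),
// replacing the former qbfttesting call sites. For example, mirroring the hunks above:
//
//	config := protocoltesting.TestingConfig(logger, spectestingutils.Testing4SharesSet())
//	specDir, err := storage.GetSpecDir("", filepath.Join("qbft", "spectest"))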
require.NoError(t, err) specDir = filepath.Join(specDir, "generate") dir := typescomparable.GetSCDir(specDir, reflect.TypeOf(test).String()) diff --git a/protocol/v2/qbft/spectest/msg_processing_type.go b/protocol/v2/qbft/spectest/msg_processing_type.go index 96d5799768..45650e13a8 100644 --- a/protocol/v2/qbft/spectest/msg_processing_type.go +++ b/protocol/v2/qbft/spectest/msg_processing_type.go @@ -16,10 +16,10 @@ import ( typescomparable "github.com/ssvlabs/ssv-spec/types/testingutils/comparable" "github.com/stretchr/testify/require" + "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/protocol/v2/qbft" "github.com/ssvlabs/ssv/protocol/v2/qbft/instance" - qbfttesting "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" protocoltesting "github.com/ssvlabs/ssv/protocol/v2/testing" ) @@ -33,13 +33,13 @@ func RunMsgProcessing(t *testing.T, test *spectests.MsgProcessingSpecTest) { ks := spectestingutils.KeySetForCommitteeMember(test.Pre.State.CommitteeMember) signer := spectestingutils.NewOperatorSigner(ks, 1) pre := instance.NewInstance( - qbfttesting.TestingConfig(logger, ks), + protocoltesting.TestingConfig(logger, ks), test.Pre.State.CommitteeMember, test.Pre.State.ID, test.Pre.State.Height, signer, ) - pre.ValueChecker = qbfttesting.TestingValueChecker{} + pre.ValueChecker = protocoltesting.TestingValueChecker{} require.NoError(t, pre.Decode(preByts)) preInstance := pre @@ -87,7 +87,7 @@ func RunMsgProcessing(t *testing.T, test *spectests.MsgProcessingSpecTest) { } func overrideStateComparisonForMsgProcessingSpecTest(t *testing.T, test *spectests.MsgProcessingSpecTest) { - specDir, err := protocoltesting.GetSpecDir("", filepath.Join("qbft", "spectest")) + specDir, err := storage.GetSpecDir("", filepath.Join("qbft", "spectest")) require.NoError(t, err) test.PostState, err = typescomparable.UnmarshalStateComparison(specDir, test.TestName(), reflect.TypeOf(test).String(), diff --git a/protocol/v2/qbft/spectest/qbft_mapping_test.go b/protocol/v2/qbft/spectest/qbft_mapping_test.go index aaf40023b8..062aa8861a 100644 --- a/protocol/v2/qbft/spectest/qbft_mapping_test.go +++ b/protocol/v2/qbft/spectest/qbft_mapping_test.go @@ -12,15 +12,15 @@ import ( "github.com/ssvlabs/ssv-spec/types/testingutils" "github.com/stretchr/testify/require" + "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/protocol/v2/qbft/instance" - testing2 "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" protocoltesting "github.com/ssvlabs/ssv/protocol/v2/testing" ) func TestQBFTMapping(t *testing.T) { path, _ := os.Getwd() - jsonTests, err := protocoltesting.GenerateSpecTestJSON(path, "qbft") + jsonTests, err := storage.GenerateSpecTestJSON(path, "qbft") require.NoError(t, err) untypedTests := map[string]interface{}{} @@ -99,7 +99,7 @@ func TestQBFTMapping(t *testing.T) { ks := testingutils.Testing4SharesSet() signer := testingutils.NewOperatorSigner(ks, 1) pre := instance.NewInstance( - testing2.TestingConfig(logger, testingutils.KeySetForCommitteeMember(typedTest.Pre.State.CommitteeMember)), + protocoltesting.TestingConfig(logger, testingutils.KeySetForCommitteeMember(typedTest.Pre.State.CommitteeMember)), typedTest.Pre.State.CommitteeMember, typedTest.Pre.State.ID, typedTest.Pre.State.Height, diff --git a/protocol/v2/ssv/runner/aggregator.go b/protocol/v2/ssv/runner/aggregator.go index 1e36c8249d..f5e87126af 100644 --- a/protocol/v2/ssv/runner/aggregator.go +++ b/protocol/v2/ssv/runner/aggregator.go @@ -39,7 +39,7 @@ 
type AggregatorRunner struct { network specqbft.Network signer ekm.BeaconSigner operatorSigner ssvtypes.OperatorSigner - measurements measurementsStore + measurements *dutyMeasurements // ValCheck is used to validate the qbft-value(s) proposed by other Operators. ValCheck ssv.ValueChecker @@ -90,7 +90,7 @@ func NewAggregatorRunner( signer: signer, operatorSigner: operatorSigner, ValCheck: valCheck, - measurements: NewMeasurementsStore(), + measurements: newMeasurementsStore(), IsAggregator: isAggregatorFn(), }, nil @@ -114,7 +114,7 @@ func (r *AggregatorRunner) ProcessPreConsensus(ctx context.Context, logger *zap. )) defer span.End() - hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, r, signedMsg) + hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, logger, r, signedMsg) if err != nil { return traces.Errorf(span, "failed processing selection proof message: %w", err) } @@ -125,7 +125,7 @@ func (r *AggregatorRunner) ProcessPreConsensus(ctx context.Context, logger *zap. return nil } - epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.GetState().CurrentDuty.DutySlot()) + epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.bState().CurrentDuty.DutySlot()) recordSuccessfulQuorum(ctx, 1, epoch, spectypes.BNRoleAggregator, attributeConsensusPhasePreConsensus) r.measurements.EndPreConsensus() @@ -136,24 +136,23 @@ func (r *AggregatorRunner) ProcessPreConsensus(ctx context.Context, logger *zap. // reconstruct selection proof sig span.AddEvent("reconstructing beacon signature", trace.WithAttributes(observability.BeaconBlockRootAttribute(root))) - fullSig, err := r.GetState().ReconstructBeaconSig(r.GetState().PreConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + fullSig, err := r.bState().ReconstructBeaconSig(r.bState().PreConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { // If the reconstructed signature verification failed, fall back to verifying each partial signature - r.BaseRunner.FallBackAndVerifyEachSignature(r.GetState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) + r.BaseRunner.FallBackAndVerifyEachSignature(r.bState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) return traces.Errorf(span, "got pre-consensus quorum but it has invalid signatures: %w", err) } - // signer must be same for all messages, at least 1 message must be present (this is validated prior) - signer := signedMsg.Messages[0].Signer - duty := r.GetState().CurrentDuty.(*spectypes.ValidatorDuty) + signer := ssvtypes.PartialSigMsgSigner(signedMsg) + duty := r.bState().CurrentDuty.(*spectypes.ValidatorDuty) span.SetAttributes( observability.CommitteeIndexAttribute(duty.CommitteeIndex), observability.ValidatorIndexAttribute(duty.ValidatorIndex), ) - const gotPartialSigQuorumEvent = "🧩 got partial signature quorum" - span.AddEvent(gotPartialSigQuorumEvent, trace.WithAttributes(observability.ValidatorSignerAttribute(signer))) - logger.Debug(gotPartialSigQuorumEvent, zap.Any("signer", signer)) + const gotPartialSignaturesEvent = "🧩 got partial aggregator selection proof signatures" + span.AddEvent(gotPartialSignaturesEvent, trace.WithAttributes(observability.ValidatorSignerAttribute(signer))) + logger.Debug(gotPartialSignaturesEvent, zap.Any("signer", signer)) // this is the earliest in aggregator runner flow where we get to know whether we are meant // to perform this aggregation duty or not @@ -166,8 
+165,6 @@ func (r *AggregatorRunner) ProcessPreConsensus(ctx context.Context, logger *zap. return nil } - r.measurements.PauseDutyFlow() - span.AddEvent("submitting aggregate and proof", trace.WithAttributes( observability.CommitteeIndexAttribute(duty.CommitteeIndex), @@ -176,7 +173,10 @@ func (r *AggregatorRunner) ProcessPreConsensus(ctx context.Context, logger *zap. if err != nil { return traces.Errorf(span, "failed to submit aggregate and proof: %w", err) } - r.measurements.ContinueDutyFlow() + + const submittedAggregateAndProofEvent = "submitted aggregate and proof" + logger.Debug(submittedAggregateAndProofEvent) + span.AddEvent(submittedAggregateAndProofEvent) byts, err := res.MarshalSSZ() if err != nil { @@ -188,6 +188,8 @@ func (r *AggregatorRunner) ProcessPreConsensus(ctx context.Context, logger *zap. DataSSZ: byts, } + r.measurements.StartConsensus() + if err := r.BaseRunner.decide(ctx, logger, r, duty.Slot, input, r.ValCheck); err != nil { return traces.Errorf(span, "can't start new duty runner instance for duty: %w", err) } @@ -206,7 +208,7 @@ func (r *AggregatorRunner) ProcessConsensus(ctx context.Context, logger *zap.Log )) defer span.End() - span.AddEvent("checking if instance is decided") + span.AddEvent("processing QBFT consensus msg") decided, encDecidedValue, err := r.BaseRunner.baseConsensusMsgProcessing(ctx, logger, r.ValCheck.CheckValue, signedMsg, &spectypes.ValidatorConsensusData{}) if err != nil { return traces.Errorf(span, "failed processing consensus message: %w", err) @@ -219,7 +221,6 @@ func (r *AggregatorRunner) ProcessConsensus(ctx context.Context, logger *zap.Log return nil } - span.AddEvent("instance is decided") r.measurements.EndConsensus() recordConsensusDuration(ctx, r.measurements.ConsensusTime(), spectypes.RoleAggregator) @@ -286,6 +287,10 @@ func (r *AggregatorRunner) ProcessConsensus(ctx context.Context, logger *zap.Log return traces.Errorf(span, "can't broadcast partial post consensus sig: %w", err) } + const broadcastedPostConsensusMsgEvent = "broadcasted post consensus partial signature message" + logger.Debug(broadcastedPostConsensusMsgEvent) + span.AddEvent(broadcastedPostConsensusMsgEvent) + span.SetStatus(codes.Ok, "") return nil } @@ -299,7 +304,7 @@ func (r *AggregatorRunner) ProcessPostConsensus(ctx context.Context, logger *zap )) defer span.End() - hasQuorum, roots, err := r.BaseRunner.basePostConsensusMsgProcessing(ctx, r, signedMsg) + hasQuorum, roots, err := r.BaseRunner.basePostConsensusMsgProcessing(ctx, logger, r, signedMsg) if err != nil { return traces.Errorf(span, "failed processing post consensus message: %w", err) } @@ -322,17 +327,17 @@ func (r *AggregatorRunner) ProcessPostConsensus(ctx context.Context, logger *zap root := roots[0] span.AddEvent("reconstructing beacon signature", trace.WithAttributes(observability.BeaconBlockRootAttribute(root))) - sig, err := r.GetState().ReconstructBeaconSig(r.GetState().PostConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + sig, err := r.bState().ReconstructBeaconSig(r.bState().PostConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { // If the reconstructed signature verification failed, fall back to verifying each partial signature - r.BaseRunner.FallBackAndVerifyEachSignature(r.GetState().PostConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) + r.BaseRunner.FallBackAndVerifyEachSignature(r.bState().PostConsensusContainer, root, r.GetShare().Committee, 
r.GetShare().ValidatorIndex) return traces.Errorf(span, "got post-consensus quorum but it has invalid signatures: %w", err) } specSig := phase0.BLSSignature{} copy(specSig[:], sig) cd := &spectypes.ValidatorConsensusData{} - err = cd.Decode(r.GetState().DecidedValue) + err = cd.Decode(r.bState().DecidedValue) if err != nil { return traces.Errorf(span, "could not decode consensus data: %w", err) } @@ -346,44 +351,54 @@ func (r *AggregatorRunner) ProcessPostConsensus(ctx context.Context, logger *zap return traces.Errorf(span, "could not construct versioned aggregate and proof: %w", err) } + const submittingSignedAggregateProofEvent = "submitting signed aggregate and proof" + logger.Debug(submittingSignedAggregateProofEvent) + span.AddEvent(submittingSignedAggregateProofEvent) + start := time.Now() if err := r.GetBeaconNode().SubmitSignedAggregateSelectionProof(ctx, msg); err != nil { recordFailedSubmission(ctx, spectypes.BNRoleAggregator) const errMsg = "could not submit to Beacon chain reconstructed contribution and proof" - logger.Error(errMsg, fields.SubmissionTime(time.Since(start)), zap.Error(err)) + logger.Error(errMsg, fields.Took(time.Since(start)), zap.Error(err)) return traces.Errorf(span, "%s: %w", errMsg, err) } - const eventMsg = "✅ successful submitted aggregate" - span.AddEvent(eventMsg) - logger.Debug( - eventMsg, - fields.SubmissionTime(time.Since(start)), - fields.TotalConsensusTime(r.measurements.TotalConsensusTime()), - fields.TotalDutyTime(r.measurements.TotalDutyTime()), - ) + const submittedSignedAggregateProofEvent = "✅ successfully submitted signed aggregate and proof" + span.AddEvent(submittedSignedAggregateProofEvent) + logger.Debug(submittedSignedAggregateProofEvent, fields.Took(time.Since(start))) - r.GetState().Finished = true + r.bState().Finished = true r.measurements.EndDutyFlow() - recordDutyDuration(ctx, r.measurements.TotalDutyTime(), spectypes.BNRoleAggregator, r.GetState().RunningInstance.State.Round) - recordSuccessfulSubmission(ctx, 1, r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.GetState().CurrentDuty.DutySlot()), spectypes.BNRoleAggregator) + recordDutyDuration(ctx, r.measurements.TotalDutyTime(), spectypes.BNRoleAggregator, r.bState().RunningInstance.State.Round) + recordSuccessfulSubmission(ctx, 1, r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.bState().CurrentDuty.DutySlot()), spectypes.BNRoleAggregator) + + const dutyFinishedEvent = "successfully finished duty processing" + logger.Info(dutyFinishedEvent, + fields.PreConsensusTime(r.measurements.PreConsensusTime()), + fields.ConsensusTime(r.measurements.ConsensusTime()), + fields.ConsensusRounds(uint64(r.bState().RunningInstance.State.Round)), + fields.PostConsensusTime(r.measurements.PostConsensusTime()), + fields.TotalConsensusTime(r.measurements.TotalConsensusTime()), + fields.TotalDutyTime(r.measurements.TotalDutyTime()), + ) + span.AddEvent(dutyFinishedEvent) span.SetStatus(codes.Ok, "") return nil } -func (r *AggregatorRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, msg ssvtypes.EventMsg) error { - return r.BaseRunner.OnTimeoutQBFT(ctx, logger, msg) +func (r *AggregatorRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, timeoutData *ssvtypes.TimeoutData) error { + return r.BaseRunner.OnTimeoutQBFT(ctx, logger, timeoutData) } func (r *AggregatorRunner) expectedPreConsensusRootsAndDomain() ([]ssz.HashRoot, phase0.DomainType, error) { - return []ssz.HashRoot{spectypes.SSZUint64(r.GetState().CurrentDuty.DutySlot())}, spectypes.DomainSelectionProof, nil + return 
[]ssz.HashRoot{spectypes.SSZUint64(r.bState().CurrentDuty.DutySlot())}, spectypes.DomainSelectionProof, nil } // expectedPostConsensusRootsAndDomain an INTERNAL function, returns the expected post-consensus roots to sign func (r *AggregatorRunner) expectedPostConsensusRootsAndDomain(context.Context) ([]ssz.HashRoot, phase0.DomainType, error) { cd := &spectypes.ValidatorConsensusData{} - err := cd.Decode(r.GetState().DecidedValue) + err := cd.Decode(r.bState().DecidedValue) if err != nil { return nil, spectypes.DomainError, errors.Wrap(err, "could not create consensus data") } @@ -423,8 +438,13 @@ func (r *AggregatorRunner) executeDuty(ctx context.Context, logger *zap.Logger, spectypes.DomainSelectionProof, ) if err != nil { - return traces.Errorf(span, "could not sign randao: %w", err) + return traces.Errorf(span, "could not sign aggregator selection proof: %w", err) } + + const signedSelectionProofEvent = "signed aggregator selection proof" + logger.Debug(signedSelectionProofEvent) + span.AddEvent(signedSelectionProofEvent) + msgs := &spectypes.PartialSignatureMessages{ Type: spectypes.SelectionProofPartialSig, Slot: duty.DutySlot(), @@ -437,8 +457,6 @@ func (r *AggregatorRunner) executeDuty(ctx context.Context, logger *zap.Logger, return traces.Errorf(span, "could not encode selection proof partial signature message: %w", err) } - r.measurements.StartConsensus() - ssvMsg := &spectypes.SSVMessage{ MsgType: spectypes.SSVPartialSignatureMsgType, MsgID: msgID, @@ -486,7 +504,7 @@ func (r *AggregatorRunner) GetShare() *spectypes.Share { return nil } -func (r *AggregatorRunner) GetState() *State { +func (r *AggregatorRunner) bState() *State { return r.BaseRunner.State } diff --git a/protocol/v2/ssv/runner/committee.go b/protocol/v2/ssv/runner/committee.go index a994f0c6c0..5b34946083 100644 --- a/protocol/v2/ssv/runner/committee.go +++ b/protocol/v2/ssv/runner/committee.go @@ -54,7 +54,7 @@ type CommitteeRunner struct { operatorSigner ssvtypes.OperatorSigner DutyGuard CommitteeDutyGuard doppelgangerHandler DoppelgangerProvider - measurements measurementsStore + measurements *dutyMeasurements // ValCheck is used to validate the qbft-value(s) proposed by other Operators. 
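// The measurements field here (and in the other runners in this diff) changes from a value of type
// measurementsStore to a *dutyMeasurements pointer created via newMeasurementsStore(). A standalone
// sketch, with illustrative names and not part of this change, of the practical difference: when the
// timings are held by pointer, copies of the owning struct share one store instead of forking it.

package main

import (
	"fmt"
	"time"
)

type timings struct{ start time.Time }

func (t *timings) Start() { t.start = time.Now() }

type byValue struct{ m timings }
type byPointer struct{ m *timings }

func main() {
	a := byValue{}
	b := a // copying the struct copies the timings too
	a.m.Start()
	fmt.Println("value copy saw Start:", !b.m.start.IsZero()) // false

	p := byPointer{m: &timings{}}
	q := p // both copies point at the same timings
	p.m.Start()
	fmt.Println("pointer copy saw Start:", !q.m.start.IsZero()) // true
}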
ValCheck ssv.ValueChecker @@ -94,7 +94,7 @@ func NewCommitteeRunner( submittedDuties: make(map[spectypes.BeaconRole]map[phase0.ValidatorIndex]struct{}), DutyGuard: dutyGuard, doppelgangerHandler: doppelgangerHandler, - measurements: NewMeasurementsStore(), + measurements: newMeasurementsStore(), }, nil } @@ -249,6 +249,10 @@ func (cr *CommitteeRunner) GetBeaconSigner() ekm.BeaconSigner { return cr.signer } +func (r *CommitteeRunner) bState() *State { + return r.BaseRunner.State +} + func (cr *CommitteeRunner) HasRunningDuty() bool { return cr.BaseRunner.hasRunningDuty() } @@ -267,7 +271,7 @@ func (cr *CommitteeRunner) ProcessConsensus(ctx context.Context, logger *zap.Log )) defer span.End() - span.AddEvent("checking if instance is decided") + span.AddEvent("processing QBFT consensus msg") decided, decidedValue, err := cr.BaseRunner.baseConsensusMsgProcessing(ctx, logger, cr.ValCheck.CheckValue, msg, &spectypes.BeaconVote{}) if err != nil { return traces.Errorf(span, "failed processing consensus message: %w", err) @@ -280,7 +284,6 @@ func (cr *CommitteeRunner) ProcessConsensus(ctx context.Context, logger *zap.Log return nil } - span.AddEvent("instance is decided") cr.measurements.EndConsensus() recordConsensusDuration(ctx, cr.measurements.ConsensusTime(), spectypes.RoleCommittee) @@ -429,6 +432,7 @@ listener: if totalAttestations == 0 && totalSyncCommittee == 0 { cr.BaseRunner.State.Finished = true + cr.measurements.EndDutyFlow() span.SetStatus(codes.Error, ErrNoValidDutiesToExecute.Error()) return ErrNoValidDutiesToExecute } @@ -474,11 +478,18 @@ listener: SSVMessage: ssvMsg, } - span.AddEvent("broadcasting post consensus partial signature message") + const signedPostEvent = "constructed & signed post consensus partial signature message" + logger.Debug(signedPostEvent) + span.AddEvent(signedPostEvent) + if err := cr.GetNetwork().Broadcast(ssvMsg.MsgID, msgToBroadcast); err != nil { return traces.Errorf(span, "can't broadcast partial post consensus sig: %w", err) } + const broadcastedPostConsensusMsgEvent = "broadcasted post consensus partial signature message" + logger.Debug(broadcastedPostConsensusMsgEvent) + span.AddEvent(broadcastedPostConsensusMsgEvent) + span.SetStatus(codes.Ok, "") return nil } @@ -558,24 +569,24 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap defer span.End() span.AddEvent("base post consensus message processing") - hasQuorum, roots, err := cr.BaseRunner.basePostConsensusMsgProcessing(ctx, cr, signedMsg) + hasQuorum, quorumRoots, err := cr.BaseRunner.basePostConsensusMsgProcessing(ctx, logger, cr, signedMsg) if err != nil { return traces.Errorf(span, "failed processing post consensus message: %w", err) } - indices := make([]uint64, len(signedMsg.Messages)) - for i, msg := range signedMsg.Messages { - indices[i] = uint64(msg.ValidatorIndex) + vIndices := make([]uint64, 0, len(signedMsg.Messages)) + for _, msg := range signedMsg.Messages { + vIndices = append(vIndices, uint64(msg.ValidatorIndex)) } - logger = logger.With(fields.ConsensusTime(cr.measurements.ConsensusTime())) - const eventMsg = "🧩 got partial signatures" + const eventMsg = "🧩 got partial signatures (post consensus)" span.AddEvent(eventMsg) logger.Debug(eventMsg, + zap.Uint64("signer", ssvtypes.PartialSigMsgSigner(signedMsg)), + zap.Uint64s("validators", vIndices), zap.Bool("quorum", hasQuorum), - zap.Uint64("signer", signedMsg.Messages[0].Signer), - zap.Int("roots", len(roots)), - zap.Uint64s("validators", indices)) + zap.Int("quorum_roots", len(quorumRoots)), + ) if 
!hasQuorum { span.AddEvent("no quorum") @@ -583,7 +594,9 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap return nil } - span.AddEvent("getting attestations, sync committees and root beacon objects") + cr.measurements.EndPostConsensus() + recordPostConsensusDuration(ctx, cr.measurements.PostConsensusTime(), spectypes.RoleCommittee) + // Get validator-root maps for attestations and sync committees, and the root-beacon object map attestationMap, committeeMap, beaconObjects, err := cr.expectedPostConsensusRootsAndBeaconObjects(ctx, logger) if err != nil { @@ -591,6 +604,7 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap } if len(beaconObjects) == 0 { cr.BaseRunner.State.Finished = true + cr.measurements.EndDutyFlow() span.SetStatus(codes.Error, ErrNoValidDutiesToExecute.Error()) return ErrNoValidDutiesToExecute } @@ -600,7 +614,7 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap // Get unique roots to avoid repetition deduplicatedRoots := make(map[[32]byte]struct{}) - for _, root := range roots { + for _, root := range quorumRoots { deduplicatedRoots[root] = struct{}{} } @@ -658,7 +672,7 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap share := cr.BaseRunner.Share[validatorIndex] pubKey := share.ValidatorPubKey - vlogger := logger.With(zap.Uint64("validator_index", uint64(validatorIndex)), zap.String("pubkey", hex.EncodeToString(pubKey[:]))) + vLogger := logger.With(zap.Uint64("validator_index", uint64(validatorIndex)), zap.String("pubkey", hex.EncodeToString(pubKey[:]))) sig, err := cr.BaseRunner.State.ReconstructBeaconSig(cr.BaseRunner.State.PostConsensusContainer, root, pubKey[:], validatorIndex) // If the reconstructed signature verification failed, fall back to verifying each partial signature @@ -668,13 +682,13 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap } const eventMsg = "got post-consensus quorum but it has invalid signatures" span.AddEvent(eventMsg) - vlogger.Error(eventMsg, zap.Error(err)) + vLogger.Error(eventMsg, zap.Error(err)) errCh <- fmt.Errorf("%s: %w", eventMsg, err) return } - vlogger.Debug("🧩 reconstructed partial signature") + vLogger.Debug("🧩 reconstructed partial signature") signatureCh <- signatureResult{ validatorIndex: validatorIndex, @@ -741,22 +755,23 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap ) } - cr.measurements.EndPostConsensus() - recordPostConsensusDuration(ctx, cr.measurements.PostConsensusTime(), spectypes.RoleCommittee) - - logger = logger.With(fields.PostConsensusTime(cr.measurements.PostConsensusTime())) - attestations := make([]*spec.VersionedAttestation, 0, len(attestationsToSubmit)) for _, att := range attestationsToSubmit { if att != nil && att.ValidatorIndex != nil { attestations = append(attestations, att) } } + if len(attestations) > 0 { + validators := make([]phase0.ValidatorIndex, 0, len(attestations)) + for _, att := range attestations { + validators = append(validators, *att.ValidatorIndex) + } + aLogger := logger.With(zap.Any("validators", validators)) - cr.measurements.EndDutyFlow() + const submittingAttestationsEvent = "submitting attestations" + aLogger.Debug(submittingAttestationsEvent) + span.AddEvent(submittingAttestationsEvent) - if len(attestations) > 0 { - span.AddEvent("submitting attestations") submissionStart := time.Now() // Submit multiple attestations @@ -764,7 +779,7 @@ func (cr *CommitteeRunner) 
ProcessPostConsensus(ctx context.Context, logger *zap recordFailedSubmission(ctx, spectypes.BNRoleAttester) const errMsg = "could not submit attestations" - logger.Error(errMsg, zap.Error(err)) + aLogger.Error(errMsg, zap.Error(err)) return traces.Errorf(span, "%s: %w", errMsg, err) } @@ -790,15 +805,9 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap observability.DutyRoundAttribute(cr.BaseRunner.State.RunningInstance.State.Round), observability.ValidatorCountAttribute(attestationsCount), )) - - logger.Info(eventMsg, - fields.Epoch(cr.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(cr.BaseRunner.State.CurrentDuty.DutySlot())), - fields.QBFTHeight(cr.BaseRunner.QBFTController.Height), - fields.QBFTRound(cr.BaseRunner.State.RunningInstance.State.Round), + aLogger.Info(eventMsg, fields.BlockRoot(attData.BeaconBlockRoot), - fields.SubmissionTime(time.Since(submissionStart)), - fields.TotalConsensusTime(cr.measurements.TotalConsensusTime()), - fields.TotalDutyTime(cr.measurements.TotalDutyTime()), + fields.Took(time.Since(submissionStart)), fields.Count(attestationsCount), ) @@ -808,20 +817,28 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap } } - // Submit multiple sync committee + // Submit multiple sync committee. syncCommitteeMessages := make([]*altair.SyncCommitteeMessage, 0, len(syncCommitteeMessagesToSubmit)) for _, syncMsg := range syncCommitteeMessagesToSubmit { syncCommitteeMessages = append(syncCommitteeMessages, syncMsg) } - if len(syncCommitteeMessages) > 0 { - span.AddEvent("submitting sync committee") + validators := make([]phase0.ValidatorIndex, 0, len(syncCommitteeMessages)) + for _, sc := range syncCommitteeMessages { + validators = append(validators, sc.ValidatorIndex) + } + scLogger := logger.With(zap.Any("validators", validators)) + + const submittingSyncCommitteeEvent = "submitting sync committee" + scLogger.Debug(submittingSyncCommitteeEvent) + span.AddEvent(submittingSyncCommitteeEvent) + submissionStart := time.Now() if err := cr.beacon.SubmitSyncMessages(ctx, syncCommitteeMessages); err != nil { recordFailedSubmission(ctx, spectypes.BNRoleSyncCommittee) const errMsg = "could not submit sync committee messages" - logger.Error(errMsg, zap.Error(err)) + scLogger.Error(errMsg, zap.Error(err)) return traces.Errorf(span, "%s: %w", errMsg, err) } @@ -836,6 +853,7 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap spectypes.BNRoleSyncCommittee, ) } + const eventMsg = "✅ successfully submitted sync committee" span.AddEvent(eventMsg, trace.WithAttributes( observability.BeaconSlotAttribute(cr.BaseRunner.State.CurrentDuty.DutySlot()), @@ -845,13 +863,9 @@ func (cr *CommitteeRunner) ProcessPostConsensus(ctx context.Context, logger *zap attribute.Float64("ssv.validator.duty.submission_time", time.Since(submissionStart).Seconds()), attribute.Float64("ssv.validator.duty.consensus_time_total", time.Since(cr.measurements.consensusStart).Seconds()), )) - logger.Info(eventMsg, - fields.QBFTHeight(cr.BaseRunner.QBFTController.Height), - fields.QBFTRound(cr.BaseRunner.State.RunningInstance.State.Round), + scLogger.Info(eventMsg, fields.BlockRoot(syncCommitteeMessages[0].BeaconBlockRoot), - fields.SubmissionTime(time.Since(submissionStart)), - fields.TotalConsensusTime(cr.measurements.TotalConsensusTime()), - fields.TotalDutyTime(cr.measurements.TotalDutyTime()), + fields.Took(time.Since(submissionStart)), fields.Count(syncMsgsCount), ) @@ -866,17 +880,44 @@ func (cr *CommitteeRunner) 
ProcessPostConsensus(ctx context.Context, logger *zap return executionErr } - // Check if duty has terminated (runner has submitted for all duties) + // TODO - here we should actually "finish" the duty regardless of whether our success is absolute or partial, + // this is because quorum can only be reached once and we won't have another chance to "finish" the duty + // (by setting `cr.BaseRunner.State.Finished = true`). But for some reason spec-tests are written in such + // a way that we cannot "finish" the duty in case of partial success ... it wants absolute success for some + // reason, but like I said - the way this code is written, the execution can never get here for the 2nd time. if cr.HasSubmittedAllValidatorDuties(attestationMap, committeeMap) { cr.BaseRunner.State.Finished = true + cr.measurements.EndDutyFlow() + const dutyFinishedEvent = "successfully finished duty processing (absolute success)" + logger.Info(dutyFinishedEvent, + fields.ConsensusTime(cr.measurements.ConsensusTime()), + fields.ConsensusRounds(uint64(cr.bState().RunningInstance.State.Round)), + fields.PostConsensusTime(cr.measurements.PostConsensusTime()), + fields.TotalConsensusTime(cr.measurements.TotalConsensusTime()), + fields.TotalDutyTime(cr.measurements.TotalDutyTime()), + ) + span.AddEvent(dutyFinishedEvent) + span.SetStatus(codes.Ok, "") + return nil } + //cr.BaseRunner.State.Finished = true // TODO - gotta uncomment & refactor this when spec-tests allow for that + cr.measurements.EndDutyFlow() + const dutyFinishedEvent = "successfully finished duty processing (partial success)" + logger.Info(dutyFinishedEvent, + fields.ConsensusTime(cr.measurements.ConsensusTime()), + fields.ConsensusRounds(uint64(cr.bState().RunningInstance.State.Round)), + fields.PostConsensusTime(cr.measurements.PostConsensusTime()), + fields.TotalConsensusTime(cr.measurements.TotalConsensusTime()), + fields.TotalDutyTime(cr.measurements.TotalDutyTime()), + ) + span.AddEvent(dutyFinishedEvent) span.SetStatus(codes.Ok, "") return nil } -func (cr *CommitteeRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, msg ssvtypes.EventMsg) error { - return cr.BaseRunner.OnTimeoutQBFT(ctx, logger, msg) +func (cr *CommitteeRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, timeoutData *ssvtypes.TimeoutData) error { + return cr.BaseRunner.OnTimeoutQBFT(ctx, logger, timeoutData) } // HasSubmittedAllValidatorDuties -- Returns true if the runner has done submissions for all validators for the given slot @@ -924,7 +965,7 @@ func findValidators( committeeMap map[phase0.ValidatorIndex][32]byte) (spectypes.BeaconRole, []phase0.ValidatorIndex, bool) { var validators []phase0.ValidatorIndex - // look for the expectedRoot in attestationMap + // look for the expectedRoot in the attestationMap for validator, root := range attestationMap { if root == expectedRoot { validators = append(validators, validator) @@ -933,7 +974,7 @@ func findValidators( if len(validators) > 0 { return spectypes.BNRoleAttester, validators, true } - // look for the expectedRoot in committeeMap + // look for the expectedRoot in the committeeMap for validator, root := range committeeMap { if root == expectedRoot { validators = append(validators, validator) @@ -945,12 +986,12 @@ func findValidators( return spectypes.BNRoleUnknown, nil, false } -// Unneeded since no preconsensus phase +// expectedPreConsensusRootsAndDomain is not needed because there is no pre-consensus phase. 
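// findValidators above performs a reverse lookup: given a quorum root, it collects every validator
// whose expected root matches, checking the attestation map first and the sync-committee map second.
// A standalone sketch of that lookup shape (illustrative names, not part of this change):

package main

import "fmt"

func validatorsForRoot(expected [32]byte, expectedRoots map[uint64][32]byte) []uint64 {
	var matched []uint64
	for validator, root := range expectedRoots {
		if root == expected {
			matched = append(matched, validator)
		}
	}
	return matched
}

func main() {
	target := [32]byte{1}
	attesters := map[uint64][32]byte{10: target, 11: {2}, 12: target}
	fmt.Println(len(validatorsForRoot(target, attesters))) // 2
}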
func (cr *CommitteeRunner) expectedPreConsensusRootsAndDomain() ([]ssz.HashRoot, phase0.DomainType, error) { return nil, spectypes.DomainError, errors.New("no pre consensus roots for committee runner") } -// This function signature returns only one domain type... but we can have mixed domains +// expectedPostConsensusRootsAndDomain signature returns only one domain type... but we can have mixed domains // instead we rely on expectedPostConsensusRootsAndBeaconObjects that is called later func (cr *CommitteeRunner) expectedPostConsensusRootsAndDomain(context.Context) ([]ssz.HashRoot, phase0.DomainType, error) { return nil, spectypes.DomainError, errors.New("unexpected expectedPostConsensusRootsAndDomain func call") @@ -965,7 +1006,6 @@ func (cr *CommitteeRunner) expectedPostConsensusRootsAndBeaconObjects(ctx contex syncCommitteeMap = make(map[phase0.ValidatorIndex][32]byte) beaconObjects = make(map[phase0.ValidatorIndex]map[[32]byte]interface{}) duty := cr.BaseRunner.State.CurrentDuty - // TODO DecidedValue should be interface?? beaconVoteData := cr.BaseRunner.State.DecidedValue beaconVote := &spectypes.BeaconVote{} if err := beaconVote.Decode(beaconVoteData); err != nil { @@ -1069,9 +1109,9 @@ func (cr *CommitteeRunner) executeDuty(ctx context.Context, logger *zap.Logger, return traces.Errorf(span, "failed to get attestation data: %w", err) } - logger = logger.With(zap.Duration("attestation_data_time", time.Since(start))) - - cr.measurements.StartConsensus() + const attestationDataFetchedEvent = "fetched attestation data from CL" + logger.Debug(attestationDataFetchedEvent, fields.Took(time.Since(start))) + span.AddEvent(attestationDataFetchedEvent) vote := &spectypes.BeaconVote{ BlockRoot: attData.BeaconBlockRoot, @@ -1079,6 +1119,8 @@ func (cr *CommitteeRunner) executeDuty(ctx context.Context, logger *zap.Logger, Target: attData.Target, } + cr.measurements.StartConsensus() + cr.ValCheck = ssv.NewVoteChecker( cr.signer, slot, diff --git a/protocol/v2/ssv/runner/measurements.go b/protocol/v2/ssv/runner/measurements.go index e86777416b..06993cdbbd 100644 --- a/protocol/v2/ssv/runner/measurements.go +++ b/protocol/v2/ssv/runner/measurements.go @@ -4,8 +4,8 @@ import ( "time" ) -// measurementsStore stores consensus durations -type measurementsStore struct { +// dutyMeasurements stores duty-related & consensus-related durations. 
+type dutyMeasurements struct { preConsensusStart time.Time consensusStart time.Time postConsensusStart time.Time @@ -16,102 +16,58 @@ type measurementsStore struct { dutyDuration time.Duration } -func NewMeasurementsStore() measurementsStore { - return measurementsStore{} +func newMeasurementsStore() *dutyMeasurements { + return &dutyMeasurements{} } -func (cm *measurementsStore) PreConsensusTime() time.Duration { +func (cm *dutyMeasurements) PreConsensusTime() time.Duration { return cm.preConsensusDuration } -func (cm *measurementsStore) ConsensusTime() time.Duration { + +func (cm *dutyMeasurements) ConsensusTime() time.Duration { return cm.consensusDuration } -func (cm *measurementsStore) PostConsensusTime() time.Duration { + +func (cm *dutyMeasurements) PostConsensusTime() time.Duration { return cm.postConsensusDuration } -func (cm *measurementsStore) TotalDutyTime() time.Duration { - return cm.dutyDuration -} -func (cm *measurementsStore) TotalConsensusTime() time.Duration { +func (cm *dutyMeasurements) TotalConsensusTime() time.Duration { return cm.preConsensusDuration + cm.consensusDuration + cm.postConsensusDuration } -// StartPreConsensus stores pre-consensus start time. -func (cm *measurementsStore) StartPreConsensus() { - if cm != nil { - cm.preConsensusStart = time.Now() - } -} - -// EndPreConsensus sends metrics for pre-consensus duration. -func (cm *measurementsStore) EndPreConsensus() { - if cm != nil && !cm.preConsensusStart.IsZero() { - duration := time.Since(cm.preConsensusStart) - cm.preConsensusDuration = duration - cm.preConsensusStart = time.Time{} - } +func (cm *dutyMeasurements) TotalDutyTime() time.Duration { + return cm.dutyDuration } -// StartConsensus stores consensus start time. -func (cm *measurementsStore) StartConsensus() { - if cm != nil { - cm.consensusStart = time.Now() - } +func (cm *dutyMeasurements) StartPreConsensus() { + cm.preConsensusStart = time.Now() } -// EndConsensus sends metrics for consensus duration. -func (cm *measurementsStore) EndConsensus() { - if cm != nil && !cm.consensusStart.IsZero() { - duration := time.Since(cm.consensusStart) - cm.consensusDuration = duration - cm.consensusStart = time.Time{} - } +func (cm *dutyMeasurements) EndPreConsensus() { + cm.preConsensusDuration = time.Since(cm.preConsensusStart) } -// StartPostConsensus stores post-consensus start time. -func (cm *measurementsStore) StartPostConsensus() { - if cm != nil { - cm.postConsensusStart = time.Now() - } +func (cm *dutyMeasurements) StartConsensus() { + cm.consensusStart = time.Now() } -// EndPostConsensus sends metrics for post-consensus duration. -func (cm *measurementsStore) EndPostConsensus() { - if cm != nil && !cm.postConsensusStart.IsZero() { - duration := time.Since(cm.postConsensusStart) - cm.postConsensusDuration = duration - cm.postConsensusStart = time.Time{} - } +func (cm *dutyMeasurements) EndConsensus() { + cm.consensusDuration = time.Since(cm.consensusStart) } -// StartDutyFlow stores duty full flow start time. -func (cm *measurementsStore) StartDutyFlow() { - if cm != nil { - cm.dutyStart = time.Now() - cm.dutyDuration = 0 - } +func (cm *dutyMeasurements) StartPostConsensus() { + cm.postConsensusStart = time.Now() } -// PauseDutyFlow stores duty full flow cumulative duration with ability to continue the flow. 
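// With PauseDutyFlow/ContinueDutyFlow removed, the runners in this diff drive the simplified store
// with plain start/end pairs. A usage sketch of that lifecycle follows; it is a fragment that assumes
// the runner context (logger and the fields helpers) shown elsewhere in this diff, not standalone code:

m := newMeasurementsStore()
m.StartDutyFlow()

m.StartPreConsensus()
// ... pre-consensus signing and quorum collection ...
m.EndPreConsensus()

m.StartConsensus()
// ... QBFT consensus ...
m.EndConsensus()

m.StartPostConsensus()
// ... post-consensus signatures and submission ...
m.EndPostConsensus()

m.EndDutyFlow()
logger.Debug("duty timings",
	fields.TotalConsensusTime(m.TotalConsensusTime()),
	fields.TotalDutyTime(m.TotalDutyTime()),
)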
-func (cm *measurementsStore) PauseDutyFlow() { - if cm != nil { - cm.dutyDuration += time.Since(cm.dutyStart) - cm.dutyStart = time.Time{} - } +func (cm *dutyMeasurements) EndPostConsensus() { + cm.postConsensusDuration = time.Since(cm.postConsensusStart) } -// ContinueDutyFlow continues measuring duty full flow duration. -func (cm *measurementsStore) ContinueDutyFlow() { - if cm != nil { - cm.dutyStart = time.Now() - } +func (cm *dutyMeasurements) StartDutyFlow() { + cm.dutyStart = time.Now() } -// EndDutyFlow sends metrics for duty full flow duration. -func (cm *measurementsStore) EndDutyFlow() { - if cm != nil && !cm.dutyStart.IsZero() { - cm.dutyDuration += time.Since(cm.dutyStart) - cm.dutyStart = time.Time{} - } +func (cm *dutyMeasurements) EndDutyFlow() { + cm.dutyDuration = time.Since(cm.dutyStart) } diff --git a/protocol/v2/ssv/runner/proposer.go b/protocol/v2/ssv/runner/proposer.go index a222d2c980..722e759712 100644 --- a/protocol/v2/ssv/runner/proposer.go +++ b/protocol/v2/ssv/runner/proposer.go @@ -41,7 +41,7 @@ type ProposerRunner struct { signer ekm.BeaconSigner operatorSigner ssvtypes.OperatorSigner doppelgangerHandler DoppelgangerProvider - measurements measurementsStore + measurements *dutyMeasurements graffiti []byte // ValCheck is used to validate the qbft-value(s) proposed by other Operators. @@ -96,7 +96,7 @@ func NewProposerRunner( operatorSigner: operatorSigner, doppelgangerHandler: doppelgangerHandler, ValCheck: valCheck, - measurements: NewMeasurementsStore(), + measurements: newMeasurementsStore(), graffiti: graffiti, proposerDelay: proposerDelay, @@ -121,16 +121,14 @@ func (r *ProposerRunner) ProcessPreConsensus(ctx context.Context, logger *zap.Lo )) defer span.End() - hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, r, signedMsg) + hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, logger, r, signedMsg) if err != nil { return traces.Errorf(span, "failed processing randao message: %w", err) } - // signer must be same for all messages, at least 1 message must be present (this is validated prior) - signer := signedMsg.Messages[0].Signer - duty := r.GetState().CurrentDuty.(*spectypes.ValidatorDuty) + duty := r.bState().CurrentDuty.(*spectypes.ValidatorDuty) - logger.Debug("🧩 got partial RANDAO signatures", zap.Uint64("signer", signer)) + logger.Debug("🧩 got partial RANDAO signatures (pre consensus)", zap.Uint64("signer", ssvtypes.PartialSigMsgSigner(signedMsg))) // quorum returns true only once (first time quorum achieved) if !hasQuorum { @@ -147,15 +145,15 @@ func (r *ProposerRunner) ProcessPreConsensus(ctx context.Context, logger *zap.Lo // randao is relevant only for block proposals, no need to check type span.AddEvent("reconstructing beacon signature", trace.WithAttributes(observability.BeaconBlockRootAttribute(root))) - fullSig, err := r.GetState().ReconstructBeaconSig(r.GetState().PreConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + fullSig, err := r.bState().ReconstructBeaconSig(r.bState().PreConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { // If the reconstructed signature verification failed, fall back to verifying each partial signature - r.BaseRunner.FallBackAndVerifyEachSignature(r.GetState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) + r.BaseRunner.FallBackAndVerifyEachSignature(r.bState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) 
return traces.Errorf(span, "got pre-consensus quorum but it has invalid signatures: %w", err) } logger.Debug( "🧩 reconstructed partial RANDAO signatures", - zap.Uint64s("signers", getPreConsensusSigners(r.GetState(), root)), + zap.Uint64s("signers", getPreConsensusSigners(r.bState(), root)), fields.PreConsensusTime(r.measurements.PreConsensusTime()), ) @@ -170,11 +168,15 @@ func (r *ProposerRunner) ProcessPreConsensus(ctx context.Context, logger *zap.Lo } } + waitedOutProposerDelayEvent := fmt.Sprintf("waited out proposer delay of %dms", r.proposerDelay.Milliseconds()) + logger.Debug(waitedOutProposerDelayEvent) + span.AddEvent(waitedOutProposerDelayEvent) + // Fetch the block our operator will propose if it is a Leader (note, even if our operator // isn't leading the 1st QBFT round it might become a Leader in case of round change - hence // we are always fetching Ethereum block here just in case we need to propose it). start := time.Now() - duty = r.GetState().CurrentDuty.(*spectypes.ValidatorDuty) + duty = r.bState().CurrentDuty.(*spectypes.ValidatorDuty) vBlk, _, err := r.GetBeaconNode().GetBeaconBlock(ctx, duty.Slot, r.graffiti, fullSig) if err != nil { @@ -264,7 +266,7 @@ func (r *ProposerRunner) ProcessConsensus(ctx context.Context, logger *zap.Logge )) defer span.End() - span.AddEvent("checking if instance is decided") + span.AddEvent("processing QBFT consensus msg") decided, decidedValue, err := r.BaseRunner.baseConsensusMsgProcessing(ctx, logger, r.ValCheck.CheckValue, signedMsg, &spectypes.ValidatorConsensusData{}) if err != nil { return traces.Errorf(span, "failed processing consensus message: %w", err) @@ -277,7 +279,6 @@ func (r *ProposerRunner) ProcessConsensus(ctx context.Context, logger *zap.Logge return nil } - span.AddEvent("instance is decided") r.measurements.EndConsensus() recordConsensusDuration(ctx, r.measurements.ConsensusTime(), spectypes.RoleProposer) @@ -354,12 +355,16 @@ func (r *ProposerRunner) ProcessConsensus(ctx context.Context, logger *zap.Logge return fmt.Errorf("can't broadcast partial post consensus sig: %w", err) } + const broadcastedPostConsensusMsgEvent = "broadcasted post consensus partial signature message" + logger.Debug(broadcastedPostConsensusMsgEvent) + span.AddEvent(broadcastedPostConsensusMsgEvent) + span.SetStatus(codes.Ok, "") return nil } -func (r *ProposerRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, msg ssvtypes.EventMsg) error { - return r.BaseRunner.OnTimeoutQBFT(ctx, logger, msg) +func (r *ProposerRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, timeoutData *ssvtypes.TimeoutData) error { + return r.BaseRunner.OnTimeoutQBFT(ctx, logger, timeoutData) } func (r *ProposerRunner) ProcessPostConsensus(ctx context.Context, logger *zap.Logger, signedMsg *spectypes.PartialSignatureMessages) error { @@ -371,7 +376,7 @@ func (r *ProposerRunner) ProcessPostConsensus(ctx context.Context, logger *zap.L )) defer span.End() - hasQuorum, roots, err := r.BaseRunner.basePostConsensusMsgProcessing(ctx, r, signedMsg) + hasQuorum, roots, err := r.BaseRunner.basePostConsensusMsgProcessing(ctx, logger, r, signedMsg) if err != nil { return traces.Errorf(span, "failed processing post consensus message: %w", err) } @@ -381,38 +386,31 @@ func (r *ProposerRunner) ProcessPostConsensus(ctx context.Context, logger *zap.L return nil } + r.measurements.EndPostConsensus() + recordPostConsensusDuration(ctx, r.measurements.PostConsensusTime(), spectypes.RoleProposer) + // only 1 root, verified by expectedPostConsensusRootsAndDomain root := 
roots[0] span.AddEvent("reconstructing beacon signature", trace.WithAttributes(observability.BeaconBlockRootAttribute(root))) - sig, err := r.GetState().ReconstructBeaconSig(r.GetState().PostConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + sig, err := r.bState().ReconstructBeaconSig(r.bState().PostConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { // If the reconstructed signature verification failed, fall back to verifying each partial signature - r.BaseRunner.FallBackAndVerifyEachSignature(r.GetState().PostConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) + r.BaseRunner.FallBackAndVerifyEachSignature(r.bState().PostConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) return traces.Errorf(span, "got post-consensus quorum but it has invalid signatures: %w", err) } specSig := phase0.BLSSignature{} copy(specSig[:], sig) - r.measurements.EndPostConsensus() - recordPostConsensusDuration(ctx, r.measurements.PostConsensusTime(), spectypes.RoleProposer) - logger.Debug("🧩 reconstructed partial post consensus signatures proposer", - zap.Uint64s("signers", getPostConsensusProposerSigners(r.GetState(), root)), - fields.PostConsensusTime(r.measurements.PostConsensusTime()), - fields.QBFTRound(r.GetState().RunningInstance.State.Round)) + zap.Uint64s("signers", getPostConsensusProposerSigners(r.bState(), root)), + ) r.doppelgangerHandler.ReportQuorum(r.GetShare().ValidatorIndex) - start := time.Now() - - logger = logger.With( - fields.PreConsensusTime(r.measurements.PreConsensusTime()), - fields.ConsensusTime(r.measurements.ConsensusTime()), - fields.PostConsensusTime(r.measurements.PostConsensusTime()), - fields.QBFTHeight(r.BaseRunner.QBFTController.Height), - fields.QBFTRound(r.GetState().RunningInstance.State.Round), - ) + const submittingBlockProposalEvent = "submitting block proposal" + span.AddEvent(submittingBlockProposalEvent) + logger.Info(submittingBlockProposalEvent) // If this operator is the leader of the decided round and it originally // fetched a full (non-blinded) block, prefer submitting the full locally @@ -422,7 +420,7 @@ func (r *ProposerRunner) ProcessPostConsensus(ctx context.Context, logger *zap.L // TODO: should we send the block at all if we're not the leader? It's probably not effective but // I left it for now to keep backwards compatibility. validatorConsensusData := &spectypes.ValidatorConsensusData{} - err = validatorConsensusData.Decode(r.GetState().DecidedValue) + err = validatorConsensusData.Decode(r.bState().DecidedValue) if err != nil { return traces.Errorf(span, "could not decode decided validator consensus data: %w", err) } @@ -430,7 +428,7 @@ func (r *ProposerRunner) ProcessPostConsensus(ctx context.Context, logger *zap.L if err != nil { return traces.Errorf(span, "could not get block data from consensus data: %w", err) } - leaderID := r.GetState().RunningInstance.Proposer() + leaderID := r.bState().RunningInstance.Proposer() if r.cachedFullBlock != nil && leaderID == r.operatorSigner.GetOperatorID() { if bytes.Equal(validatorConsensusData.DataSSZ, r.cachedBlindedBlockSSZ) { logger.Debug("leader will use the original full block for proposal submission") @@ -458,54 +456,53 @@ func (r *ProposerRunner) ProcessPostConsensus(ctx context.Context, logger *zap.L logger = logger.With(loggerFields...) 
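// Illustrative sketch, not part of this patch: the timing fields logged in the new
// "successfully finished duty processing" entries below are read from the dutyMeasurements
// type this diff introduces in place of measurementsStore. Inferred from the methods shown
// in the measurements hunk and the getters called throughout the runners (field names and
// the TotalConsensusTime arithmetic are assumptions; the real file is not fully shown here):

package runner // assumed package for protocol/v2/ssv/runner

import "time"

// dutyMeasurements tracks per-phase durations with plain start/stop pairs; the old
// Pause/Continue bookkeeping around beacon-node calls is removed by this patch.
type dutyMeasurements struct {
	dutyStart             time.Time
	dutyDuration          time.Duration
	preConsensusStart     time.Time
	preConsensusDuration  time.Duration
	consensusStart        time.Time
	consensusDuration     time.Duration
	postConsensusStart    time.Time
	postConsensusDuration time.Duration
}

func newMeasurementsStore() *dutyMeasurements { return &dutyMeasurements{} }

// Getters feeding the log fields and metrics recorded by the runners in this diff.
// The Start*/End* setters follow the same pattern as EndPostConsensus shown above.
func (cm *dutyMeasurements) PreConsensusTime() time.Duration  { return cm.preConsensusDuration }
func (cm *dutyMeasurements) ConsensusTime() time.Duration     { return cm.consensusDuration }
func (cm *dutyMeasurements) PostConsensusTime() time.Duration { return cm.postConsensusDuration }
func (cm *dutyMeasurements) TotalConsensusTime() time.Duration {
	return cm.preConsensusDuration + cm.consensusDuration + cm.postConsensusDuration
}
func (cm *dutyMeasurements) TotalDutyTime() time.Duration { return cm.dutyDuration }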
+ start := time.Now() if err := r.GetBeaconNode().SubmitBeaconBlock(ctx, vBlk, specSig); err != nil { recordFailedSubmission(ctx, spectypes.BNRoleProposer) const errMsg = "could not submit beacon block" - logger.Error(errMsg, - fields.SubmissionTime(time.Since(start)), - zap.Error(err)) + logger.Error(errMsg, fields.Took(time.Since(start)), zap.Error(err)) return traces.Errorf(span, "%s: %w", errMsg, err) } - - const eventMsg = "✅ successfully submitted block proposal" - span.AddEvent(eventMsg, trace.WithAttributes( + const submittedBlockProposalEvent = "✅ successfully submitted block proposal" + span.AddEvent(submittedBlockProposalEvent, trace.WithAttributes( observability.BeaconSlotAttribute(r.BaseRunner.State.CurrentDuty.DutySlot()), observability.DutyRoundAttribute(r.BaseRunner.State.RunningInstance.State.Round), observability.BeaconBlockHashAttribute(blockHash), observability.BeaconBlockIsBlindedAttribute(vBlk.Blinded), )) - logger.Info(eventMsg, - fields.QBFTHeight(r.BaseRunner.QBFTController.Height), - fields.QBFTRound(r.GetState().RunningInstance.State.Round), - fields.Took(time.Since(start)), - fields.TotalConsensusTime(r.measurements.TotalConsensusTime()), - fields.TotalDutyTime(r.measurements.TotalDutyTime())) + logger.Info(submittedBlockProposalEvent, fields.Took(time.Since(start))) - r.GetState().Finished = true + r.bState().Finished = true r.measurements.EndDutyFlow() - recordDutyDuration(ctx, r.measurements.TotalDutyTime(), spectypes.BNRoleProposer, r.GetState().RunningInstance.State.Round) - recordSuccessfulSubmission( - ctx, - 1, - r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.GetState().CurrentDuty.DutySlot()), - spectypes.BNRoleProposer, + recordDutyDuration(ctx, r.measurements.TotalDutyTime(), spectypes.BNRoleProposer, r.bState().RunningInstance.State.Round) + recordSuccessfulSubmission(ctx, 1, r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.bState().CurrentDuty.DutySlot()), spectypes.BNRoleProposer) + + const dutyFinishedEvent = "successfully finished duty processing" + logger.Info(dutyFinishedEvent, + fields.PreConsensusTime(r.measurements.PreConsensusTime()), + fields.ConsensusTime(r.measurements.ConsensusTime()), + fields.ConsensusRounds(uint64(r.bState().RunningInstance.State.Round)), + fields.PostConsensusTime(r.measurements.PostConsensusTime()), + fields.TotalConsensusTime(r.measurements.TotalConsensusTime()), + fields.TotalDutyTime(r.measurements.TotalDutyTime()), ) + span.AddEvent(dutyFinishedEvent) span.SetStatus(codes.Ok, "") return nil } func (r *ProposerRunner) expectedPreConsensusRootsAndDomain() ([]ssz.HashRoot, phase0.DomainType, error) { - epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.GetState().CurrentDuty.DutySlot()) + epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.bState().CurrentDuty.DutySlot()) return []ssz.HashRoot{spectypes.SSZUint64(epoch)}, spectypes.DomainRandao, nil } // expectedPostConsensusRootsAndDomain an INTERNAL function, returns the expected post-consensus roots to sign func (r *ProposerRunner) expectedPostConsensusRootsAndDomain(context.Context) ([]ssz.HashRoot, phase0.DomainType, error) { validatorConsensusData := &spectypes.ValidatorConsensusData{} - err := validatorConsensusData.Decode(r.GetState().DecidedValue) + err := validatorConsensusData.Decode(r.bState().DecidedValue) if err != nil { return nil, phase0.DomainType{}, errors.Wrap(err, "could not decode consensus data") } @@ -652,7 +649,7 @@ func (r *ProposerRunner) GetShare() *spectypes.Share { return nil } -func (r *ProposerRunner) GetState() 
*State { +func (r *ProposerRunner) bState() *State { return r.BaseRunner.State } diff --git a/protocol/v2/ssv/runner/runner.go b/protocol/v2/ssv/runner/runner.go index a0918a10cf..35e8b2e7a5 100644 --- a/protocol/v2/ssv/runner/runner.go +++ b/protocol/v2/ssv/runner/runner.go @@ -64,7 +64,7 @@ type Runner interface { // ProcessPostConsensus processes all post-consensus msgs, returns error if can't process ProcessPostConsensus(ctx context.Context, logger *zap.Logger, signedMsg *spectypes.PartialSignatureMessages) error // OnTimeoutQBFT processes timeout event that can arrive during QBFT consensus phase - OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, msg ssvtypes.EventMsg) error + OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, timeoutData *ssvtypes.TimeoutData) error // expectedPreConsensusRootsAndDomain an INTERNAL function, returns the expected pre-consensus roots to sign expectedPreConsensusRootsAndDomain() ([]ssz.HashRoot, phase0.DomainType, error) @@ -237,21 +237,34 @@ func (b *BaseRunner) baseStartNewNonBeaconDuty(ctx context.Context, logger *zap. } // basePreConsensusMsgProcessing is a base func that all runner implementation can call for processing a pre-consensus msg -func (b *BaseRunner) basePreConsensusMsgProcessing( - ctx context.Context, - runner Runner, - signedMsg *spectypes.PartialSignatureMessages, -) (bool, [][32]byte, error) { +func (b *BaseRunner) basePreConsensusMsgProcessing(ctx context.Context, logger *zap.Logger, runner Runner, signedMsg *spectypes.PartialSignatureMessages) (bool, [][32]byte, error) { + ctx, span := tracer.Start(ctx, observability.InstrumentName(observabilityNamespace, "runner.process_pre_consensus.base_msg_processing")) + defer span.End() + if err := b.ValidatePreConsensusMsg(ctx, runner, signedMsg); err != nil { return false, nil, errors.Wrap(err, "invalid pre-consensus message") } + const gotPreConsensusMsgEvent = "got pre-consensus message" + logger.Debug(gotPreConsensusMsgEvent, zap.Uint64("signer", ssvtypes.PartialSigMsgSigner(signedMsg))) + span.AddEvent(gotPreConsensusMsgEvent) + hasQuorum, roots := b.basePartialSigMsgProcessing(signedMsg, b.State.PreConsensusContainer) + + if hasQuorum { + const gotPreConsensusQuorumEvent = "got pre consensus quorum" + logger.Debug(gotPreConsensusQuorumEvent) + span.AddEvent(gotPreConsensusQuorumEvent) + } + return hasQuorum, roots, nil } // baseConsensusMsgProcessing is a base func that all runner implementation can call for processing a consensus msg func (b *BaseRunner) baseConsensusMsgProcessing(ctx context.Context, logger *zap.Logger, valueCheckFn specqbft.ProposedValueCheckF, msg *spectypes.SignedSSVMessage, decidedValue spectypes.Encoder) (bool, spectypes.Encoder, error) { + ctx, span := tracer.Start(ctx, observability.InstrumentName(observabilityNamespace, "runner.process_consensus.base_msg_processing")) + defer span.End() + prevDecided := false if b.hasRunningDuty() && b.State != nil && b.State.RunningInstance != nil { prevDecided, _ = b.State.RunningInstance.IsDecided() @@ -295,6 +308,10 @@ func (b *BaseRunner) baseConsensusMsgProcessing(ctx context.Context, logger *zap return true, nil, errors.Wrap(err, "could not encode decided value") } + const qbftInstanceIsDecidedEvent = "QBFT instance is decided" + logger.Debug(qbftInstanceIsDecidedEvent) + span.AddEvent(qbftInstanceIsDecidedEvent) + // update the decided and the highest decided slot b.State.DecidedValue = decidedValueEncoded b.highestDecidedSlot = b.State.CurrentDuty.DutySlot() @@ -303,12 +320,26 @@ func (b *BaseRunner) 
baseConsensusMsgProcessing(ctx context.Context, logger *zap } // basePostConsensusMsgProcessing is a base func that all runner implementation can call for processing a post-consensus msg -func (b *BaseRunner) basePostConsensusMsgProcessing(ctx context.Context, runner Runner, signedMsg *spectypes.PartialSignatureMessages) (bool, [][32]byte, error) { +func (b *BaseRunner) basePostConsensusMsgProcessing(ctx context.Context, logger *zap.Logger, runner Runner, signedMsg *spectypes.PartialSignatureMessages) (bool, [][32]byte, error) { + ctx, span := tracer.Start(ctx, observability.InstrumentName(observabilityNamespace, "runner.process_post_consensus.base_msg_processing")) + defer span.End() + if err := b.ValidatePostConsensusMsg(ctx, runner, signedMsg); err != nil { return false, nil, errors.Wrap(err, "invalid post-consensus message") } + const gotPostConsensusMsgEvent = "got post-consensus message" + logger.Debug(gotPostConsensusMsgEvent, zap.Uint64("signer", ssvtypes.PartialSigMsgSigner(signedMsg))) + span.AddEvent(gotPostConsensusMsgEvent) + hasQuorum, roots := b.basePartialSigMsgProcessing(signedMsg, b.State.PostConsensusContainer) + + if hasQuorum { + const gotPostConsensusQuorumEvent = "got post consensus quorum" + logger.Debug(gotPostConsensusQuorumEvent) + span.AddEvent(gotPostConsensusQuorumEvent) + } + return hasQuorum, roots, nil } @@ -401,19 +432,18 @@ func (b *BaseRunner) decide( return traces.Errorf(span, "input data invalid: %w", err) } - span.AddEvent("start new instance") - if err := b.QBFTController.StartNewInstance( + newInstance, err := b.QBFTController.StartNewInstance( ctx, logger, specqbft.Height(slot), byts, valueChecker, - ); err != nil { + ) + if err != nil { return traces.Errorf(span, "could not start new QBFT instance: %w", err) } - newInstance := b.QBFTController.StoredInstances.FindInstance(b.QBFTController.Height) if newInstance == nil { - return traces.Errorf(span, "could not find newly created QBFT instance") + return traces.Errorf(span, "could not start new QBFT instance: instance is nil") } b.State.RunningInstance = newInstance @@ -457,6 +487,6 @@ func (b *BaseRunner) ShouldProcessNonBeaconDuty(duty spectypes.Duty) error { return nil } -func (b *BaseRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, msg ssvtypes.EventMsg) error { - return b.QBFTController.OnTimeout(ctx, logger, msg) +func (b *BaseRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, timeoutData *ssvtypes.TimeoutData) error { + return b.QBFTController.OnTimeout(ctx, logger, timeoutData) } diff --git a/protocol/v2/ssv/runner/runner_signatures.go b/protocol/v2/ssv/runner/runner_signatures.go index 533dd7a5b1..8d409049e8 100644 --- a/protocol/v2/ssv/runner/runner_signatures.go +++ b/protocol/v2/ssv/runner/runner_signatures.go @@ -8,11 +8,10 @@ import ( ssz "github.com/ferranbt/fastssz" "github.com/herumi/bls-eth-go-binary/bls" "github.com/pkg/errors" - spectypes "github.com/ssvlabs/ssv-spec/types" "github.com/ssvlabs/ssv/protocol/v2/ssv" - "github.com/ssvlabs/ssv/protocol/v2/types" + ssvtypes "github.com/ssvlabs/ssv/protocol/v2/types" ) func signBeaconObject( @@ -90,8 +89,7 @@ func (b *BaseRunner) validatePartialSigMsg( ))) } - // Get signer, it is the same in all psigMsgs.Messages and len(psigMsgs.Messages) > 0 (guaranteed by psigMsgs.Validate()). 
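// Illustrative sketch, not part of this patch: several hunks replace the inline
// "Messages[0].Signer" access with ssvtypes.PartialSigMsgSigner. Its body is not shown
// in this diff; given the invariant stated in the removed comment above (Validate()
// guarantees at least one message and a single uniform signer), it presumably reduces to:

package types // assumed: protocol/v2/types, imported elsewhere in this diff as ssvtypes

import spectypes "github.com/ssvlabs/ssv-spec/types"

// PartialSigMsgSigner returns the operator that signed the given partial-signature
// messages; all messages carry the same signer, so the first entry suffices.
func PartialSigMsgSigner(msgs *spectypes.PartialSignatureMessages) spectypes.OperatorID {
	return msgs.Messages[0].Signer
}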
- msgSigner := psigMsgs.Messages[0].Signer + msgSigner := ssvtypes.PartialSigMsgSigner(psigMsgs) // Get committee (unique for runner) var shareSample *spectypes.Share @@ -137,7 +135,7 @@ func (b *BaseRunner) verifyBeaconPartialSignature(signer spectypes.OperatorID, s committee []*spectypes.ShareMember) error { for _, n := range committee { if n.Signer == signer { - pk, err := types.DeserializeBLSPublicKey(n.SharePubKey) + pk, err := ssvtypes.DeserializeBLSPublicKey(n.SharePubKey) if err != nil { return errors.Wrap(err, "could not deserialized pk") } diff --git a/protocol/v2/ssv/runner/sync_committee_contribution.go b/protocol/v2/ssv/runner/sync_committee_contribution.go index 25812bf471..b12313a0d2 100644 --- a/protocol/v2/ssv/runner/sync_committee_contribution.go +++ b/protocol/v2/ssv/runner/sync_committee_contribution.go @@ -36,7 +36,7 @@ type SyncCommitteeAggregatorRunner struct { network specqbft.Network signer ekm.BeaconSigner operatorSigner ssvtypes.OperatorSigner - measurements measurementsStore + measurements dutyMeasurements // ValCheck is used to validate the qbft-value(s) proposed by other Operators. ValCheck ssv.ValueChecker @@ -71,7 +71,7 @@ func NewSyncCommitteeAggregatorRunner( signer: signer, ValCheck: valCheck, operatorSigner: operatorSigner, - measurements: NewMeasurementsStore(), + measurements: *newMeasurementsStore(), }, nil } @@ -93,7 +93,7 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPreConsensus(ctx context.Context, )) defer span.End() - hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, r, signedMsg) + hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, logger, r, signedMsg) if err != nil { return traces.Errorf(span, "failed processing sync committee selection proof message: %w", err) } @@ -105,7 +105,7 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPreConsensus(ctx context.Context, return nil } - epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.GetState().CurrentDuty.DutySlot()) + epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.bState().CurrentDuty.DutySlot()) recordSuccessfulQuorum(ctx, 1, epoch, spectypes.BNRoleSyncCommitteeContribution, attributeConsensusPhasePreConsensus) r.measurements.EndPreConsensus() @@ -120,11 +120,11 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPreConsensus(ctx context.Context, for i, root := range roots { // reconstruct selection proof sig span.AddEvent("reconstructing beacon signature", trace.WithAttributes(observability.BeaconBlockRootAttribute(root))) - sig, err := r.GetState().ReconstructBeaconSig(r.GetState().PreConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + sig, err := r.bState().ReconstructBeaconSig(r.bState().PreConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { // If the reconstructed signature verification failed, fall back to verifying each partial signature for _, root := range roots { - r.BaseRunner.FallBackAndVerifyEachSignature(r.GetState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) + r.BaseRunner.FallBackAndVerifyEachSignature(r.bState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) } return traces.Errorf(span, "got pre-consensus quorum but it has invalid signatures: %w", err) } @@ -139,30 +139,33 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPreConsensus(ctx context.Context, // fetch sync committee contribution span.AddEvent("fetching sync committee subnet ID") - subnet := 
r.GetBeaconNode().SyncCommitteeSubnetID(phase0.CommitteeIndex(r.GetState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorSyncCommitteeIndices[i])) + subnet := r.GetBeaconNode().SyncCommitteeSubnetID(phase0.CommitteeIndex(r.bState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorSyncCommitteeIndices[i])) selectionProofs = append(selectionProofs, blsSigSelectionProof) subnets = append(subnets, subnet) } if len(selectionProofs) == 0 { - span.AddEvent("no selection proofs") - span.SetStatus(codes.Ok, "") - r.GetState().Finished = true + r.bState().Finished = true r.measurements.EndDutyFlow() + const dutyFinishedNoProofsEvent = "successfully finished duty processing (no selection proofs)" + logger.Info(dutyFinishedNoProofsEvent, + fields.PreConsensusTime(r.measurements.PreConsensusTime()), + fields.TotalConsensusTime(r.measurements.TotalConsensusTime()), + fields.TotalDutyTime(r.measurements.TotalDutyTime()), + ) + span.AddEvent(dutyFinishedNoProofsEvent) + span.SetStatus(codes.Ok, "") return nil } - duty := r.GetState().CurrentDuty.(*spectypes.ValidatorDuty) - - r.measurements.PauseDutyFlow() + duty := r.bState().CurrentDuty.(*spectypes.ValidatorDuty) span.AddEvent("fetching sync committee contributions") contributions, ver, err := r.GetBeaconNode().GetSyncCommitteeContribution(ctx, duty.DutySlot(), selectionProofs, subnets) if err != nil { return traces.Errorf(span, "could not get sync committee contribution: %w", err) } - r.measurements.ContinueDutyFlow() byts, err := contributions.MarshalSSZ() if err != nil { @@ -177,6 +180,7 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPreConsensus(ctx context.Context, } r.measurements.StartConsensus() + if err := r.BaseRunner.decide(ctx, logger, r, input.Duty.Slot, input, r.ValCheck); err != nil { return traces.Errorf(span, "can't start new duty runner instance for duty: %w", err) } @@ -195,7 +199,7 @@ func (r *SyncCommitteeAggregatorRunner) ProcessConsensus(ctx context.Context, lo )) defer span.End() - span.AddEvent("checking if instance is decided") + span.AddEvent("processing QBFT consensus msg") decided, decidedValue, err := r.BaseRunner.baseConsensusMsgProcessing(ctx, logger, r.ValCheck.CheckValue, signedMsg, &spectypes.ValidatorConsensusData{}) if err != nil { return traces.Errorf(span, "failed processing consensus message: %w", err) @@ -208,7 +212,6 @@ func (r *SyncCommitteeAggregatorRunner) ProcessConsensus(ctx context.Context, lo return nil } - span.AddEvent("instance is decided") r.measurements.EndConsensus() recordConsensusDuration(ctx, r.measurements.ConsensusTime(), spectypes.RoleSyncCommitteeContribution) @@ -284,12 +287,16 @@ func (r *SyncCommitteeAggregatorRunner) ProcessConsensus(ctx context.Context, lo return traces.Errorf(span, "can't broadcast partial post consensus sig: %w", err) } + const broadcastedPostConsensusMsgEvent = "broadcasted post consensus partial signature message" + logger.Debug(broadcastedPostConsensusMsgEvent) + span.AddEvent(broadcastedPostConsensusMsgEvent) + span.SetStatus(codes.Ok, "") return nil } -func (r *SyncCommitteeAggregatorRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, msg ssvtypes.EventMsg) error { - return r.BaseRunner.OnTimeoutQBFT(ctx, logger, msg) +func (r *SyncCommitteeAggregatorRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, timeoutData *ssvtypes.TimeoutData) error { + return r.BaseRunner.OnTimeoutQBFT(ctx, logger, timeoutData) } func (r *SyncCommitteeAggregatorRunner) ProcessPostConsensus(ctx context.Context, logger *zap.Logger, signedMsg 
*spectypes.PartialSignatureMessages) error { @@ -301,7 +308,7 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPostConsensus(ctx context.Context )) defer span.End() - hasQuorum, roots, err := r.BaseRunner.basePostConsensusMsgProcessing(ctx, r, signedMsg) + hasQuorum, roots, err := r.BaseRunner.basePostConsensusMsgProcessing(ctx, logger, r, signedMsg) if err != nil { return traces.Errorf(span, "failed processing post consensus message: %w", err) } @@ -317,7 +324,7 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPostConsensus(ctx context.Context // get contributions validatorConsensusData := &spectypes.ValidatorConsensusData{} - err = validatorConsensusData.Decode(r.GetState().DecidedValue) + err = validatorConsensusData.Decode(r.bState().DecidedValue) if err != nil { return traces.Errorf(span, "could not decode decided validator consensus data: %w", err) } @@ -326,14 +333,19 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPostConsensus(ctx context.Context return traces.Errorf(span, "could not get contributions: %w", err) } - var successfullySubmittedContributions uint32 + const submittingSyncCommitteeEvent = "submitting sync committee contributions" + span.AddEvent(submittingSyncCommitteeEvent) + logger.Debug(submittingSyncCommitteeEvent) + + successfullySubmittedContributions := uint32(0) + start := time.Now() for _, root := range roots { span.AddEvent("reconstructing beacon signature", trace.WithAttributes(observability.BeaconBlockRootAttribute(root))) - sig, err := r.GetState().ReconstructBeaconSig(r.GetState().PostConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + sig, err := r.bState().ReconstructBeaconSig(r.bState().PostConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { // If the reconstructed signature verification failed, fall back to verifying each partial signature for _, root := range roots { - r.BaseRunner.FallBackAndVerifyEachSignature(r.GetState().PostConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) + r.BaseRunner.FallBackAndVerifyEachSignature(r.bState().PostConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) } return traces.Errorf(span, "got post-consensus quorum but it has invalid signatures: %w", err) } @@ -341,7 +353,6 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPostConsensus(ctx context.Context copy(specSig[:], sig) for _, contribution := range contributions { - start := time.Now() // match the right contrib and proof root to signed root contribAndProof, contribAndProofRoot, err := r.generateContributionAndProof(ctx, contribution.Contribution, contribution.SelectionProofSig) if err != nil { @@ -352,7 +363,7 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPostConsensus(ctx context.Context continue // not the correct root } - signedContrib, err := r.GetState().ReconstructBeaconSig(r.GetState().PostConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + signedContrib, err := r.bState().ReconstructBeaconSig(r.bState().PostConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { return traces.Errorf(span, "could not reconstruct contribution and proof sig: %w", err) } @@ -363,38 +374,54 @@ func (r *SyncCommitteeAggregatorRunner) ProcessPostConsensus(ctx context.Context Signature: blsSignedContribAndProof, } - if err := r.GetBeaconNode().SubmitSignedContributionAndProof(ctx, signedContribAndProof); err != nil { + const 
submittingSyncCommitteeEvent = "submitting sync committee contribution" + span.AddEvent(submittingSyncCommitteeEvent) + logger.Debug(submittingSyncCommitteeEvent) + + reqStart := time.Now() + err = r.GetBeaconNode().SubmitSignedContributionAndProof(ctx, signedContribAndProof) + if err != nil { recordFailedSubmission(ctx, spectypes.BNRoleSyncCommitteeContribution) logger.Error("❌ could not submit to Beacon chain reconstructed contribution and proof", - fields.SubmissionTime(time.Since(start)), - zap.Error(err)) + fields.Took(time.Since(reqStart)), + zap.Error(err), + ) return traces.Errorf(span, "could not submit to Beacon chain reconstructed contribution and proof: %w", err) } successfullySubmittedContributions++ - const eventMsg = "✅ successfully submitted sync committee aggregator" - span.AddEvent(eventMsg) - logger.Debug( - eventMsg, - fields.SubmissionTime(time.Since(start)), - fields.TotalConsensusTime(r.measurements.TotalConsensusTime()), - fields.TotalDutyTime(r.measurements.TotalDutyTime()), - ) + const submittedSyncCommitteeEvent = "successfully submitted sync committee contribution" + span.AddEvent(submittedSyncCommitteeEvent) + logger.Debug(submittedSyncCommitteeEvent, fields.Took(time.Since(reqStart))) + break } } - r.GetState().Finished = true + const submittedSyncCommitteeEvent = "✅ successfully submitted sync committee contributions" + span.AddEvent(submittedSyncCommitteeEvent) + logger.Debug(submittedSyncCommitteeEvent, + zap.Uint32("submitted_contributions", successfullySubmittedContributions), + fields.Took(time.Since(start)), + ) + + r.bState().Finished = true r.measurements.EndDutyFlow() - recordDutyDuration(ctx, r.measurements.TotalDutyTime(), spectypes.BNRoleSyncCommitteeContribution, r.GetState().RunningInstance.State.Round) - recordSuccessfulSubmission( - ctx, - successfullySubmittedContributions, - r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.GetState().CurrentDuty.DutySlot()), - spectypes.BNRoleSyncCommitteeContribution, + recordDutyDuration(ctx, r.measurements.TotalDutyTime(), spectypes.BNRoleSyncCommitteeContribution, r.bState().RunningInstance.State.Round) + recordSuccessfulSubmission(ctx, successfullySubmittedContributions, r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.bState().CurrentDuty.DutySlot()), spectypes.BNRoleSyncCommitteeContribution) + + const dutyFinishedEvent = "successfully finished duty processing" + logger.Info(dutyFinishedEvent, + fields.PreConsensusTime(r.measurements.PreConsensusTime()), + fields.ConsensusTime(r.measurements.ConsensusTime()), + fields.ConsensusRounds(uint64(r.bState().RunningInstance.State.Round)), + fields.PostConsensusTime(r.measurements.PostConsensusTime()), + fields.TotalConsensusTime(r.measurements.TotalConsensusTime()), + fields.TotalDutyTime(r.measurements.TotalDutyTime()), ) + span.AddEvent(dutyFinishedEvent) span.SetStatus(codes.Ok, "") return nil @@ -406,12 +433,12 @@ func (r *SyncCommitteeAggregatorRunner) generateContributionAndProof( proof phase0.BLSSignature, ) (*altair.ContributionAndProof, phase0.Root, error) { contribAndProof := &altair.ContributionAndProof{ - AggregatorIndex: r.GetState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorIndex, + AggregatorIndex: r.bState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorIndex, Contribution: &contrib, SelectionProof: proof, } - epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.GetState().CurrentDuty.DutySlot()) + epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.bState().CurrentDuty.DutySlot()) dContribAndProof, err := 
r.GetBeaconNode().DomainData(ctx, epoch, spectypes.DomainContributionAndProof) if err != nil { return nil, phase0.Root{}, errors.Wrap(err, "could not get domain data") @@ -425,10 +452,10 @@ func (r *SyncCommitteeAggregatorRunner) generateContributionAndProof( func (r *SyncCommitteeAggregatorRunner) expectedPreConsensusRootsAndDomain() ([]ssz.HashRoot, phase0.DomainType, error) { sszIndexes := make([]ssz.HashRoot, 0) - for _, index := range r.GetState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorSyncCommitteeIndices { + for _, index := range r.bState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorSyncCommitteeIndices { subnet := r.GetBeaconNode().SyncCommitteeSubnetID(phase0.CommitteeIndex(index)) data := &altair.SyncAggregatorSelectionData{ - Slot: r.GetState().CurrentDuty.DutySlot(), + Slot: r.bState().CurrentDuty.DutySlot(), SubcommitteeIndex: subnet, } sszIndexes = append(sszIndexes, data) @@ -440,7 +467,7 @@ func (r *SyncCommitteeAggregatorRunner) expectedPreConsensusRootsAndDomain() ([] func (r *SyncCommitteeAggregatorRunner) expectedPostConsensusRootsAndDomain(ctx context.Context) ([]ssz.HashRoot, phase0.DomainType, error) { // get contributions validatorConsensusData := &spectypes.ValidatorConsensusData{} - err := validatorConsensusData.Decode(r.GetState().DecidedValue) + err := validatorConsensusData.Decode(r.bState().DecidedValue) if err != nil { return nil, spectypes.DomainError, errors.Wrap(err, "could not create consensus data") } @@ -483,7 +510,7 @@ func (r *SyncCommitteeAggregatorRunner) executeDuty(ctx context.Context, logger Messages: []*spectypes.PartialSignatureMessage{}, } - for _, index := range r.GetState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorSyncCommitteeIndices { + for _, index := range r.bState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorSyncCommitteeIndices { subnet := r.GetBeaconNode().SyncCommitteeSubnetID(phase0.CommitteeIndex(index)) data := &altair.SyncAggregatorSelectionData{ Slot: duty.DutySlot(), @@ -590,7 +617,7 @@ func (r *SyncCommitteeAggregatorRunner) GetShare() *spectypes.Share { return nil } -func (r *SyncCommitteeAggregatorRunner) GetState() *State { +func (r *SyncCommitteeAggregatorRunner) bState() *State { return r.BaseRunner.State } diff --git a/protocol/v2/ssv/runner/validator_registration.go b/protocol/v2/ssv/runner/validator_registration.go index c0c122ab56..25f7d129a9 100644 --- a/protocol/v2/ssv/runner/validator_registration.go +++ b/protocol/v2/ssv/runner/validator_registration.go @@ -102,14 +102,15 @@ func (r *ValidatorRegistrationRunner) ProcessPreConsensus(ctx context.Context, l )) defer span.End() - hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, r, signedMsg) + hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, logger, r, signedMsg) if err != nil { return traces.Errorf(span, "failed processing validator registration message: %w", err) } logger.Debug("got partial sig", - zap.Uint64("signer", signedMsg.Messages[0].Signer), - zap.Bool("quorum", hasQuorum)) + zap.Uint64("signer", ssvtypes.PartialSigMsgSigner(signedMsg)), + zap.Bool("quorum", hasQuorum), + ) // quorum returns true only once (first time quorum achieved) if !hasQuorum { @@ -122,10 +123,10 @@ func (r *ValidatorRegistrationRunner) ProcessPreConsensus(ctx context.Context, l root := roots[0] span.AddEvent("reconstructing beacon signature", trace.WithAttributes(observability.BeaconBlockRootAttribute(root))) - fullSig, err := r.GetState().ReconstructBeaconSig(r.GetState().PreConsensusContainer, root, 
r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + fullSig, err := r.bState().ReconstructBeaconSig(r.bState().PreConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { // If the reconstructed signature verification failed, fall back to verifying each partial signature - r.BaseRunner.FallBackAndVerifyEachSignature(r.GetState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) + r.BaseRunner.FallBackAndVerifyEachSignature(r.bState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) return traces.Errorf(span, "got pre-consensus quorum but it has invalid signatures: %w", err) } specSig := phase0.BLSSignature{} @@ -153,9 +154,14 @@ func (r *ValidatorRegistrationRunner) ProcessPreConsensus(ctx context.Context, l span.AddEvent(eventMsg) logger.Debug(eventMsg, fields.FeeRecipient(registration.FeeRecipient[:]), - zap.String("signature", hex.EncodeToString(specSig[:]))) + zap.String("signature", hex.EncodeToString(specSig[:])), + ) + + r.bState().Finished = true - r.GetState().Finished = true + const dutyFinishedEvent = "successfully finished duty processing" + logger.Info(dutyFinishedEvent) + span.AddEvent(dutyFinishedEvent) span.SetStatus(codes.Ok, "") return nil @@ -165,8 +171,8 @@ func (r *ValidatorRegistrationRunner) ProcessConsensus(ctx context.Context, logg return spectypes.NewError(spectypes.ValidatorRegistrationNoConsensusPhaseErrorCode, "no consensus phase for validator registration") } -func (r *ValidatorRegistrationRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, msg ssvtypes.EventMsg) error { - return r.BaseRunner.OnTimeoutQBFT(ctx, logger, msg) +func (r *ValidatorRegistrationRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, timeoutData *ssvtypes.TimeoutData) error { + return r.BaseRunner.OnTimeoutQBFT(ctx, logger, timeoutData) } func (r *ValidatorRegistrationRunner) ProcessPostConsensus(ctx context.Context, logger *zap.Logger, signedMsg *spectypes.PartialSignatureMessages) error { @@ -335,7 +341,7 @@ func (r *ValidatorRegistrationRunner) GetShare() *spectypes.Share { return nil } -func (r *ValidatorRegistrationRunner) GetState() *State { +func (r *ValidatorRegistrationRunner) bState() *State { return r.BaseRunner.State } diff --git a/protocol/v2/ssv/runner/voluntary_exit.go b/protocol/v2/ssv/runner/voluntary_exit.go index 07e40c5cff..682474fcbb 100644 --- a/protocol/v2/ssv/runner/voluntary_exit.go +++ b/protocol/v2/ssv/runner/voluntary_exit.go @@ -91,7 +91,7 @@ func (r *VoluntaryExitRunner) ProcessPreConsensus(ctx context.Context, logger *z span.SetAttributes(observability.ValidatorIndexAttribute(validatorIndex)) } - hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, r, signedMsg) + hasQuorum, roots, err := r.BaseRunner.basePreConsensusMsgProcessing(ctx, logger, r, signedMsg) if err != nil { return traces.Errorf(span, "failed processing voluntary exit message: %w", err) } @@ -106,10 +106,10 @@ func (r *VoluntaryExitRunner) ProcessPreConsensus(ctx context.Context, logger *z // only 1 root, verified in basePreConsensusMsgProcessing root := roots[0] span.AddEvent("reconstructing beacon signature", trace.WithAttributes(observability.BeaconBlockRootAttribute(root))) - fullSig, err := r.GetState().ReconstructBeaconSig(r.GetState().PreConsensusContainer, root, r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) + fullSig, err := r.bState().ReconstructBeaconSig(r.bState().PreConsensusContainer, root, 
r.GetShare().ValidatorPubKey[:], r.GetShare().ValidatorIndex) if err != nil { // If the reconstructed signature verification failed, fall back to verifying each partial signature - r.BaseRunner.FallBackAndVerifyEachSignature(r.GetState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) + r.BaseRunner.FallBackAndVerifyEachSignature(r.bState().PreConsensusContainer, root, r.GetShare().Committee, r.GetShare().ValidatorIndex) return traces.Errorf(span, "got pre-consensus quorum but it has invalid signatures: %w", err) } specSig := phase0.BLSSignature{} @@ -134,7 +134,11 @@ func (r *VoluntaryExitRunner) ProcessPreConsensus(ctx context.Context, logger *z zap.String("signature", hex.EncodeToString(specSig[:])), ) - r.GetState().Finished = true + r.bState().Finished = true + + const dutyFinishedEvent = "successfully finished duty processing" + logger.Info(dutyFinishedEvent) + span.AddEvent(dutyFinishedEvent) span.SetStatus(codes.Ok, "") return nil @@ -144,8 +148,8 @@ func (r *VoluntaryExitRunner) ProcessConsensus(ctx context.Context, logger *zap. return spectypes.NewError(spectypes.ValidatorExitNoConsensusPhaseErrorCode, "no consensus phase for voluntary exit") } -func (r *VoluntaryExitRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, msg ssvtypes.EventMsg) error { - return r.BaseRunner.OnTimeoutQBFT(ctx, logger, msg) +func (r *VoluntaryExitRunner) OnTimeoutQBFT(ctx context.Context, logger *zap.Logger, timeoutData *ssvtypes.TimeoutData) error { + return r.BaseRunner.OnTimeoutQBFT(ctx, logger, timeoutData) } func (r *VoluntaryExitRunner) ProcessPostConsensus(ctx context.Context, logger *zap.Logger, signedMsg *spectypes.PartialSignatureMessages) error { @@ -237,7 +241,7 @@ func (r *VoluntaryExitRunner) executeDuty(ctx context.Context, logger *zap.Logge // Returns *phase0.VoluntaryExit object with current epoch and own validator index func (r *VoluntaryExitRunner) calculateVoluntaryExit() (*phase0.VoluntaryExit, error) { epoch := r.BaseRunner.NetworkConfig.EstimatedEpochAtSlot(r.BaseRunner.State.CurrentDuty.DutySlot()) - validatorIndex := r.GetState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorIndex + validatorIndex := r.bState().CurrentDuty.(*spectypes.ValidatorDuty).ValidatorIndex return &phase0.VoluntaryExit{ Epoch: epoch, ValidatorIndex: validatorIndex, @@ -294,7 +298,8 @@ func (r *VoluntaryExitRunner) GetShare() *spectypes.Share { } return nil } -func (r *VoluntaryExitRunner) GetState() *State { + +func (r *VoluntaryExitRunner) bState() *State { return r.BaseRunner.State } diff --git a/protocol/v2/ssv/spectest/committee_msg_processing_type.go b/protocol/v2/ssv/spectest/committee_msg_processing_type.go index 1abcc77974..f877a493d9 100644 --- a/protocol/v2/ssv/spectest/committee_msg_processing_type.go +++ b/protocol/v2/ssv/spectest/committee_msg_processing_type.go @@ -20,9 +20,9 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/zap" + "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/networkconfig" "github.com/ssvlabs/ssv/observability/log" - qbfttesting "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" "github.com/ssvlabs/ssv/protocol/v2/ssv/validator" protocoltesting "github.com/ssvlabs/ssv/protocol/v2/testing" @@ -178,7 +178,7 @@ func (tests *MultiCommitteeSpecTest) GetPostState(logger *zap.Logger) (interface func overrideStateComparisonCommitteeSpecTest(t *testing.T, test *CommitteeSpecTest, name string, testType string) { specCommittee := &spec.Committee{} - specDir, err 
:= protocoltesting.GetSpecDir("", filepath.Join("ssv", "spectest")) + specDir, err := storage.GetSpecDir("", filepath.Join("ssv", "spectest")) require.NoError(t, err) specCommittee, err = typescomparable.UnmarshalStateComparison(specDir, name, testType, specCommittee) @@ -191,10 +191,10 @@ func overrideStateComparisonCommitteeSpecTest(t *testing.T, test *CommitteeSpecT committee.CommitteeMember = &specCommittee.CommitteeMember for i := range committee.Runners { committee.Runners[i].BaseRunner.NetworkConfig = networkconfig.TestNetwork - committee.Runners[i].ValCheck = qbfttesting.TestingValueChecker{} + committee.Runners[i].ValCheck = protocoltesting.TestingValueChecker{} } for i := range test.Committee.Runners { - test.Committee.Runners[i].ValCheck = qbfttesting.TestingValueChecker{} + test.Committee.Runners[i].ValCheck = protocoltesting.TestingValueChecker{} } root, err := committee.GetRoot() diff --git a/protocol/v2/ssv/spectest/msg_processing_type.go b/protocol/v2/ssv/spectest/msg_processing_type.go index c8cadb61fa..133b622b0f 100644 --- a/protocol/v2/ssv/spectest/msg_processing_type.go +++ b/protocol/v2/ssv/spectest/msg_processing_type.go @@ -22,7 +22,6 @@ import ( "github.com/ssvlabs/ssv/networkconfig" "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/protocol/v2/qbft/controller" - qbfttesting "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" ssvprotocoltesting "github.com/ssvlabs/ssv/protocol/v2/ssv/testing" @@ -79,25 +78,25 @@ func (test *MsgProcessingSpecTest) runPreTesting(ctx context.Context, logger *za case *runner.CommitteeRunner: for _, inst := range test.Runner.(*runner.CommitteeRunner).BaseRunner.QBFTController.StoredInstances { if inst.ValueChecker == nil { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } } case *runner.AggregatorRunner: for _, inst := range test.Runner.(*runner.AggregatorRunner).BaseRunner.QBFTController.StoredInstances { if inst.ValueChecker == nil { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } } case *runner.ProposerRunner: for _, inst := range test.Runner.(*runner.ProposerRunner).BaseRunner.QBFTController.StoredInstances { if inst.ValueChecker == nil { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } } case *runner.SyncCommitteeAggregatorRunner: for _, inst := range test.Runner.(*runner.SyncCommitteeAggregatorRunner).BaseRunner.QBFTController.StoredInstances { if inst.ValueChecker == nil { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } } } diff --git a/protocol/v2/ssv/spectest/multi_start_new_runner_duty_type.go b/protocol/v2/ssv/spectest/multi_start_new_runner_duty_type.go index 87e8b16081..5de5257cbe 100644 --- a/protocol/v2/ssv/spectest/multi_start_new_runner_duty_type.go +++ b/protocol/v2/ssv/spectest/multi_start_new_runner_duty_type.go @@ -10,14 +10,13 @@ import ( "testing" spectests "github.com/ssvlabs/ssv-spec/qbft/spectest/tests" - "github.com/stretchr/testify/require" - "go.uber.org/zap" - spectypes "github.com/ssvlabs/ssv-spec/types" spectestingutils "github.com/ssvlabs/ssv-spec/types/testingutils" + "github.com/stretchr/testify/require" + "go.uber.org/zap" - qbfttesting "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" 
"github.com/ssvlabs/ssv/protocol/v2/ssv/runner" + protocoltesting "github.com/ssvlabs/ssv/protocol/v2/testing" ) type StartNewRunnerDutySpecTest struct { @@ -95,31 +94,31 @@ func (test *StartNewRunnerDutySpecTest) RunAsPartOfMultiTest(t *testing.T, logge switch r := test.Runner.(type) { case *runner.CommitteeRunner: for _, inst := range r.BaseRunner.QBFTController.StoredInstances { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } if r.BaseRunner.State.RunningInstance != nil { - r.BaseRunner.State.RunningInstance.ValueChecker = qbfttesting.TestingValueChecker{} + r.BaseRunner.State.RunningInstance.ValueChecker = protocoltesting.TestingValueChecker{} } case *runner.AggregatorRunner: for _, inst := range r.BaseRunner.QBFTController.StoredInstances { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } if r.BaseRunner.State.RunningInstance != nil { - r.BaseRunner.State.RunningInstance.ValueChecker = qbfttesting.TestingValueChecker{} + r.BaseRunner.State.RunningInstance.ValueChecker = protocoltesting.TestingValueChecker{} } case *runner.ProposerRunner: for _, inst := range r.BaseRunner.QBFTController.StoredInstances { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } if r.BaseRunner.State.RunningInstance != nil { - r.BaseRunner.State.RunningInstance.ValueChecker = qbfttesting.TestingValueChecker{} + r.BaseRunner.State.RunningInstance.ValueChecker = protocoltesting.TestingValueChecker{} } case *runner.SyncCommitteeAggregatorRunner: for _, inst := range r.BaseRunner.QBFTController.StoredInstances { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } if r.BaseRunner.State.RunningInstance != nil { - r.BaseRunner.State.RunningInstance.ValueChecker = qbfttesting.TestingValueChecker{} + r.BaseRunner.State.RunningInstance.ValueChecker = protocoltesting.TestingValueChecker{} } } diff --git a/protocol/v2/ssv/spectest/ssv_mapping_test.go b/protocol/v2/ssv/spectest/ssv_mapping_test.go index 342a2f802f..1bd3780dd4 100644 --- a/protocol/v2/ssv/spectest/ssv_mapping_test.go +++ b/protocol/v2/ssv/spectest/ssv_mapping_test.go @@ -23,11 +23,11 @@ import ( spectypes "github.com/ssvlabs/ssv-spec/types" spectestingutils "github.com/ssvlabs/ssv-spec/types/testingutils" + "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/networkconfig" "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/protocol/v2/qbft/controller" "github.com/ssvlabs/ssv/protocol/v2/qbft/instance" - qbfttesting "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" ssvtesting "github.com/ssvlabs/ssv/protocol/v2/ssv/testing" "github.com/ssvlabs/ssv/protocol/v2/ssv/validator" @@ -37,7 +37,7 @@ import ( func TestSSVMapping(t *testing.T) { path, err := os.Getwd() require.NoError(t, err) - jsonTests, err := protocoltesting.GenerateSpecTestJSON(path, "ssv") + jsonTests, err := storage.GenerateSpecTestJSON(path, "ssv") require.NoError(t, err) logger := log.TestLogger(t) @@ -393,7 +393,7 @@ func fixRunnerForRun(t *testing.T, runnerMap map[string]interface{}, ks *spectes } func fixControllerForRun(t *testing.T, logger *zap.Logger, runner runner.Runner, contr *controller.Controller, ks *spectestingutils.TestKeySet) *controller.Controller { - config := qbfttesting.TestingConfig(logger, ks) + config := 
protocoltesting.TestingConfig(logger, ks) newContr := controller.NewController( contr.Identifier, contr.CommitteeMember, diff --git a/protocol/v2/ssv/spectest/sync_committee_aggregator_proof_type.go b/protocol/v2/ssv/spectest/sync_committee_aggregator_proof_type.go index 14b6e33eed..d5c35dfe96 100644 --- a/protocol/v2/ssv/spectest/sync_committee_aggregator_proof_type.go +++ b/protocol/v2/ssv/spectest/sync_committee_aggregator_proof_type.go @@ -13,6 +13,7 @@ import ( typescomparable "github.com/ssvlabs/ssv-spec/types/testingutils/comparable" "github.com/stretchr/testify/require" + "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" @@ -59,7 +60,7 @@ func overrideStateComparisonForSyncCommitteeAggregatorProofSpecTest(t *testing.T testType = strings.Replace(testType, "spectest.", "synccommitteeaggregator.", 1) runnerState := &runner.State{} - specDir, err := protocoltesting.GetSpecDir("", filepath.Join("ssv", "spectest")) + specDir, err := storage.GetSpecDir("", filepath.Join("ssv", "spectest")) require.NoError(t, err) runnerState, err = typescomparable.UnmarshalStateComparison(specDir, name, testType, runnerState) require.NoError(t, err) diff --git a/protocol/v2/ssv/spectest/util.go b/protocol/v2/ssv/spectest/util.go index cccd8f903a..bdf4bba91a 100644 --- a/protocol/v2/ssv/spectest/util.go +++ b/protocol/v2/ssv/spectest/util.go @@ -7,8 +7,8 @@ import ( typescomparable "github.com/ssvlabs/ssv-spec/types/testingutils/comparable" "github.com/stretchr/testify/require" + "github.com/ssvlabs/ssv/ibft/storage" "github.com/ssvlabs/ssv/networkconfig" - qbfttesting "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" protocoltesting "github.com/ssvlabs/ssv/protocol/v2/testing" ) @@ -32,7 +32,7 @@ func runnerForTest(t *testing.T, runnerType runner.Runner, name string, testType default: t.Fatalf("unknown runner type") } - specDir, err := protocoltesting.GetSpecDir("", filepath.Join("ssv", "spectest")) + specDir, err := storage.GetSpecDir("", filepath.Join("ssv", "spectest")) require.NoError(t, err) r, err = typescomparable.UnmarshalStateComparison(specDir, name, testType, r) require.NoError(t, err) @@ -41,39 +41,39 @@ func runnerForTest(t *testing.T, runnerType runner.Runner, name string, testType switch runnerType.(type) { case *runner.CommitteeRunner: r.(*runner.CommitteeRunner).BaseRunner.NetworkConfig = networkconfig.TestNetwork - r.(*runner.CommitteeRunner).ValCheck = qbfttesting.TestingValueChecker{} + r.(*runner.CommitteeRunner).ValCheck = protocoltesting.TestingValueChecker{} for _, inst := range r.(*runner.CommitteeRunner).BaseRunner.QBFTController.StoredInstances { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } if r.(*runner.CommitteeRunner).BaseRunner.State != nil && r.(*runner.CommitteeRunner).BaseRunner.State.RunningInstance != nil { - r.(*runner.CommitteeRunner).BaseRunner.State.RunningInstance.ValueChecker = qbfttesting.TestingValueChecker{} + r.(*runner.CommitteeRunner).BaseRunner.State.RunningInstance.ValueChecker = protocoltesting.TestingValueChecker{} } case *runner.AggregatorRunner: r.(*runner.AggregatorRunner).BaseRunner.NetworkConfig = networkconfig.TestNetwork - r.(*runner.AggregatorRunner).ValCheck = qbfttesting.TestingValueChecker{} + r.(*runner.AggregatorRunner).ValCheck = protocoltesting.TestingValueChecker{} for _, inst := range 
r.(*runner.AggregatorRunner).BaseRunner.QBFTController.StoredInstances { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } if r.(*runner.AggregatorRunner).BaseRunner.State != nil && r.(*runner.AggregatorRunner).BaseRunner.State.RunningInstance != nil { - r.(*runner.AggregatorRunner).BaseRunner.State.RunningInstance.ValueChecker = qbfttesting.TestingValueChecker{} + r.(*runner.AggregatorRunner).BaseRunner.State.RunningInstance.ValueChecker = protocoltesting.TestingValueChecker{} } case *runner.ProposerRunner: r.(*runner.ProposerRunner).BaseRunner.NetworkConfig = networkconfig.TestNetwork - r.(*runner.ProposerRunner).ValCheck = qbfttesting.TestingValueChecker{} + r.(*runner.ProposerRunner).ValCheck = protocoltesting.TestingValueChecker{} for _, inst := range r.(*runner.ProposerRunner).BaseRunner.QBFTController.StoredInstances { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } if r.(*runner.ProposerRunner).BaseRunner.State != nil && r.(*runner.ProposerRunner).BaseRunner.State.RunningInstance != nil { - r.(*runner.ProposerRunner).BaseRunner.State.RunningInstance.ValueChecker = qbfttesting.TestingValueChecker{} + r.(*runner.ProposerRunner).BaseRunner.State.RunningInstance.ValueChecker = protocoltesting.TestingValueChecker{} } case *runner.SyncCommitteeAggregatorRunner: r.(*runner.SyncCommitteeAggregatorRunner).BaseRunner.NetworkConfig = networkconfig.TestNetwork - r.(*runner.SyncCommitteeAggregatorRunner).ValCheck = qbfttesting.TestingValueChecker{} + r.(*runner.SyncCommitteeAggregatorRunner).ValCheck = protocoltesting.TestingValueChecker{} for _, inst := range r.(*runner.SyncCommitteeAggregatorRunner).BaseRunner.QBFTController.StoredInstances { - inst.ValueChecker = qbfttesting.TestingValueChecker{} + inst.ValueChecker = protocoltesting.TestingValueChecker{} } if r.(*runner.SyncCommitteeAggregatorRunner).BaseRunner.State != nil && r.(*runner.SyncCommitteeAggregatorRunner).BaseRunner.State.RunningInstance != nil { - r.(*runner.SyncCommitteeAggregatorRunner).BaseRunner.State.RunningInstance.ValueChecker = qbfttesting.TestingValueChecker{} + r.(*runner.SyncCommitteeAggregatorRunner).BaseRunner.State.RunningInstance.ValueChecker = protocoltesting.TestingValueChecker{} } case *runner.ValidatorRegistrationRunner: r.(*runner.ValidatorRegistrationRunner).BaseRunner.NetworkConfig = networkconfig.TestNetwork diff --git a/protocol/v2/ssv/testing/runner.go b/protocol/v2/ssv/testing/runner.go index 459a376d79..0b5279d8dd 100644 --- a/protocol/v2/ssv/testing/runner.go +++ b/protocol/v2/ssv/testing/runner.go @@ -16,7 +16,6 @@ import ( "github.com/ssvlabs/ssv/doppelganger" "github.com/ssvlabs/ssv/networkconfig" "github.com/ssvlabs/ssv/protocol/v2/qbft/controller" - "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" "github.com/ssvlabs/ssv/protocol/v2/ssv" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" "github.com/ssvlabs/ssv/protocol/v2/ssv/testing/mocks" @@ -105,14 +104,14 @@ var ConstructBaseRunner = func( valCheck = nil } - config := testing.TestingConfig(logger, keySet) + config := protocoltesting.TestingConfig(logger, keySet) config.ProposerF = func(state *specqbft.State, round specqbft.Round) spectypes.OperatorID { return 1 } config.Network = net config.BeaconSigner = km - contr := testing.NewTestingQBFTController( + contr := protocoltesting.NewTestingQBFTController( spectestingutils.Testing4SharesSet(), identifier[:], operator, @@ -371,13 +370,13 @@ var 
ConstructBaseRunnerWithShareMap = func( valCheck = nil } - config := testing.TestingConfig(logger, keySetInstance) + config := protocoltesting.TestingConfig(logger, keySetInstance) config.ProposerF = func(state *specqbft.State, round specqbft.Round) spectypes.OperatorID { return 1 } config.Network = net - contr = testing.NewTestingQBFTController( + contr = protocoltesting.NewTestingQBFTController( spectestingutils.Testing4SharesSet(), identifier[:], committeeMember, diff --git a/protocol/v2/qbft/testing/storage.go b/protocol/v2/ssv/testing/storage.go similarity index 94% rename from protocol/v2/qbft/testing/storage.go rename to protocol/v2/ssv/testing/storage.go index d61e4b3a4e..d4218e0a2c 100644 --- a/protocol/v2/qbft/testing/storage.go +++ b/protocol/v2/ssv/testing/storage.go @@ -47,6 +47,6 @@ func newStoresFromRoles(logger *zap.Logger, db basedb.Database, roles ...spectyp return stores } -func TestingStores(logger *zap.Logger) *qbftstorage.ParticipantStores { +func testingStores(logger *zap.Logger) *qbftstorage.ParticipantStores { return newStoresFromRoles(logger, getDB(logger), allRoles...) } diff --git a/protocol/v2/ssv/testing/validator.go b/protocol/v2/ssv/testing/validator.go index bef15b9041..b39288ee0f 100644 --- a/protocol/v2/ssv/testing/validator.go +++ b/protocol/v2/ssv/testing/validator.go @@ -10,10 +10,9 @@ import ( "github.com/ssvlabs/ssv/ssvsigner/ekm" "github.com/ssvlabs/ssv/networkconfig" - "github.com/ssvlabs/ssv/protocol/v2/qbft/testing" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" "github.com/ssvlabs/ssv/protocol/v2/ssv/validator" - testing2 "github.com/ssvlabs/ssv/protocol/v2/testing" + "github.com/ssvlabs/ssv/protocol/v2/testing" "github.com/ssvlabs/ssv/protocol/v2/types" ) @@ -23,8 +22,8 @@ var BaseValidator = func(logger *zap.Logger, keySet *spectestingutils.TestKeySet commonOpts := &validator.CommonOptions{ NetworkConfig: networkconfig.TestNetwork, Network: spectestingutils.NewTestingNetwork(1, keySet.OperatorKeys[1]), - Beacon: testing2.NewTestingBeaconNodeWrapped(), - Storage: testing.TestingStores(logger), + Beacon: testing.NewTestingBeaconNodeWrapped(), + Storage: testingStores(logger), Signer: ekm.NewTestingKeyManagerAdapter(spectestingutils.NewTestingKeyManager()), } diff --git a/protocol/v2/ssv/validator/committee.go b/protocol/v2/ssv/validator/committee.go index 23b97d7ed2..4694dc1f76 100644 --- a/protocol/v2/ssv/validator/committee.go +++ b/protocol/v2/ssv/validator/committee.go @@ -4,6 +4,7 @@ import ( "context" "crypto/sha256" "encoding/json" + "fmt" "sync" "github.com/attestantio/go-eth2-client/spec/phase0" @@ -19,6 +20,7 @@ import ( "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/observability/log/fields" "github.com/ssvlabs/ssv/observability/traces" + "github.com/ssvlabs/ssv/protocol/v2/message" "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" "github.com/ssvlabs/ssv/protocol/v2/types" @@ -230,6 +232,141 @@ func (c *Committee) prepareDuty(logger *zap.Logger, duty *spectypes.CommitteeDut return shares, attesters, runnableDuty, nil } +// ProcessMessage processes p2p message of all types +func (c *Committee) ProcessMessage(ctx context.Context, logger *zap.Logger, msg *queue.SSVMessage) error { + msgType := msg.GetType() + msgID := msg.GetID() + + // Validate message (+ verify SignedSSVMessage's signature) + if msgType != message.SSVEventMsgType { + if err := msg.SignedSSVMessage.Validate(); err != nil { + return fmt.Errorf("invalid SignedSSVMessage: %w", err) + } + if err := 
spectypes.Verify(msg.SignedSSVMessage, c.CommitteeMember.Committee); err != nil { + return spectypes.WrapError(spectypes.SSVMessageHasInvalidSignatureErrorCode, fmt.Errorf("SignedSSVMessage has an invalid signature: %w", err)) + } + if err := c.validateMessage(msg.SignedSSVMessage.SSVMessage); err != nil { + // TODO - we should improve this error message as is suggested by the commented-out code here + // (and also remove nolint annotation), currently we cannot do it due to spec-tests expecting + // this exact format we are stuck with. + //return fmt.Errorf("SSVMessage invalid: %w", err) + return fmt.Errorf("Message invalid: %w", err) //nolint:staticcheck + } + } + + slot, err := msg.Slot() + if err != nil { + return fmt.Errorf("couldn't get message slot: %w", err) + } + dutyID := fields.BuildCommitteeDutyID(types.OperatorIDsFromOperators(c.CommitteeMember.Committee), c.networkConfig.EstimatedEpochAtSlot(slot), slot) + + ctx, span := tracer.Start(traces.Context(ctx, dutyID), + observability.InstrumentName(observabilityNamespace, "process_committee_message"), + trace.WithAttributes( + observability.ValidatorMsgTypeAttribute(msgType), + observability.ValidatorMsgIDAttribute(msgID), + observability.RunnerRoleAttribute(msgID.GetRoleType()), + observability.CommitteeIDAttribute(c.CommitteeMember.CommitteeID), + observability.BeaconSlotAttribute(slot), + observability.DutyIDAttribute(dutyID), + ), + ) + defer span.End() + + switch msgType { + case spectypes.SSVConsensusMsgType: + qbftMsg := &specqbft.Message{} + if err := qbftMsg.Decode(msg.GetData()); err != nil { + return traces.Errorf(span, "could not decode consensus Message: %w", err) + } + if err := qbftMsg.Validate(); err != nil { + return traces.Errorf(span, "invalid QBFT Message: %w", err) + } + + c.mtx.RLock() + r, exists := c.Runners[slot] + c.mtx.RUnlock() + if !exists { + return spectypes.WrapError(spectypes.NoRunnerForSlotErrorCode, traces.Errorf(span, "no runner found for message's slot")) + } + + if err := r.ProcessConsensus(ctx, logger, msg.SignedSSVMessage); err != nil { + return traces.Error(span, err) + } + + span.SetStatus(codes.Ok, "") + return nil + case spectypes.SSVPartialSignatureMsgType: + pSigMessages := &spectypes.PartialSignatureMessages{} + if err := pSigMessages.Decode(msg.SignedSSVMessage.SSVMessage.GetData()); err != nil { + return traces.Errorf(span, "could not decode PartialSignatureMessages: %w", err) + } + + // Validate + if len(msg.SignedSSVMessage.OperatorIDs) != 1 { + return traces.Errorf(span, "PartialSignatureMessage has more than 1 signer") + } + + if err := pSigMessages.ValidateForSigner(msg.SignedSSVMessage.OperatorIDs[0]); err != nil { + return traces.Errorf(span, "invalid PartialSignatureMessages: %w", err) + } + + if pSigMessages.Type == spectypes.PostConsensusPartialSig { + c.mtx.RLock() + r, exists := c.Runners[pSigMessages.Slot] + c.mtx.RUnlock() + if !exists { + return spectypes.WrapError(spectypes.NoRunnerForSlotErrorCode, traces.Errorf(span, "no runner found for message's slot")) + } + if err := r.ProcessPostConsensus(ctx, logger, pSigMessages); err != nil { + return traces.Error(span, err) + } + } + + span.SetStatus(codes.Ok, "") + return nil + case message.SSVEventMsgType: + eventMsg, ok := msg.Body.(*types.EventMsg) + if !ok { + return traces.Error(span, fmt.Errorf("could not decode event message")) + } + + span.SetAttributes(observability.ValidatorEventTypeAttribute(eventMsg.Type)) + + switch eventMsg.Type { + case types.Timeout: + slot, err := msg.Slot() + if err != nil { + return 
traces.Errorf(span, "could not get slot from message: %w", err) + } + + c.mtx.RLock() + dutyRunner, found := c.Runners[slot] + c.mtx.RUnlock() + + if !found { + return traces.Errorf(span, "no committee runner or queue found for slot") + } + + timeoutData, err := eventMsg.GetTimeoutData() + if err != nil { + return traces.Errorf(span, "get timeout data: %w", err) + } + + if err := dutyRunner.OnTimeoutQBFT(ctx, logger, timeoutData); err != nil { + return traces.Errorf(span, "timeout event: %w", err) + } + + span.SetStatus(codes.Ok, "") + return nil + default: + return traces.Errorf(span, "unknown event msg - %s", eventMsg.Type.String()) + } + default: + return traces.Errorf(span, "unknown message type: %d", msgType) + } +} + func (c *Committee) unsafePruneExpiredRunners(logger *zap.Logger, currentSlot phase0.Slot) { const lateSlotAllowance = 2 // LateSlotAllowance from message/validation/const.go runnerExpirySlots := phase0.Slot(c.networkConfig.SlotsPerEpoch + lateSlotAllowance) @@ -246,7 +383,7 @@ func (c *Committee) unsafePruneExpiredRunners(logger *zap.Logger, currentSlot ph epoch := c.networkConfig.EstimatedEpochAtSlot(slot) committeeDutyID := fields.BuildCommitteeDutyID(opIds, epoch, slot) logger = logger.With(fields.DutyID(committeeDutyID)) - logger.Debug("pruning expired committee runner", zap.Uint64("slot", uint64(slot))) + logger.Debug("pruning expired committee runner", zap.Uint64("prune_slot", uint64(slot))) delete(c.Runners, slot) delete(c.Queues, slot) } diff --git a/protocol/v2/ssv/validator/committee_observer.go b/protocol/v2/ssv/validator/committee_observer.go index 9a4412e6af..5bb827660b 100644 --- a/protocol/v2/ssv/validator/committee_observer.go +++ b/protocol/v2/ssv/validator/committee_observer.go @@ -22,7 +22,6 @@ import ( "github.com/ssvlabs/ssv/networkconfig" "github.com/ssvlabs/ssv/observability/log/fields" qbftcontroller "github.com/ssvlabs/ssv/protocol/v2/qbft/controller" - qbftstorage "github.com/ssvlabs/ssv/protocol/v2/qbft/storage" "github.com/ssvlabs/ssv/protocol/v2/ssv" "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" ssvtypes "github.com/ssvlabs/ssv/protocol/v2/types" @@ -177,8 +176,8 @@ func (ncv *CommitteeObserver) ProcessMessage(msg *queue.SSVMessage) error { ) if ncv.newDecidedHandler != nil { - p := qbftstorage.Participation{ - ParticipantsRangeEntry: qbftstorage.ParticipantsRangeEntry{ + p := storage.Participation{ + ParticipantsRangeEntry: storage.ParticipantsRangeEntry{ Slot: slot, Signers: quorum, }, diff --git a/protocol/v2/ssv/validator/committee_queue.go b/protocol/v2/ssv/validator/committee_queue.go index c47cf4d46a..c4c4aeea7d 100644 --- a/protocol/v2/ssv/validator/committee_queue.go +++ b/protocol/v2/ssv/validator/committee_queue.go @@ -160,19 +160,18 @@ func (c *Committee) ConsumeQueue( msgRetries.Set(c.messageID(msg), 0, ttlcache.DefaultTTL) msgRetryItem = msgRetries.Get(c.messageID(msg)) } - msgRetryCnt := msgRetryItem.Value() + attempt := msgRetryItem.Value() + 1 msgLogger = logWithMessageMetadata(msgLogger, msg). With(zap.String("message_identifier", string(c.messageID(msg)))). 
- With(zap.Int64("attempt", msgRetryCnt+1)) + With(zap.Int64("attempt", attempt)) const couldNotHandleMsgLogPrefix = "could not handle message, " switch { case errors.Is(err, runner.ErrNoValidDutiesToExecute): msgLogger.Error("❗ "+couldNotHandleMsgLogPrefix+"dropping message and terminating committee-runner", zap.Error(err)) - case runner.IsRetryable(err) && msgRetryCnt < retryCount: - msgLogger.Debug(fmt.Sprintf(couldNotHandleMsgLogPrefix+"retrying message in ~%dms", retryDelay.Milliseconds()), zap.Error(err)) - msgRetries.Set(c.messageID(msg), msgRetryCnt+1, ttlcache.DefaultTTL) + case runner.IsRetryable(err) && attempt <= retryCount: + msgRetries.Set(c.messageID(msg), attempt, ttlcache.DefaultTTL) go func(msg *queue.SSVMessage) { time.Sleep(retryDelay) if pushed := q.Q.TryPush(msg); !pushed { @@ -183,7 +182,7 @@ func (c *Committee) ConsumeQueue( } }(msg) default: - msgLogger.Warn(couldNotHandleMsgLogPrefix+"dropping message", zap.Error(err)) + msgLogger.Debug(couldNotHandleMsgLogPrefix+"dropping message", zap.Error(err)) } if errors.Is(err, runner.ErrNoValidDutiesToExecute) { @@ -194,105 +193,6 @@ func (c *Committee) ConsumeQueue( } } -// ProcessMessage processes p2p message of all types -func (c *Committee) ProcessMessage(ctx context.Context, logger *zap.Logger, msg *queue.SSVMessage) error { - msgType := msg.GetType() - msgID := msg.GetID() - - // Validate message (+ verify SignedSSVMessage's signature) - if msgType != message.SSVEventMsgType { - if err := msg.SignedSSVMessage.Validate(); err != nil { - return fmt.Errorf("invalid SignedSSVMessage: %w", err) - } - if err := spectypes.Verify(msg.SignedSSVMessage, c.CommitteeMember.Committee); err != nil { - return spectypes.WrapError(spectypes.SSVMessageHasInvalidSignatureErrorCode, fmt.Errorf("SignedSSVMessage has an invalid signature: %w", err)) - } - if err := c.validateMessage(msg.SignedSSVMessage.SSVMessage); err != nil { - // TODO - we should improve this error message as is suggested by the commented-out code here - // (and also remove nolint annotation), currently we cannot do it due to spec-tests expecting - // this exact format we are stuck with. 
- //return fmt.Errorf("SSVMessage invalid: %w", err) - return fmt.Errorf("Message invalid: %w", err) //nolint:staticcheck - } - } - - slot, err := msg.Slot() - if err != nil { - return fmt.Errorf("couldn't get message slot: %w", err) - } - dutyID := fields.BuildCommitteeDutyID(types.OperatorIDsFromOperators(c.CommitteeMember.Committee), c.networkConfig.EstimatedEpochAtSlot(slot), slot) - - ctx, span := tracer.Start(traces.Context(ctx, dutyID), - observability.InstrumentName(observabilityNamespace, "process_committee_message"), - trace.WithAttributes( - observability.ValidatorMsgTypeAttribute(msgType), - observability.ValidatorMsgIDAttribute(msgID), - observability.RunnerRoleAttribute(msgID.GetRoleType()), - observability.CommitteeIDAttribute(c.CommitteeMember.CommitteeID), - observability.BeaconSlotAttribute(slot), - observability.DutyIDAttribute(dutyID), - ), - ) - defer span.End() - - switch msgType { - case spectypes.SSVConsensusMsgType: - qbftMsg := &specqbft.Message{} - if err := qbftMsg.Decode(msg.GetData()); err != nil { - return traces.Errorf(span, "could not decode consensus Message: %w", err) - } - if err := qbftMsg.Validate(); err != nil { - return traces.Errorf(span, "invalid QBFT Message: %w", err) - } - c.mtx.RLock() - r, exists := c.Runners[slot] - c.mtx.RUnlock() - if !exists { - return spectypes.WrapError(spectypes.NoRunnerForSlotErrorCode, traces.Errorf(span, "no runner found for message's slot")) - } - return r.ProcessConsensus(ctx, logger, msg.SignedSSVMessage) - case spectypes.SSVPartialSignatureMsgType: - pSigMessages := &spectypes.PartialSignatureMessages{} - if err := pSigMessages.Decode(msg.SignedSSVMessage.SSVMessage.GetData()); err != nil { - return traces.Errorf(span, "could not decode PartialSignatureMessages: %w", err) - } - - // Validate - if len(msg.SignedSSVMessage.OperatorIDs) != 1 { - return traces.Errorf(span, "PartialSignatureMessage has more than 1 signer") - } - - if err := pSigMessages.ValidateForSigner(msg.SignedSSVMessage.OperatorIDs[0]); err != nil { - return traces.Errorf(span, "invalid PartialSignatureMessages: %w", err) - } - - if pSigMessages.Type == spectypes.PostConsensusPartialSig { - c.mtx.RLock() - r, exists := c.Runners[pSigMessages.Slot] - c.mtx.RUnlock() - if !exists { - return spectypes.WrapError(spectypes.NoRunnerForSlotErrorCode, traces.Errorf(span, "no runner found for message's slot")) - } - if err := r.ProcessPostConsensus(ctx, logger, pSigMessages); err != nil { - return traces.Error(span, err) - } - span.SetStatus(codes.Ok, "") - return nil - } - case message.SSVEventMsgType: - if err := c.handleEventMessage(ctx, logger, msg); err != nil { - return traces.Errorf(span, "could not handle event message: %w", err) - } - span.SetStatus(codes.Ok, "") - return nil - default: - return traces.Errorf(span, "unknown message type: %d", msgType) - } - - span.SetStatus(codes.Ok, "") - return nil -} - func (c *Committee) logWithMessageFields(logger *zap.Logger, msg *queue.SSVMessage) (*zap.Logger, error) { msgType := msg.GetType() @@ -302,6 +202,19 @@ func (c *Committee) logWithMessageFields(logger *zap.Logger, msg *queue.SSVMessa qbftMsg := msg.Body.(*specqbft.Message) logger = logger.With(fields.QBFTRound(qbftMsg.Round), fields.QBFTHeight(qbftMsg.Height)) } + if msg.MsgType == message.SSVEventMsgType { + eventMsg, ok := msg.Body.(*types.EventMsg) + if !ok { + return nil, fmt.Errorf("could not decode event message") + } + if eventMsg.Type == types.Timeout { + timeoutData, err := eventMsg.GetTimeoutData() + if err != nil { + return nil, 
fmt.Errorf("get timeout data: %w", err) + } + logger = logger.With(fields.QBFTRound(timeoutData.Round), fields.QBFTHeight(timeoutData.Height)) + } + } return logger, nil } diff --git a/protocol/v2/ssv/validator/events.go b/protocol/v2/ssv/validator/events.go deleted file mode 100644 index a03d5e7f8a..0000000000 --- a/protocol/v2/ssv/validator/events.go +++ /dev/null @@ -1,81 +0,0 @@ -package validator - -import ( - "context" - - "go.opentelemetry.io/otel/codes" - "go.uber.org/zap" - - "github.com/ssvlabs/ssv/observability" - "github.com/ssvlabs/ssv/observability/traces" - "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" - "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" - "github.com/ssvlabs/ssv/protocol/v2/types" -) - -func (v *Validator) handleEventMessage(ctx context.Context, logger *zap.Logger, msg *queue.SSVMessage, dutyRunner runner.Runner) error { - ctx, span := tracer.Start(ctx, observability.InstrumentName(observabilityNamespace, "handle_event_message")) - defer span.End() - - eventMsg, ok := msg.Body.(*types.EventMsg) - if !ok { - return traces.Errorf(span, "could not decode event message") - } - - span.SetAttributes(observability.ValidatorEventTypeAttribute(eventMsg.Type)) - - switch eventMsg.Type { - case types.Timeout: - if err := dutyRunner.OnTimeoutQBFT(ctx, logger, *eventMsg); err != nil { - return traces.Errorf(span, "timeout event: %w", err) - } - span.SetStatus(codes.Ok, "") - return nil - case types.ExecuteDuty: - if err := v.OnExecuteDuty(ctx, logger, eventMsg); err != nil { - return traces.Errorf(span, "execute duty event: %w", err) - } - span.SetStatus(codes.Ok, "") - return nil - default: - return traces.Errorf(span, "unknown event msg - %s", eventMsg.Type.String()) - } -} - -func (c *Committee) handleEventMessage(ctx context.Context, logger *zap.Logger, msg *queue.SSVMessage) error { - ctx, span := tracer.Start(ctx, observability.InstrumentName(observabilityNamespace, "handle_committee_event_message")) - defer span.End() - - eventMsg, ok := msg.Body.(*types.EventMsg) - if !ok { - return traces.Errorf(span, "could not decode event message") - } - - span.SetAttributes(observability.ValidatorEventTypeAttribute(eventMsg.Type)) - - switch eventMsg.Type { - case types.Timeout: - slot, err := msg.Slot() - if err != nil { - return traces.Errorf(span, "could not get slot from message: %w", err) - } - c.mtx.RLock() - dutyRunner, found := c.Runners[slot] - c.mtx.RUnlock() - - if !found { - const errMsg = "no committee runner or queue found for slot" - logger.Error(errMsg) - span.SetStatus(codes.Error, errMsg) - return nil - } - - if err := dutyRunner.OnTimeoutQBFT(ctx, logger, *eventMsg); err != nil { - return traces.Errorf(span, "timeout event: %w", err) - } - span.SetStatus(codes.Ok, "") - return nil - default: - return traces.Errorf(span, "unknown event msg - %s", eventMsg.Type.String()) - } -} diff --git a/protocol/v2/ssv/validator/msg_queue.go b/protocol/v2/ssv/validator/msg_queue.go index a380b40d80..40c980c7db 100644 --- a/protocol/v2/ssv/validator/msg_queue.go +++ b/protocol/v2/ssv/validator/msg_queue.go @@ -11,6 +11,7 @@ import ( "github.com/ssvlabs/ssv/observability/log/fields" "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" + ssvtypes "github.com/ssvlabs/ssv/protocol/v2/types" ) // MessageHandler process the provided message. 
Message processing can fail with retryable or @@ -35,7 +36,7 @@ func messageID(msg *queue.SSVMessage, logger *zap.Logger) msgIDType { } if msg.MsgType == spectypes.SSVPartialSignatureMsgType { psm := msg.Body.(*spectypes.PartialSignatureMessages) - signer := fmt.Sprintf("%d", psm.Messages[0].Signer) // same signer for all messages + signer := fmt.Sprintf("%d", ssvtypes.PartialSigMsgSigner(psm)) return msgIDType(fmt.Sprintf("%d-%d-%d-%s-%s", msgSlot, msg.MsgType, psm.Type, msg.MsgID, signer)) } return idUndefined @@ -54,11 +55,9 @@ func logWithMessageMetadata(logger *zap.Logger, msg *queue.SSVMessage) *zap.Logg if msg.MsgType == spectypes.SSVPartialSignatureMsgType { psm := msg.Body.(*spectypes.PartialSignatureMessages) - // signer must be the same for all messages, at least 1 message must be present (this is validated prior) - signer := psm.Messages[0].Signer logger = logger.With( zap.Uint64("partial_sig_msg_type", uint64(psm.Type)), - zap.Uint64("signer", signer), + zap.Uint64("signer", ssvtypes.PartialSigMsgSigner(psm)), ) } diff --git a/protocol/v2/ssv/validator/queue_validator.go b/protocol/v2/ssv/validator/queue_validator.go index 7944b8673d..1d8d854a40 100644 --- a/protocol/v2/ssv/validator/queue_validator.go +++ b/protocol/v2/ssv/validator/queue_validator.go @@ -179,17 +179,17 @@ func (v *Validator) StartQueueConsumer( msgRetries.Set(v.messageID(msg), 0, ttlcache.DefaultTTL) msgRetryItem = msgRetries.Get(v.messageID(msg)) } - msgRetryCnt := msgRetryItem.Value() + attempt := msgRetryItem.Value() + 1 msgLogger = logWithMessageMetadata(msgLogger, msg). With(zap.String("message_identifier", string(v.messageID(msg)))). - With(zap.Int64("attempt", msgRetryCnt+1)) + With(zap.Int64("attempt", attempt)) const couldNotHandleMsgLogPrefix = "could not handle message, " switch { - case runner.IsRetryable(err) && msgRetryCnt < retryCount: + case runner.IsRetryable(err) && attempt <= retryCount: msgLogger.Debug(fmt.Sprintf(couldNotHandleMsgLogPrefix+"retrying message in ~%dms", retryDelay.Milliseconds()), zap.Error(err)) - msgRetries.Set(v.messageID(msg), msgRetryCnt+1, ttlcache.DefaultTTL) + msgRetries.Set(v.messageID(msg), attempt, ttlcache.DefaultTTL) go func(msg *queue.SSVMessage) { time.Sleep(retryDelay) if pushed := q.TryPush(msg); !pushed { @@ -200,7 +200,7 @@ func (v *Validator) StartQueueConsumer( } }(msg) default: - msgLogger.Warn(couldNotHandleMsgLogPrefix+"dropping message", zap.Error(err)) + msgLogger.Debug(couldNotHandleMsgLogPrefix+"dropping message", zap.Error(err)) } } } @@ -218,118 +218,6 @@ func (v *Validator) StartQueueConsumer( }() } -// ProcessMessage processes p2p message of all types -func (v *Validator) ProcessMessage(ctx context.Context, logger *zap.Logger, msg *queue.SSVMessage) error { - msgType := msg.GetType() - msgID := msg.GetID() - - // Validate message (+ verify SignedSSVMessage's signature) - if msgType != message.SSVEventMsgType { - if err := msg.SignedSSVMessage.Validate(); err != nil { - return fmt.Errorf("invalid SignedSSVMessage: %w", err) - } - if err := spectypes.Verify(msg.SignedSSVMessage, v.Operator.Committee); err != nil { - return spectypes.WrapError(spectypes.SSVMessageHasInvalidSignatureErrorCode, fmt.Errorf("SignedSSVMessage has an invalid signature: %w", err)) - } - } - - slot, err := msg.Slot() - if err != nil { - return fmt.Errorf("couldn't get message slot: %w", err) - } - dutyID := fields.BuildDutyID(v.NetworkConfig.EstimatedEpochAtSlot(slot), slot, msgID.GetRoleType(), v.Share.ValidatorIndex) - - ctx, span := tracer.Start(ctx, - 
observability.InstrumentName(observabilityNamespace, "process_message"), - trace.WithAttributes( - observability.ValidatorMsgTypeAttribute(msgType), - observability.ValidatorMsgIDAttribute(msgID), - observability.RunnerRoleAttribute(msgID.GetRoleType()), - observability.BeaconSlotAttribute(slot), - observability.DutyIDAttribute(dutyID), - ), - ) - defer span.End() - - // Get runner - dutyRunner := v.DutyRunners.DutyRunnerForMsgID(msgID) - if dutyRunner == nil { - return traces.Errorf(span, "could not get duty runner for msg ID %v", msgID) - } - - // Validate message for runner - if err := validateMessage(v.Share.Share, msg); err != nil { - return traces.Errorf(span, "message invalid for msg ID %v: %w", msgID, err) - } - switch msgType { - case spectypes.SSVConsensusMsgType: - qbftMsg, ok := msg.Body.(*specqbft.Message) - if !ok { - return traces.Errorf(span, "could not decode consensus message from network message") - } - - if err := qbftMsg.Validate(); err != nil { - return traces.Errorf(span, "invalid QBFT Message: %w", err) - } - - if err := dutyRunner.ProcessConsensus(ctx, logger, msg.SignedSSVMessage); err != nil { - return traces.Error(span, err) - } - span.SetStatus(codes.Ok, "") - return nil - case spectypes.SSVPartialSignatureMsgType: - signedMsg, ok := msg.Body.(*spectypes.PartialSignatureMessages) - if !ok { - return traces.Errorf(span, "could not decode post consensus message from network message") - } - - span.SetAttributes(observability.ValidatorPartialSigMsgTypeAttribute(signedMsg.Type)) - - if len(msg.SignedSSVMessage.OperatorIDs) != 1 { - return traces.Errorf(span, "PartialSignatureMessage has more than 1 signer") - } - - if err := signedMsg.ValidateForSigner(msg.SignedSSVMessage.OperatorIDs[0]); err != nil { - return traces.Errorf(span, "invalid PartialSignatureMessages: %w", err) - } - - if signedMsg.Type == spectypes.PostConsensusPartialSig { - span.AddEvent("processing post-consensus message") - if err := dutyRunner.ProcessPostConsensus(ctx, logger, signedMsg); err != nil { - return traces.Error(span, err) - } - span.SetStatus(codes.Ok, "") - return nil - } - span.AddEvent("processing pre-consensus message") - if err := dutyRunner.ProcessPreConsensus(ctx, logger, signedMsg); err != nil { - return traces.Error(span, err) - } - span.SetStatus(codes.Ok, "") - return nil - case message.SSVEventMsgType: - if err := v.handleEventMessage(ctx, logger, msg, dutyRunner); err != nil { - return traces.Errorf(span, "could not handle event message: %w", err) - } - span.SetStatus(codes.Ok, "") - return nil - default: - return traces.Errorf(span, "unknown message type %d", msgType) - } -} - -func validateMessage(share spectypes.Share, msg *queue.SSVMessage) error { - if !share.ValidatorPubKey.MessageIDBelongs(msg.GetID()) { - return errors.New("msg ID doesn't match validator ID") - } - - if len(msg.GetData()) == 0 { - return errors.New("msg data is invalid") - } - - return nil -} - func (v *Validator) logWithMessageFields(logger *zap.Logger, msg *queue.SSVMessage) (*zap.Logger, error) { msgType := msg.GetType() msgID := msg.GetID() @@ -352,6 +240,19 @@ func (v *Validator) logWithMessageFields(logger *zap.Logger, msg *queue.SSVMessa qbftMsg := msg.Body.(*specqbft.Message) logger = logger.With(fields.QBFTRound(qbftMsg.Round), fields.QBFTHeight(qbftMsg.Height)) } + if msg.MsgType == message.SSVEventMsgType { + eventMsg, ok := msg.Body.(*types.EventMsg) + if !ok { + return nil, fmt.Errorf("could not decode event message") + } + if eventMsg.Type == types.Timeout { + timeoutData, err := 
eventMsg.GetTimeoutData() + if err != nil { + return nil, fmt.Errorf("get timeout data: %w", err) + } + logger = logger.With(fields.QBFTRound(timeoutData.Round), fields.QBFTHeight(timeoutData.Height)) + } + } return logger, nil } diff --git a/protocol/v2/ssv/validator/timer.go b/protocol/v2/ssv/validator/timer.go index bb703641b0..218a82adf7 100644 --- a/protocol/v2/ssv/validator/timer.go +++ b/protocol/v2/ssv/validator/timer.go @@ -89,13 +89,9 @@ func (c *Committee) onTimeout(ctx context.Context, logger *zap.Logger, identifie c.mtx.RLock() // read-lock for c.Queues, c.Runners defer c.mtx.RUnlock() - // only run if the validator is started - //if v.state != uint32(Started) { - // return - //} dr := c.Runners[phase0.Slot(height)] if dr == nil { // only happens when we prune expired runners - logger.Debug("❗no committee runner found for slot", fields.Slot(phase0.Slot(height))) + logger.Debug("❗no committee runner found for slot") return } @@ -116,10 +112,8 @@ func (c *Committee) onTimeout(ctx context.Context, logger *zap.Logger, identifie } if pushed := c.Queues[phase0.Slot(height)].Q.TryPush(dec); !pushed { - logger.Warn("❗️ dropping timeout message because the queue is full", - fields.RunnerRole(identifier.GetRoleType())) + logger.Warn("❗️ dropping timeout message because the queue is full", fields.RunnerRole(identifier.GetRoleType())) } - // logger.Debug("📬 queue: pushed message", fields.PubKey(identifier.GetPubKey()), fields.MessageID(dec.MsgID), fields.MessageType(dec.MsgType)) } } diff --git a/protocol/v2/ssv/validator/validator.go b/protocol/v2/ssv/validator/validator.go index 87e9ed76c6..6c422ae8aa 100644 --- a/protocol/v2/ssv/validator/validator.go +++ b/protocol/v2/ssv/validator/validator.go @@ -2,6 +2,7 @@ package validator import ( "context" + "fmt" "sync" specqbft "github.com/ssvlabs/ssv-spec/qbft" @@ -18,6 +19,7 @@ import ( "github.com/ssvlabs/ssv/observability/log" "github.com/ssvlabs/ssv/observability/log/fields" "github.com/ssvlabs/ssv/observability/traces" + "github.com/ssvlabs/ssv/protocol/v2/message" "github.com/ssvlabs/ssv/protocol/v2/ssv/queue" "github.com/ssvlabs/ssv/protocol/v2/ssv/runner" ssvtypes "github.com/ssvlabs/ssv/protocol/v2/types" @@ -122,3 +124,142 @@ func (v *Validator) StartDuty(ctx context.Context, logger *zap.Logger, duty spec span.SetStatus(codes.Ok, "") return nil } + +// ProcessMessage processes p2p message of all types +func (v *Validator) ProcessMessage(ctx context.Context, logger *zap.Logger, msg *queue.SSVMessage) error { + msgType := msg.GetType() + msgID := msg.GetID() + + // Validate message (+ verify SignedSSVMessage's signature) + if msgType != message.SSVEventMsgType { + if err := msg.SignedSSVMessage.Validate(); err != nil { + return fmt.Errorf("invalid SignedSSVMessage: %w", err) + } + if err := spectypes.Verify(msg.SignedSSVMessage, v.Operator.Committee); err != nil { + return spectypes.WrapError(spectypes.SSVMessageHasInvalidSignatureErrorCode, fmt.Errorf("SignedSSVMessage has an invalid signature: %w", err)) + } + } + + slot, err := msg.Slot() + if err != nil { + return fmt.Errorf("couldn't get message slot: %w", err) + } + dutyID := fields.BuildDutyID(v.NetworkConfig.EstimatedEpochAtSlot(slot), slot, msgID.GetRoleType(), v.Share.ValidatorIndex) + + ctx, span := tracer.Start(ctx, + observability.InstrumentName(observabilityNamespace, "process_message"), + trace.WithAttributes( + observability.ValidatorMsgTypeAttribute(msgType), + observability.ValidatorMsgIDAttribute(msgID), + observability.RunnerRoleAttribute(msgID.GetRoleType()), + 
observability.BeaconSlotAttribute(slot), + observability.DutyIDAttribute(dutyID), + ), + ) + defer span.End() + + // Get runner + dutyRunner := v.DutyRunners.DutyRunnerForMsgID(msgID) + if dutyRunner == nil { + return traces.Errorf(span, "could not get duty runner for msg ID %v", msgID) + } + + // Validate message for runner + if err := validateMessage(v.Share.Share, msg); err != nil { + return traces.Errorf(span, "message invalid for msg ID %v: %w", msgID, err) + } + switch msgType { + case spectypes.SSVConsensusMsgType: + qbftMsg, ok := msg.Body.(*specqbft.Message) + if !ok { + return traces.Errorf(span, "could not decode consensus message from network message") + } + if err := qbftMsg.Validate(); err != nil { + return traces.Errorf(span, "invalid QBFT Message: %w", err) + } + + if err := dutyRunner.ProcessConsensus(ctx, logger, msg.SignedSSVMessage); err != nil { + return traces.Errorf(span, "process consensus message: %w", err) + } + + span.SetStatus(codes.Ok, "") + return nil + case spectypes.SSVPartialSignatureMsgType: + signedMsg, ok := msg.Body.(*spectypes.PartialSignatureMessages) + if !ok { + return traces.Errorf(span, "could not decode post consensus message from network message") + } + + span.SetAttributes(observability.ValidatorPartialSigMsgTypeAttribute(signedMsg.Type)) + + if len(msg.SignedSSVMessage.OperatorIDs) != 1 { + return traces.Errorf(span, "PartialSignatureMessage has more than 1 signer") + } + + if err := signedMsg.ValidateForSigner(msg.SignedSSVMessage.OperatorIDs[0]); err != nil { + return traces.Errorf(span, "invalid PartialSignatureMessages: %w", err) + } + + if signedMsg.Type == spectypes.PostConsensusPartialSig { + span.AddEvent("processing post-consensus message") + if err := dutyRunner.ProcessPostConsensus(ctx, logger, signedMsg); err != nil { + return traces.Errorf(span, "process post-consensus message: %w", err) + } + span.SetStatus(codes.Ok, "") + return nil + } + + span.AddEvent("processing pre-consensus message") + if err := dutyRunner.ProcessPreConsensus(ctx, logger, signedMsg); err != nil { + return traces.Errorf(span, "process pre-consensus message: %w", err) + } + + span.SetStatus(codes.Ok, "") + return nil + case message.SSVEventMsgType: + eventMsg, ok := msg.Body.(*ssvtypes.EventMsg) + if !ok { + return traces.Errorf(span, "could not decode event message") + } + + span.SetAttributes(observability.ValidatorEventTypeAttribute(eventMsg.Type)) + + switch eventMsg.Type { + case ssvtypes.Timeout: + timeoutData, err := eventMsg.GetTimeoutData() + if err != nil { + return traces.Errorf(span, "get timeout data: %w", err) + } + + if err := dutyRunner.OnTimeoutQBFT(ctx, logger, timeoutData); err != nil { + return traces.Errorf(span, "timeout event: %w", err) + } + + span.SetStatus(codes.Ok, "") + return nil + case ssvtypes.ExecuteDuty: + if err := v.OnExecuteDuty(ctx, logger, eventMsg); err != nil { + return traces.Errorf(span, "execute duty event: %w", err) + } + + span.SetStatus(codes.Ok, "") + return nil + default: + return traces.Errorf(span, "unknown event msg - %s", eventMsg.Type.String()) + } + default: + return traces.Errorf(span, "unknown message type %d", msgType) + } +} + +func validateMessage(share spectypes.Share, msg *queue.SSVMessage) error { + if !share.ValidatorPubKey.MessageIDBelongs(msg.GetID()) { + return fmt.Errorf("msg ID doesn't match validator ID") + } + + if len(msg.GetData()) == 0 { + return fmt.Errorf("msg data is invalid") + } + + return nil +} diff --git a/protocol/v2/qbft/testing/utils.go b/protocol/v2/testing/utils.go 
similarity index 100% rename from protocol/v2/qbft/testing/utils.go rename to protocol/v2/testing/utils.go diff --git a/protocol/v2/types/messages.go b/protocol/v2/types/messages.go index 1ba0d0e6c6..7639ac36c4 100644 --- a/protocol/v2/types/messages.go +++ b/protocol/v2/types/messages.go @@ -78,3 +78,10 @@ func (m *EventMsg) Encode() ([]byte, error) { func (m *EventMsg) Decode(data []byte) error { return json.Unmarshal(data, &m) } + +// PartialSigMsgSigner returns the signer for the provided partial-sig message. The signer must be the same for +// all messages, and at least 1 message must be present (this is assumed to have been validated before calling +// this func). +func PartialSigMsgSigner(msg *spectypes.PartialSignatureMessages) spectypes.OperatorID { + return msg.Messages[0].Signer +}
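
The new ssvtypes.PartialSigMsgSigner helper at the end of this patch replaces the inline psm.Messages[0].Signer lookups in msg_queue.go; it assumes the caller has already run ValidateForSigner, so Messages is non-empty and every entry carries the same signer. A minimal standalone sketch of how it is meant to be called — the package main wrapper and the concrete operator ID are illustrative assumptions, not code from this patch:

package main

import (
	"fmt"

	spectypes "github.com/ssvlabs/ssv-spec/types"

	ssvtypes "github.com/ssvlabs/ssv/protocol/v2/types"
)

func main() {
	// Both entries carry the same signer (operator 3 here, chosen arbitrarily) --
	// the invariant PartialSigMsgSigner relies on, and which ValidateForSigner
	// enforces earlier in the message-processing flow shown in this patch.
	psm := &spectypes.PartialSignatureMessages{
		Type: spectypes.PostConsensusPartialSig,
		Slot: 100,
		Messages: []*spectypes.PartialSignatureMessage{
			{Signer: 3},
			{Signer: 3},
		},
	}

	// Equivalent to the previous inline psm.Messages[0].Signer lookups in msg_queue.go.
	signer := ssvtypes.PartialSigMsgSigner(psm)
	fmt.Printf("signer=%d\n", signer) // signer=3
}

Centralizing this in one helper keeps the single-signer assumption documented next to the code that depends on it (the helper's doc comment), instead of being repeated as comments at each call site, as msg_queue.go did before this change.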