Skip to content

Commit

Permalink
[backport] Reducing log spam (#6128) (#6191)
Browse files Browse the repository at this point in the history
* use zap in syncer package (#6121)

## Motivation

Part of the effort to switch from log to zap.

* use zap in proposals package (#6146)

## Motivation

Part of the effort to migrate from log to zap.

* use zap in mesh package (#6147)

## Motivation

Part of the effort to migrate from log to zap.

* use zap in the fetch package (#6109)

## Motivation

Part of the effort to switch from log to zap.

* reducing log spam, shortening error messages in logs (#6128)

Closes #5887

There are two main issues:
1. We have too many level Info+  logs emitted too frequently
2. Some logs are too long

* update changelog
  • Loading branch information
poszu authored Jul 30, 2024
1 parent 05b6b00 commit 7a16b18
Show file tree
Hide file tree
Showing 59 changed files with 717 additions and 794 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ See [RELEASE](./RELEASE.md) for workflow instructions.
* [#6187](https://github.com/spacemeshos/go-spacemesh/pull/6187) The merge tool now ignores files that are not `.key`
files in the `identities` directory when merging two nodes.

* [#6128](https://github.com/spacemeshos/go-spacemesh/pull/6128) Reduce logs spam

## Release v1.6.4

### Improvements
Expand Down
22 changes: 13 additions & 9 deletions activation/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,13 @@ func (b *Builder) run(ctx context.Context, sig *signing.EdSigner) {
for _, poet := range b.poets {
eg.Go(func() error {
_, err := poet.Certify(ctx, sig.NodeID())
if err != nil {
switch {
case errors.Is(err, ErrCertificatesNotSupported):
b.logger.Debug("not certifying (not supported in poet)",
log.ZShortStringer("smesherID", sig.NodeID()),
zap.String("poet", poet.Address()),
)
case err != nil:
b.logger.Warn("failed to certify poet", zap.Error(err), log.ZShortStringer("smesherID", sig.NodeID()))
}
return nil
Expand Down Expand Up @@ -502,7 +508,6 @@ func (b *Builder) BuildNIPostChallenge(ctx context.Context, nodeID types.NodeID)
// build new challenge
logger.Info("building new NiPOST challenge", zap.Uint32("current_epoch", current.Uint32()))
case err != nil:
logger.Info("failed to load NiPoST challenge from local state", zap.Error(err))
return nil, fmt.Errorf("get nipost challenge: %w", err)
case challenge.PublishEpoch < current:
logger.Info(
Expand All @@ -521,7 +526,7 @@ func (b *Builder) BuildNIPostChallenge(ctx context.Context, nodeID types.NodeID)
}
default:
// challenge is fresh
logger.Info("loaded NiPoST challenge from local state",
logger.Debug("loaded NiPoST challenge from local state",
zap.Uint32("current_epoch", current.Uint32()),
zap.Uint32("publish_epoch", challenge.PublishEpoch.Uint32()),
)
Expand Down Expand Up @@ -626,7 +631,7 @@ func (b *Builder) BuildNIPostChallenge(ctx context.Context, nodeID types.NodeID)
PositioningATX: posAtx,
}
}
logger.Info("persisting the new NiPOST challenge", zap.Object("challenge", challenge))
logger.Debug("persisting the new NiPOST challenge", zap.Object("challenge", challenge))
if err := nipost.AddChallenge(b.localDB, nodeID, challenge); err != nil {
return nil, fmt.Errorf("add nipost challenge: %w", err)
}
Expand Down Expand Up @@ -687,7 +692,6 @@ func (b *Builder) PublishActivationTx(ctx context.Context, sig *signing.EdSigner
return fmt.Errorf("wait for publication epoch: %w", ctx.Err())
case <-b.layerClock.AwaitLayer(challenge.PublishEpoch.FirstLayer()):
}
b.logger.Debug("publication epoch has arrived!", log.ZShortStringer("smesherID", sig.NodeID()))

for {
b.logger.Info(
Expand Down Expand Up @@ -917,13 +921,13 @@ func (b *Builder) getPositioningAtx(
return types.EmptyATXID, err
}

b.logger.Info("found candidate positioning atx",
b.logger.Debug("found candidate positioning atx",
log.ZShortStringer("id", id),
log.ZShortStringer("smesherID", nodeID),
)

if previous == nil {
b.logger.Info("selected atx as positioning atx",
b.logger.Info("selected positioning atx",
log.ZShortStringer("id", id),
log.ZShortStringer("smesherID", nodeID))
return id, nil
Expand Down Expand Up @@ -1001,14 +1005,14 @@ func findFullyValidHighTickAtx(

// iterate trough epochs, to get first valid, not malicious ATX with the biggest height
atxdata.IterateHighTicksInEpoch(publish+1, func(id types.ATXID) (contSearch bool) {
logger.Info("found candidate for high-tick atx", log.ZShortStringer("id", id))
logger.Debug("found candidate for high-tick atx", log.ZShortStringer("id", id))
if ctx.Err() != nil {
return false
}
// verify ATX-candidate by getting their dependencies (previous Atx, positioning ATX etc.)
// and verifying PoST for every dependency
if err := validator.VerifyChain(ctx, id, goldenATXID, opts...); err != nil {
logger.Info("rejecting candidate for high-tick atx", zap.Error(err), log.ZShortStringer("id", id))
logger.Debug("rejecting candidate for high-tick atx", zap.Error(err), log.ZShortStringer("id", id))
return true
}
found = &id
Expand Down
10 changes: 5 additions & 5 deletions activation/certifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ func (c *CertifierClient) obtainPostFromLastAtx(ctx context.Context, nodeId type
}
}

c.logger.Info("found POST in an existing ATX", zap.String("atx_id", atxid.Hash32().ShortString()))
c.logger.Debug("found POST in an existing ATX", zap.String("atx_id", atxid.Hash32().ShortString()))
return &nipost.Post{
Nonce: post.Nonce,
Indices: post.Indices,
Expand All @@ -222,21 +222,21 @@ func (c *CertifierClient) obtainPostFromLastAtx(ctx context.Context, nodeId type
}

func (c *CertifierClient) obtainPost(ctx context.Context, id types.NodeID) (*nipost.Post, error) {
c.logger.Info("looking for POST for poet certification")
c.logger.Debug("looking for POST for poet certification")
post, err := nipost.GetPost(c.localDb, id)
switch {
case err == nil:
c.logger.Info("found POST in local DB")
c.logger.Debug("found POST in local DB")
return post, nil
case errors.Is(err, sql.ErrNotFound):
// no post found
default:
return nil, fmt.Errorf("loading initial post from db: %w", err)
}

c.logger.Info("POST not found in local DB. Trying to obtain POST from an existing ATX")
c.logger.Debug("POST not found in local DB. Trying to obtain POST from an existing ATX")
if post, err := c.obtainPostFromLastAtx(ctx, id); err == nil {
c.logger.Info("found POST in an existing ATX")
c.logger.Debug("found POST in an existing ATX")
if err := nipost.AddPost(c.localDb, id, *post); err != nil {
c.logger.Error("failed to save post", zap.Error(err))
}
Expand Down
10 changes: 5 additions & 5 deletions activation/handler_v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ func (h *HandlerV1) syntacticallyValidateDeps(
}

expectedChallengeHash := atx.NIPostChallengeV1.Hash()
h.logger.Info("validating nipost",
h.logger.Debug("validating nipost",
log.ZContext(ctx),
zap.Stringer("expected_challenge_hash", expectedChallengeHash),
zap.Stringer("atx_id", atx.ID()),
Expand All @@ -257,7 +257,7 @@ func (h *HandlerV1) syntacticallyValidateDeps(
)
var invalidIdx *verifying.ErrInvalidIndex
if errors.As(err, &invalidIdx) {
h.logger.Info("ATX with invalid post index",
h.logger.Debug("ATX with invalid post index",
log.ZContext(ctx),
zap.Stringer("atx_id", atx.ID()),
zap.Int("index", invalidIdx.Index),
Expand Down Expand Up @@ -304,7 +304,7 @@ func (h *HandlerV1) validateNonInitialAtx(
}

if needRecheck {
h.logger.Info("validating VRF nonce",
h.logger.Debug("validating VRF nonce",
log.ZContext(ctx),
zap.Stringer("atx_id", atx.ID()),
zap.Bool("post increased", atx.NumUnits > previous.NumUnits),
Expand Down Expand Up @@ -691,7 +691,7 @@ func (h *HandlerV1) processATX(
}

events.ReportNewActivation(atx)
h.logger.Info("new atx",
h.logger.Debug("new atx",
log.ZContext(ctx),
zap.Inline(atx),
zap.Bool("malicious", proof != nil),
Expand Down Expand Up @@ -721,7 +721,7 @@ func (h *HandlerV1) fetchReferences(ctx context.Context, poetRef types.Hash32, a
}

if err := h.fetcher.GetAtxs(ctx, atxIDs, system.WithoutLimiting()); err != nil {
return fmt.Errorf("missing atxs %x: %w", atxIDs, err)
return fmt.Errorf("missing atxs %s: %w", atxIDs, err)
}

h.logger.Debug("done fetching references",
Expand Down
2 changes: 1 addition & 1 deletion activation/handler_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ func (h *HandlerV2) fetchReferences(ctx context.Context, poetRefs []types.Hash32
if len(atxIDs) != 0 {
eg.Go(func() error {
if err := h.fetcher.GetAtxs(ctx, atxIDs, system.WithoutLimiting()); err != nil {
return fmt.Errorf("missing atxs %x: %w", atxIDs, err)
return fmt.Errorf("missing atxs %s: %w", atxIDs, err)
}
return nil
})
Expand Down
3 changes: 3 additions & 0 deletions activation/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ type PoetService interface {
nodeID types.NodeID,
) (*types.PoetRound, error)

// Certify requests a certificate for the given nodeID.
//
// Returns ErrCertificatesNotSupported if the service does not support certificates.
Certify(ctx context.Context, id types.NodeID) (*certifier.PoetCert, error)

// Proof returns the proof for the given round ID.
Expand Down
10 changes: 5 additions & 5 deletions activation/poet.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
var (
ErrInvalidRequest = errors.New("invalid request")
ErrUnauthorized = errors.New("unauthorized")
errCertificatesNotSupported = errors.New("poet doesn't support certificates")
ErrCertificatesNotSupported = errors.New("poet doesn't support certificates")
)

type PoetPowParams struct {
Expand Down Expand Up @@ -115,7 +115,7 @@ func WithLogger(logger *zap.Logger) PoetClientOpts {
c.logger = logger
c.client.Logger = &retryableHttpLogger{inner: logger}
c.client.ResponseLogHook = func(logger retryablehttp.Logger, resp *http.Response) {
c.logger.Info(
c.logger.Debug(
"response received",
zap.Stringer("url", resp.Request.URL),
zap.Int("status", resp.StatusCode),
Expand Down Expand Up @@ -191,7 +191,7 @@ func (c *HTTPPoetClient) CertifierInfo(ctx context.Context) (*url.URL, []byte, e
}
certifierInfo := info.GetCertifier()
if certifierInfo == nil {
return nil, nil, errCertificatesNotSupported
return nil, nil, ErrCertificatesNotSupported
}
url, err := url.Parse(certifierInfo.Url)
if err != nil {
Expand Down Expand Up @@ -309,7 +309,7 @@ func (c *HTTPPoetClient) req(ctx context.Context, method, path string, reqBody,
}

if res.StatusCode != http.StatusOK {
c.logger.Info("got poet response != 200 OK", zap.String("status", res.Status), zap.String("body", string(data)))
c.logger.Debug("poet request failed", zap.String("status", res.Status), zap.String("body", string(data)))
}

switch res.StatusCode {
Expand Down Expand Up @@ -424,7 +424,7 @@ func (c *poetService) authorize(
switch {
case err == nil:
return &PoetAuth{PoetCert: cert}, nil
case errors.Is(err, errCertificatesNotSupported):
case errors.Is(err, ErrCertificatesNotSupported):
logger.Debug("poet doesn't support certificates")
default:
logger.Warn("failed to certify", zap.Error(err))
Expand Down
2 changes: 1 addition & 1 deletion activation/poetdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func (db *PoetDb) StoreProof(ctx context.Context, ref types.PoetProofRef, proofM
)
}

db.logger.Info("stored poet proof",
db.logger.Debug("stored poet proof",
log.ZContext(ctx),
log.ZShortStringer("poet_proof_id", types.Hash32(ref)),
zap.String("round_id", proofMessage.RoundID),
Expand Down
2 changes: 1 addition & 1 deletion activation/post_supervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func (ps *PostSupervisor) runCmd(
ps.logger.Error("start post service", zap.Error(err))
return nil
}
ps.logger.Info(
ps.logger.Debug(
"post service started",
zap.Int("pid", cmd.Process.Pid),
zap.String("cmd", cmd.String()),
Expand Down
12 changes: 6 additions & 6 deletions activation/post_verifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,9 @@ func newOffloadingPostVerifier(
v.prioritizedIds[id] = struct{}{}
}

v.log.Info("starting post verifier")
v.log.Debug("starting post verifier")
v.scale(numWorkers)
v.log.Info("started post verifier")
v.log.Debug("started post verifier")
return v
}

Expand Down Expand Up @@ -355,18 +355,18 @@ func (v *offloadingPostVerifier) Close() error {
return nil
default:
}
v.log.Info("stopping post verifier")
v.log.Debug("stopping post verifier")
close(v.stop)
v.eg.Wait()

v.verifier.Close()
v.log.Info("stopped post verifier")
v.log.Debug("stopped post verifier")
return nil
}

func (w *postVerifierWorker) start() {
w.log.Info("starting")
defer w.log.Info("stopped")
w.log.Debug("starting")
defer w.log.Debug("stopped")
defer close(w.stopped)

for {
Expand Down
2 changes: 1 addition & 1 deletion activation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ func (v *Validator) VerifyChain(ctx context.Context, id, goldenATXID types.ATXID
for _, opt := range opts {
opt(&options)
}
options.logger.Info("verifying ATX chain", zap.Stringer("atx_id", id))
options.logger.Debug("verifying ATX chain", zap.Stringer("atx_id", id))
return v.verifyChainWithOpts(ctx, id, goldenATXID, options)
}

Expand Down
4 changes: 2 additions & 2 deletions api/grpcserver/grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type Server struct {
}

func unaryGrpcLogStart(ctx context.Context, req any, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) {
ctxzap.Info(ctx, "started unary call")
ctxzap.Debug(ctx, "started unary call")
return handler(ctx, req)
}

Expand All @@ -54,7 +54,7 @@ func streamingGrpcLogStart(
_ *grpc.StreamServerInfo,
handler grpc.StreamHandler,
) error {
ctxzap.Info(stream.Context(), "started streaming call")
ctxzap.Debug(stream.Context(), "started streaming call")
return handler(srv, stream)
}

Expand Down
Loading

0 comments on commit 7a16b18

Please sign in to comment.