Skip to content

Commit

Permalink
stagedsync: add dbg.SaveHeapProfileNearOOM to headers stage (#11549) (#…
Browse files Browse the repository at this point in the history
…11551)

cherry-pick 2a98f6a for E2

relates to:
#10734
#11387

To use: restart Erigon with the `SAVE_HEAP_PROFILE=true` env variable set, then
wait until alloc reaches 45% or more in stage_headers at the moment either
"noProgressCounter >= 5" or "Rejected header marked as bad" is hit.
  • Loading branch information
taratorio committed Aug 12, 2024
1 parent 83482a4 commit a1e7362
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 2 deletions.
29 changes: 29 additions & 0 deletions erigon-lib/common/dbg/experiments.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,22 @@ package dbg

import (
"os"
"path/filepath"
"runtime"
"runtime/pprof"
"strconv"
"sync"
"time"

"github.com/ledgerwatch/log/v3"

"github.com/ledgerwatch/erigon-lib/mmap"
)

var (
// force skipping of any non-Erigon2 .torrent files
DownloaderOnlyBlocks = EnvBool("DOWNLOADER_ONLY_BLOCKS", false)
// saveHeapProfile is read from the SAVE_HEAP_PROFILE env variable via EnvBool.
// SaveHeapProfileNearOOM returns immediately unless it is true.
saveHeapProfile = EnvBool("SAVE_HEAP_PROFILE", false)
)

var StagesOnlyBlocks = EnvBool("STAGES_ONLY_BLOCKS", false)
Expand Down Expand Up @@ -320,3 +325,27 @@ func LogHashMismatchReason() bool {
})
return logHashMismatchReason
}

// SaveHeapProfileNearOOM writes a heap profile to "erigon-mem.prof" in the OS
// temp directory once m.Alloc reaches 45% of mmap.TotalMemory().
//
// It is a no-op unless saveHeapProfile is set. The profile is best-effort
// debugging output: failures are logged as warnings and never propagated, so
// callers on the sync path are not affected. Every call that passes the
// threshold recreates (and therefore truncates) the same file.
func SaveHeapProfileNearOOM() {
	if !saveHeapProfile {
		return
	}

	var m runtime.MemStats
	ReadMemStats(&m)
	if m.Alloc < (mmap.TotalMemory()/100)*45 {
		return
	}

	// above 45%
	filePath := filepath.Join(os.TempDir(), "erigon-mem.prof")
	log.Info("[Experiment] saving heap profile as near OOM", "alloc", m.Alloc, "filePath", filePath)

	f, err := os.Create(filePath)
	if err != nil {
		// Without a file there is nothing to write to; report it instead of
		// silently handing a nil *os.File to pprof.
		log.Warn("[Experiment] could not create heap profile file", "filePath", filePath, "err", err)
		return
	}
	defer func() {
		if closeErr := f.Close(); closeErr != nil {
			log.Warn("[Experiment] could not close heap profile file", "filePath", filePath, "err", closeErr)
		}
	}()

	// The heap profile reflects the most recently completed GC, so force one
	// to get up-to-date statistics.
	runtime.GC()
	if err := pprof.WriteHeapProfile(f); err != nil {
		log.Warn("[Experiment] could not write heap profile", "filePath", filePath, "err", err)
	}
}
1 change: 1 addition & 0 deletions eth/stagedsync/stage_headers.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ Loop:
logger.Info("Req/resp stats", "req", stats.Requests, "reqMin", stats.ReqMinBlock, "reqMax", stats.ReqMaxBlock,
"skel", stats.SkeletonRequests, "skelMin", stats.SkeletonReqMinBlock, "skelMax", stats.SkeletonReqMaxBlock,
"resp", stats.Responses, "respMin", stats.RespMinBlock, "respMax", stats.RespMaxBlock, "dups", stats.Duplicates)
dbg.SaveHeapProfileNearOOM()
cfg.hd.LogAnchorState()
if wasProgress {
logger.Warn("Looks like chain is not progressing, moving to the next stage")
Expand Down
7 changes: 5 additions & 2 deletions turbo/stages/headerdownload/header_algos.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"strings"
"time"

"github.com/ledgerwatch/erigon-lib/common/dbg"
"github.com/ledgerwatch/erigon-lib/common/metrics"
"github.com/ledgerwatch/erigon-lib/kv/dbutils"

Expand Down Expand Up @@ -104,7 +105,8 @@ func (hd *HeaderDownload) SingleHeaderAsSegment(headerRaw []byte, header *types.
headerHash := types.RawRlpHash(headerRaw)
if _, bad := hd.badHeaders[headerHash]; bad {
hd.stats.RejectedBadHeaders++
hd.logger.Warn("[downloader] Rejected header marked as bad", "hash", headerHash, "height", header.Number.Uint64())
dbg.SaveHeapProfileNearOOM()
hd.logger.Warn("[downloader] SingleHeaderAsSegment: Rejected header marked as bad", "hash", headerHash, "height", header.Number.Uint64())
return nil, BadBlockPenalty, nil
}
if penalizePoSBlocks && header.Difficulty.Sign() == 0 {
Expand Down Expand Up @@ -517,7 +519,8 @@ func (hd *HeaderDownload) InsertHeader(hf FeedHeaderFunc, terminalTotalDifficult
hd.removeUpwards(link)
dataflow.HeaderDownloadStates.AddChange(link.blockHeight, dataflow.HeaderBad)
hd.stats.RejectedBadHeaders++
hd.logger.Warn("[downloader] Rejected header marked as bad", "hash", link.hash, "height", link.blockHeight)
dbg.SaveHeapProfileNearOOM()
hd.logger.Warn("[downloader] InsertHeader: Rejected header marked as bad", "hash", link.hash, "height", link.blockHeight)
return true, false, 0, lastTime, nil
}
if !link.verified {
Expand Down

0 comments on commit a1e7362

Please sign in to comment.