Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refresh SimulatePubnet to be more realistic #195

Merged
merged 1 commit into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/App/Program.fs
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ type MissionOptions
[<Option("network-size-limit",
HelpText = "The number of nodes to run in SimulatePubnet",
Required = false,
Default = 100)>]
Default = 600)>]
member self.NetworkSizeLimit = networkSizeLimit

[<Option("pubnet-parallel-catchup-starting-ledger",
Expand Down
2 changes: 1 addition & 1 deletion src/FSLibrary/MissionSimulatePubnet.fs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ let simulatePubnet (context: MissionContext) =
spikeInterval = 65
enableTailLogging = false }

let fullCoreSet = FullPubnetCoreSets context true true
let fullCoreSet = FullPubnetCoreSets context true false

let sdf =
List.find (fun (cs: CoreSet) -> cs.name.StringName = "stellar" || cs.name.StringName = "sdf") fullCoreSet
Expand Down
11 changes: 5 additions & 6 deletions src/FSLibrary/StellarKubeSpecs.fs
Original file line number Diff line number Diff line change
Expand Up @@ -120,24 +120,23 @@ let NetworkDelayScriptResourceRequirements : V1ResourceRequirements =
let GetSimulatePubnetResources networkSize : V1ResourceRequirements =
// Running simulate-pubnet _needs_ well over 200MB RSS per node, and
// depending on queue backups it can spike over 300MB; we have 64GB limit
// for quota so to be generous we give each node 400MB limit and run only
// for quota so to be generous we give each node 600MB limit and run only
// 100 nodes (despite survey showing many more).
//
// We also have a 100vCPU quota but only really 72 cores to play with, so
// to keep some spare room for other jobs without stressing the workers we
// want to stay under 50vCPU, again divided 100 ways across our simulated
// nodes.
//
// So we allocate a 64MB RAM request and 400MB RAM limit to each, and a
// So we allocate a 64MB RAM request and 600MB RAM limit to each, and a
// 0.025vCPU request and 0.5vCPU limit to each.
//
// Previously these values were doubled when networkSize >= 200; the same
// request and limit are now used regardless of network size.
let cpuReqMili = 25
let memReqMebi = 64
let cpuLimMili = 500
let memLimMebi = 400
let k = if networkSize >= 200 then 2 else 1
makeResourceRequirements (k * cpuReqMili) (k * memReqMebi) (k * cpuLimMili) (k * memLimMebi)
let memLimMebi = 600
makeResourceRequirements cpuReqMili memReqMebi cpuLimMili memLimMebi

let SimulatePubnetTier1PerfCoreResourceRequirements : V1ResourceRequirements =
// Tier1 perf simulation is interested in "how fast can we go in practice"
Expand Down Expand Up @@ -808,7 +807,7 @@ type NetworkCfg with
V1StatefulSetSpec(
selector = V1LabelSelector(matchLabels = CfgVal.labels),
serviceName = self.ServiceName,
podManagementPolicy = "OrderedReady",
podManagementPolicy = "Parallel",
template = self.ToPodTemplateSpec coreSet,
replicas = System.Nullable<int>(coreSet.CurrentCount)
)
Expand Down
Loading