Skip to content
This repository has been archived by the owner on Aug 23, 2023. It is now read-only.

update memory index prune to hold write lock as briefly as possible #787

Merged
merged 1 commit into from
Dec 21, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions idx/memory/memory.go
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,11 @@ func (m *MemoryIdx) AddOrUpdate(data *schema.MetricData, partition int32) idx.Ar

func (m *MemoryIdx) Update(entry idx.Archive) {
m.Lock()
defer m.Unlock()
if _, ok := m.DefById[entry.Id]; !ok {
m.Unlock()
return
}
*(m.DefById[entry.Id]) = entry
m.Unlock()
}

// indexTags reads the tags of a given metric definition and creates the
Expand Down Expand Up @@ -248,6 +247,7 @@ func (m *MemoryIdx) deindexTags(def *schema.MetricDefinition) {
// Used to rebuild the index from an existing set of metricDefinitions.
func (m *MemoryIdx) Load(defs []schema.MetricDefinition) int {
m.Lock()
defer m.Unlock()
var pre time.Time
var num int
for i := range defs {
Expand All @@ -273,7 +273,6 @@ func (m *MemoryIdx) Load(defs []schema.MetricDefinition) int {
statMetricsActive.Inc()
statAddDuration.Value(time.Since(pre))
}
m.Unlock()
return num
}

Expand Down Expand Up @@ -887,26 +886,28 @@ func (m *MemoryIdx) Prune(orgId int, oldest time.Time) ([]idx.Archive, error) {
oldestUnix := oldest.Unix()
var pruned []idx.Archive
pre := time.Now()
m.RLock()
orgs := []int{orgId}
if orgId == -1 {
log.Info("memory-idx: pruning stale metricDefs across all orgs")
m.RLock()
orgs = make([]int, len(m.Tree))
i := 0
for org := range m.Tree {
orgs[i] = org
i++
}
m.RUnlock()
}
m.RUnlock()
for _, org := range orgs {
m.Lock()
m.RLock()
tree, ok := m.Tree[org]
if !ok {
m.Unlock()
m.RUnlock()
continue
}

var toPrune []string

for _, n := range tree.Items {
if !n.Leaf() {
continue
Expand All @@ -918,15 +919,29 @@ func (m *MemoryIdx) Prune(orgId int, oldest time.Time) ([]idx.Archive, error) {
}
}
if staleCount == len(n.Defs) {
log.Debug("memory-idx: series %s for orgId:%d is stale. pruning it.", n.Path, org)
//we need to delete this node.
defs := m.delete(org, n, true)
statMetricsActive.Dec()
pruned = append(pruned, defs...)
toPrune = append(toPrune, n.Path)
}
}
m.Unlock()
m.RUnlock()

for _, path := range toPrune {
m.Lock()
n, ok := tree.Items[path]
if !ok {
m.Unlock()
log.Debug("memory-idx: series %s for orgId:%d was identified for pruning but cannot be found.", path, org)
continue
}

log.Debug("memory-idx: series %s for orgId:%d is stale. pruning it.", n.Path, org)
defs := m.delete(org, n, true)
statMetricsActive.Dec()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unrelated to this PR, but this looks incorrect. seems like m.delete may delete several paths (children), this assumes 1

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't the if !n.Leaf() { check on line 912 mean that there should never be children? We could add that same check here in case of a race condition where a child is added to a series that's about to be pruned, but that seems like a highly unlikely scenario.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't the if !n.Leaf() { check on line 912 mean that there should never be children?

the only thing that branch does is assure that nodes without metricdefs (data corresponding to that path) are not subject to pruning.
nodes with metricdefs (leaf nodes) are subject to pruning. I think the confusion here is because
in our implementation, a path can be both a leaf and have children.
e.g. foo.bar can have data/metricdef (making it a leaf), while foo.bar.baz also exists (making foo.bar a branch)
In graphite this doesn't work, but some of our customers wanted this behavior, and it seemed fairly easy to support, so that's why we do.

the m.delete call is recursive: it deletes the path, and any sub-paths (I think i actually just discovered a related bug here), so what we should do is track the actual amount of paths that got deleted by the delete call, or I think better: the pruning process shouldn't recursively delete, but rather only delete leaves (and clean up stale branch nodes that no longer have children)
But this warrants a new ticket, so i opened #797

pruned = append(pruned, defs...)
m.Unlock()
}
}

if orgId == -1 {
log.Info("memory-idx: pruning stale metricDefs from memory for all orgs took %s", time.Since(pre).String())
}
Expand Down