Skip to content

Commit

Permalink
Change jetstream_slots_available_percentage to `jetstream_slots_used_percentage` (#102)
Browse files Browse the repository at this point in the history

* initial_commit

* pylint

* updated example
  • Loading branch information
Bslabe123 authored Jun 12, 2024
1 parent 8a1e313 commit 26872c3
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 10 deletions.
6 changes: 3 additions & 3 deletions docs/observability-prometheus-metrics-in-jetstream-server.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Now that we configured `prometheus_port=9090` above, we can observe various Jets
# HELP jetstream_prefill_backlog_size Size of prefill queue
# TYPE jetstream_prefill_backlog_size gauge
jetstream_prefill_backlog_size{id="<SOME-HOSTNAME-HERE>"} 0.0
# HELP jetstream_slots_available_percentage The percentage of available slots in decode batch
# TYPE jetstream_slots_available_percentage gauge
jetstream_slots_available_percentage{id="<SOME-HOSTNAME-HERE>",idx="0"} 0.96875
# HELP jetstream_slots_used_percentage The percentage of decode slots currently being used
# TYPE jetstream_slots_used_percentage gauge
jetstream_slots_used_percentage{id="<SOME-HOSTNAME-HERE>",idx="0"} 0.04166666666666663
```
10 changes: 5 additions & 5 deletions jetstream/core/metrics/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ def __new__(cls):
documentation="Size of prefill queue",
labelnames=["id"],
)
_slots_available_percentage = Gauge(
name="jetstream_slots_available_percentage",
documentation="The percentage of available slots in decode batch",
_slots_used_percentage = Gauge(
name="jetstream_slots_used_percentage",
documentation="The percentage of decode slots currently being used",
labelnames=["id", "idx"],
)

def get_prefill_backlog_metric(self):
return self._prefill_backlog.labels(id=self._id)

def get_slots_available_percentage_metric(self, idx: int):
return self._slots_available_percentage.labels(id=self._id, idx=idx)
def get_slots_used_percentage_metric(self, idx: int):
return self._slots_used_percentage.labels(id=self._id, idx=idx)
6 changes: 4 additions & 2 deletions jetstream/core/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,9 +597,11 @@ def _generate_thread(self, idx: int):
max_concurrent_decodes = generate_engine.max_concurrent_decodes

if self._metrics_collector:
self._metrics_collector.get_slots_available_percentage_metric(
self._metrics_collector.get_slots_used_percentage_metric(
idx
).set_function(lambda: float(my_slots.qsize() / max_concurrent_decodes))
).set_function(
lambda: float(1 - (my_slots.qsize() / max_concurrent_decodes))
)

# Check if there are any free my_slots. We don't want to block here since
# we can still generate if we can't insert. We do this in a while loop to
Expand Down

0 comments on commit 26872c3

Please sign in to comment.