-
Notifications
You must be signed in to change notification settings - Fork 45
/
perf_reader.h
264 lines (223 loc) · 7.1 KB
/
perf_reader.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <bitset>
#include <queue>
#include <utility>
#include <vector>
#include <platform/platform.h>
#include <collector/kernel/bpf_src/render_bpf.h>
#include <util/perf_ring_cpp.h>
/**
* Container for multiple CPU readers.
*
* Accessing values is done through PerfReader. PerfReader starts and finishes
* read batches, and maintains the entries_ heap.
*
* If a reader is non-empty, PerfReader will populate an entry in entries_:
* * if the next entry is PERF_LOST_RECORD, the timestamp will be ~0ull
* * if the next entry is PERF_RECORD_SAMPLE, the timestamp will be the one
 * encoded in the sample. This code assumes that a sample starts with
* [ perf_event_header + u32 size + u32 unpadded_size + u64 timestamp ]
*/
class PerfContainer {
public:
  /**
   * C'tor
   *
   * NOTE(review): a previous revision documented @param cpus / @param n_pages
   * here, but this constructor takes no arguments -- rings are supplied after
   * construction via add_ring() / add_data_ring(). Confirm and keep the doc
   * in sync with the signature.
   */
  PerfContainer();

  /* disallow copy and assignment */
  PerfContainer(const PerfContainer &) = delete;
  void operator=(const PerfContainer &) = delete;

  /**
   * Add the control channel ring to the collection
   *
   * Throws if trying to add more than 64 elements
   */
  void add_ring(PerfRing &pr);

  /**
   * Add the data channel ring to the collection
   *
   * Throws if trying to add more than 64 elements
   */
  void add_data_ring(PerfRing &pr);

  /**
   * Set a callback to execute when events show up in the
   * control channel perf ring.
   * Used if you are not polling for high-speed access.
   * Must be called only after all perf rings are added.
   */
  typedef void CALLBACK(void *ctx);
  void set_callback(uv_loop_t &loop, void *ctx, CALLBACK cb);

  /**
   * Debugging routine to inspect the contents of the perf container
   */
  std::string inspect(void);

  // returns the number of control channel perf rings in this container
  std::size_t size() const { return readers_.size(); }

  // returns a reference to the i-th control channel perf ring
  PerfRing const &operator[](std::size_t i) const { return readers_[i]; }

  // returns a reference to the i-th data channel perf ring (const / mutable)
  PerfRing const &data_ring(std::size_t i) const { return data_readers_[i]; }
  PerfRing &data_ring(std::size_t i) { return data_readers_[i]; }

private:
  // PerfReader manipulates entries_, n_entries_ and readers_in_entries_
  // directly while it performs a sorted read batch.
  friend class PerfReader;

  /* Readers for each live CPU */
  std::vector<PerfRing> readers_;
  std::vector<PerfRing> data_readers_;

  /* (timestamp, reader_index) pairs */
  struct PerfEntry {
    u64 timestamp;
    size_t reader_index;

    inline bool operator<(const PerfEntry &other) const
    {
      // Reverse the sense of the PerfEntry sort order
      // this way, our entries_ heap puts the earliest timestamps first,
      // making this a -min heap- instead of a -max heap-
      return timestamp > other.timestamp;
    }
  };

  // heap of per-reader entries; first n_entries_ slots are in use and
  // ordered by PerfEntry::operator< (earliest timestamp on top)
  PerfEntry entries_[BPF_MAX_CPUS];
  size_t n_entries_;

  /* bitmask: which readers from readers_ are already in entries_ */
  std::bitset<BPF_MAX_CPUS> readers_in_entries_;
};
/**
* Read sorted values from multiple queues.
*/
class PerfReader {
public:
  /**
   * C'tor
   *
   * Starts a sorted read operation over the container's rings; only events
   * with timestamps up to @p max_timestamp are accepted.
   */
  PerfReader(PerfContainer &container, u64 max_timestamp);

  /* D'tor -- also finishes the batch if stop() was not called explicitly */
  ~PerfReader();

  /**
   * Returns true if there are no more events to read safely while keeping
   * sort.
   */
  bool empty();

  /**
   * Returns the number of bytes left to read of the perf ring, and
   * optionally the total size of the ring
   */
  inline u32 bytes_remaining(u32 *total_bytes);

  /**
   * Returns the type of the next value
   *
   * Assumes reader is not empty (i.e., !empty())
   */
  inline u32 peek_type() const { return top().peek_type(); }

  /**
   * Returns the total size of the next perf event
   *
   * Assumes reader is not empty (i.e., !empty())
   */
  inline u32 peek_size() { return top().peek_size(); }

  /**
   * Returns the length of the payload of the next value
   *
   * Assumes reader is not empty (i.e., !empty()) and type==PERF_RECORD_SAMPLE
   *
   * NOTE(review): reads a u32 (the unpadded_size field after the u32 size)
   * but returns u16 -- presumably unpadded lengths always fit in 16 bits;
   * confirm against the BPF submit side.
   */
  inline u16 peek_unpadded_length() { return top().peek_aligned_u32(sizeof(u32)); }

  /**
   * Returns the rpc_id of the next value (the u16 immediately following the
   * [u32 size + u32 unpadded_size + u64 timestamp] sample prefix)
   *
   * Assumes reader is not empty (i.e., !empty()) and type==PERF_RECORD_SAMPLE
   */
  inline u16 peek_rpc_id() { return top().peek_aligned_u16(2 * sizeof(u64)); }

  /**
   * Returns the number of lost samples, if type is PERF_RECORD_LOST
   */
  inline u64 peek_n_lost() { return top().peek_aligned_u64(sizeof(u64)); }

  /**
   * Returns a view into the sample's contents, without the perf event header.
   *
   * The messages are stored in a ring buffer, so if they're located at the end
   * of the ring and wrap-around to the beginning, then the view will be spread
   * over two chunks, in the order specified by `first` and `second` members of
   * the returned pair. Otherwise, the view will have exactly one chunk
   * represented by `first` in the returned pair and `second` will be empty.
   *
   * @assumes: `peek_type()` == `PERF_RECORD_SAMPLE`.
   */
  inline std::pair<std::string_view, std::string_view> peek_message() const
  {
    auto const &ring = top();
    assert(ring.peek_type() == PERF_RECORD_SAMPLE);
    return ring.peek();
  }

  /**
   * Returns which cpu index we're reading from next
   * (the reader_index of the entry at the top of the container's min-heap)
   */
  inline size_t peek_index() const
  {
    size_t idx = container_.entries_[0].reader_index;
    return idx;
  }

  /**
   * Copies the payload of a sample entry to the specified buffer.
   *
   * Assumes reader is not empty (i.e., !empty())
   * Assumes type is PERF_RECORD_SAMPLE
   * Assumes there is enough space in the destination (>= peek_sample_length())
   *
   * NOTE(review): peek_sample_length() is not declared in this class --
   * presumably this means peek_size(); confirm and fix the reference.
   */
  void pop_and_copy_to(char *dest);

  /**
   * Copies the payload of a sample entry to the specified buffer.
   * Uses the native size of the payload message instead of a header to
   * determine the length. Assumes there is no padding on the perf_submit side.
   *
   * Assumes reader is not empty (i.e., !empty())
   * Assumes type is PERF_RECORD_SAMPLE
   * Assumes there is enough space in the destination (>= peek_sample_length())
   *
   * NOTE(review): see pop_and_copy_to() -- peek_sample_length() is not
   * declared here; presumably peek_unpadded_length() is meant in this case.
   */
  void pop_unpadded_and_copy_to(char *dest);

  /**
   * Pops the entry, and updates data structures
   *
   * Assumes reader is not empty (i.e., !empty())
   */
  void pop();

  /**
   * Explicitly finish the batch
   */
  void stop();

private:
  /**
   * Returns the reader with the smallest timestamp (the one indexed by the
   * top entry of the container's min-heap)
   */
  inline PerfRing &top()
  {
    size_t idx = container_.entries_[0].reader_index;
    return container_.readers_[idx];
  }

  inline PerfRing const &top() const
  {
    size_t idx = container_.entries_[0].reader_index;
    return container_.readers_[idx];
  }

  /**
   * Updates container_.entries_ and container_.readers_in_entries_
   */
  void update_after_pop();

  /**
   * Updates the reader's state given that it is not in container_.entries_
   */
  void update_when_not_in_entries(size_t idx);

  /* container to read from */
  PerfContainer &container_;

  /* the maximum timestamp we should accept */
  u64 max_timestamp_;

  /* is the reader active */
  bool active_;

  /* This class acts as a "Guard", so disallow copy and assignment */
  PerfReader(const PerfReader &) = delete;
  void operator=(const PerfReader &) = delete;
};