diff --git a/src/bin/lttng-sessiond/agent-thread.c b/src/bin/lttng-sessiond/agent-thread.c index c278ce95e5..06ef377a3a 100644 --- a/src/bin/lttng-sessiond/agent-thread.c +++ b/src/bin/lttng-sessiond/agent-thread.c @@ -28,6 +28,15 @@ struct thread_notifiers { sem_t ready; }; +struct agent_app_id { + pid_t pid; + enum lttng_domain_type domain; +}; + +struct agent_protocol_version { + unsigned int major, minor; +}; + static int agent_tracing_enabled = -1; /* @@ -42,10 +51,11 @@ static const char *default_reg_uri = * Update agent application using the given socket. This is done just after * registration was successful. * - * This is a quite heavy call in terms of locking since the session list lock - * AND session lock are acquired. + * This will acquire the various sessions' lock; none must be held by the + * caller. + * The caller must hold the session list lock. */ -static void update_agent_app(struct agent_app *app) +static void update_agent_app(const struct agent_app *app) { struct ltt_session *session, *stmp; struct ltt_session_list *list; @@ -53,7 +63,6 @@ static void update_agent_app(struct agent_app *app) list = session_get_list(); assert(list); - session_lock_list(); cds_list_for_each_entry_safe(session, stmp, &list->head, list) { if (!session_get(session)) { continue; @@ -61,19 +70,18 @@ static void update_agent_app(struct agent_app *app) session_lock(session); if (session->ust_session) { - struct agent *agt; + const struct agent *agt; rcu_read_lock(); agt = trace_ust_find_agent(session->ust_session, app->domain); if (agt) { - agent_update(agt, app->sock->fd); + agent_update(agt, app); } rcu_read_unlock(); } session_unlock(session); session_put(session); } - session_unlock_list(); } /* @@ -188,23 +196,56 @@ static void destroy_tcp_socket(struct lttcomm_sock *sock) lttcomm_destroy_sock(sock); } +static const char *domain_type_str(enum lttng_domain_type domain_type) +{ + switch (domain_type) { + case LTTNG_DOMAIN_NONE: + return "none"; + case LTTNG_DOMAIN_KERNEL: + return "kernel"; + case LTTNG_DOMAIN_UST: + return "ust"; + case LTTNG_DOMAIN_JUL: + return "jul"; + case LTTNG_DOMAIN_LOG4J: + return "log4j"; + case LTTNG_DOMAIN_PYTHON: + return "python"; + default: + return "unknown"; + } +} + +static bool is_agent_protocol_version_supported( + const struct agent_protocol_version *version) +{ + const bool is_supported = version->major == AGENT_MAJOR_VERSION && + version->minor == AGENT_MINOR_VERSION; + + if (!is_supported) { + WARN("Refusing agent connection: unsupported protocol version %ui.%ui, expected %i.%i", + version->major, version->minor, + AGENT_MAJOR_VERSION, AGENT_MINOR_VERSION); + } + + return is_supported; +} + /* - * Handle a new agent registration using the reg socket. After that, a new - * agent application is added to the global hash table and attach to an UST app - * object. If r_app is not NULL, the created app is set to the pointer. + * Handle a new agent connection on the registration socket. * - * Return the new FD created upon accept() on success or else a negative errno - * value. + * Returns 0 on success, or else a negative errno value. + * On success, the resulting socket is returned through `agent_app_socket` + * and the application's reported id is updated through `agent_app_id`. */ -static int handle_registration(struct lttcomm_sock *reg_sock, - struct agent_app **r_app) +static int accept_agent_connection( + struct lttcomm_sock *reg_sock, + struct agent_app_id *agent_app_id, + struct lttcomm_sock **agent_app_socket) { int ret; - pid_t pid; - uint32_t major_version, minor_version; + struct agent_protocol_version agent_version; ssize_t size; - enum lttng_domain_type domain; - struct agent_app *app; struct agent_register_msg msg; struct lttcomm_sock *new_sock; @@ -213,60 +254,52 @@ static int handle_registration(struct lttcomm_sock *reg_sock, new_sock = reg_sock->ops->accept(reg_sock); if (!new_sock) { ret = -ENOTCONN; - goto error; + goto end; } size = new_sock->ops->recvmsg(new_sock, &msg, sizeof(msg), 0); if (size < sizeof(msg)) { + if (size < 0) { + PERROR("Failed to register new agent application"); + } else if (size != 0) { + ERR("Failed to register new agent application: invalid registration message length: expected length = %zu, message length = %zd", + sizeof(msg), size); + } else { + DBG("Failed to register new agent application: connection closed"); + } ret = -EINVAL; - goto error_socket; - } - domain = be32toh(msg.domain); - pid = be32toh(msg.pid); - major_version = be32toh(msg.major_version); - minor_version = be32toh(msg.minor_version); - - /* Test communication protocol version of the registring agent. */ - if (major_version != AGENT_MAJOR_VERSION) { - ret = -EINVAL; - goto error_socket; - } - if (minor_version != AGENT_MINOR_VERSION) { - ret = -EINVAL; - goto error_socket; + goto error_close_socket; } - DBG2("[agent-thread] New registration for pid %d domain %d on socket %d", - pid, domain, new_sock->fd); + agent_version = (struct agent_protocol_version) { + be32toh(msg.major_version), + be32toh(msg.minor_version), + }; - app = agent_create_app(pid, domain, new_sock); - if (!app) { - ret = -ENOMEM; - goto error_socket; + /* Test communication protocol version of the registering agent. */ + if (!is_agent_protocol_version_supported(&agent_version)) { + ret = -EINVAL; + goto error_close_socket; } - /* - * Add before assigning the socket value to the UST app so it can be found - * concurrently. - */ - agent_add_app(app); - - /* - * We don't need to attach the agent app to the app. If we ever do so, we - * should consider both registration order of agent before app and app - * before agent. - */ + *agent_app_id = (struct agent_app_id) { + .domain = (enum lttng_domain_type) be32toh(msg.domain), + .pid = (pid_t) be32toh(msg.pid), + }; - if (r_app) { - *r_app = app; - } + DBG2("New registration for agent application: pid = %ld, domain = %s, socket fd = %d", + (long) agent_app_id->pid, + domain_type_str(agent_app_id->domain), new_sock->fd); - return new_sock->fd; + *agent_app_socket = new_sock; + new_sock = NULL; + ret = 0; + goto end; -error_socket: +error_close_socket: new_sock->ops->close(new_sock); lttcomm_destroy_sock(new_sock); -error: +end: return ret; } @@ -362,7 +395,7 @@ static void *thread_agent_management(void *data) uatomic_set(&agent_tracing_enabled, 1); mark_thread_as_ready(notifiers); - /* Add TCP socket to poll set. */ + /* Add TCP socket to the poll set. */ ret = lttng_poll_add(&events, reg_sock->fd, LPOLLIN | LPOLLERR | LPOLLHUP | LPOLLRDHUP); if (ret < 0) { @@ -399,43 +432,79 @@ static void *thread_agent_management(void *data) goto exit; } + /* Activity on the registration socket. */ if (revents & LPOLLIN) { - int new_fd; - struct agent_app *app = NULL; + struct agent_app_id new_app_id; + struct agent_app *new_app = NULL; + struct lttcomm_sock *new_app_socket; + int new_app_socket_fd; assert(pollfd == reg_sock->fd); - new_fd = handle_registration(reg_sock, &app); - if (new_fd < 0) { + + ret = accept_agent_connection( + reg_sock, &new_app_id, &new_app_socket); + if (ret < 0) { + /* Errors are already logged. */ continue; } - /* Should not have a NULL app on success. */ - assert(app); /* - * Since this is a command socket (write then read), - * only add poll error event to only detect shutdown. + * new_app_socket's ownership has been + * transferred to the new agent app. */ - ret = lttng_poll_add(&events, new_fd, + new_app = agent_create_app(new_app_id.pid, + new_app_id.domain, + new_app_socket); + if (!new_app) { + new_app_socket->ops->close( + new_app_socket); + continue; + } + new_app_socket_fd = new_app_socket->fd; + new_app_socket = NULL; + + /* + * Since this is a command socket (write then + * read), only add poll error event to only + * detect shutdown. + */ + ret = lttng_poll_add(&events, new_app_socket_fd, LPOLLERR | LPOLLHUP | LPOLLRDHUP); if (ret < 0) { - agent_destroy_app_by_sock(new_fd); + agent_destroy_app(new_app); continue; } - /* Update newly registered app. */ - update_agent_app(app); + /* + * Prevent sessions from being modified while + * the agent application's configuration is + * updated. + */ + session_lock_list(); + + /* + * Update the newly registered applications's + * configuration. + */ + update_agent_app(new_app); - /* On failure, the poll will detect it and clean it up. */ - ret = agent_send_registration_done(app); + ret = agent_send_registration_done(new_app); if (ret < 0) { - /* Removing from the poll set */ - ret = lttng_poll_del(&events, new_fd); + agent_destroy_app(new_app); + /* Removing from the poll set. */ + ret = lttng_poll_del(&events, + new_app_socket_fd); if (ret < 0) { + session_unlock_list(); goto error; } - agent_destroy_app_by_sock(new_fd); continue; } + + /* Publish the new agent app. */ + agent_add_app(new_app); + + session_unlock_list(); } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) { /* Removing from the poll set */ ret = lttng_poll_del(&events, pollfd); diff --git a/src/bin/lttng-sessiond/agent.c b/src/bin/lttng-sessiond/agent.c index 1926b0820f..e5978be5ce 100644 --- a/src/bin/lttng-sessiond/agent.c +++ b/src/bin/lttng-sessiond/agent.c @@ -380,7 +380,7 @@ static ssize_t list_events(struct agent_app *app, struct lttng_event **events) * * Return LTTNG_OK on success or else a LTTNG_ERR* code. */ -static int enable_event(struct agent_app *app, struct agent_event *event) +static int enable_event(const struct agent_app *app, struct agent_event *event) { int ret; char *bytes_to_send; @@ -495,8 +495,8 @@ int send_pstring(struct lttcomm_sock *sock, const char *str, uint32_t len) * * Return LTTNG_OK on success or else a LTTNG_ERR* code. */ -static int app_context_op(struct agent_app *app, - struct agent_app_ctx *ctx, enum lttcomm_agent_command cmd) +static int app_context_op(const struct agent_app *app, + const struct agent_app_ctx *ctx, enum lttcomm_agent_command cmd) { int ret; uint32_t reply_ret_code; @@ -946,7 +946,7 @@ struct agent_app *agent_create_app(pid_t pid, enum lttng_domain_type domain, app = zmalloc(sizeof(*app)); if (!app) { - PERROR("zmalloc agent create"); + PERROR("Failed to allocate agent application instance"); goto error; } @@ -1402,26 +1402,24 @@ void agent_app_ht_clean(void) * Note that this function is most likely to be used with a tracing session * thus the caller should make sure to hold the appropriate lock(s). */ -void agent_update(struct agent *agt, int sock) +void agent_update(const struct agent *agt, const struct agent_app *app) { int ret; - struct agent_app *app; struct agent_event *event; struct lttng_ht_iter iter; struct agent_app_ctx *ctx; assert(agt); - assert(sock >= 0); + assert(app); - DBG("Agent updating app socket %d", sock); + DBG("Agent updating app: pid = %ld", (long) app->pid); rcu_read_lock(); - app = agent_find_app_by_sock(sock); /* * We are in the registration path thus if the application is gone, * there is a serious code flow error. */ - assert(app); + cds_lfht_for_each_entry(agt->events->ht, &iter.iter, event, node.node) { /* Skip event if disabled. */ if (!event->enabled) { diff --git a/src/bin/lttng-sessiond/agent.h b/src/bin/lttng-sessiond/agent.h index 2d2d642555..f8e67efdae 100644 --- a/src/bin/lttng-sessiond/agent.h +++ b/src/bin/lttng-sessiond/agent.h @@ -163,7 +163,7 @@ int agent_enable_event(struct agent_event *event, enum lttng_domain_type domain); int agent_disable_event(struct agent_event *event, enum lttng_domain_type domain); -void agent_update(struct agent *agt, int sock); +void agent_update(const struct agent *agt, const struct agent_app *app); int agent_list_events(struct lttng_event **events, enum lttng_domain_type domain); diff --git a/src/bin/lttng-sessiond/session.h b/src/bin/lttng-sessiond/session.h index a58de18f1d..1df70a4747 100644 --- a/src/bin/lttng-sessiond/session.h +++ b/src/bin/lttng-sessiond/session.h @@ -195,13 +195,22 @@ struct ltt_session { char *base_path; }; -/* Prototypes */ enum lttng_error_code session_create(const char *name, uid_t uid, gid_t gid, struct ltt_session **out_session); void session_lock(struct ltt_session *session); +void session_unlock(struct ltt_session *session); + +/* + * The session list lock covers more ground than its name implies. While + * it does protect against concurent mutations of the session list, it is + * also used as a multi-session lock when synchronizing newly-registered + * 'user space tracer' and 'agent' applications. + * + * In other words, it prevents session configurations from changing while they + * are being transmitted to the various applications. + */ void session_lock_list(void); int session_trylock_list(void); -void session_unlock(struct ltt_session *session); void session_unlock_list(void); void session_destroy(struct ltt_session *session);