Skip to content

Commit

Permalink
[TA4892]log(debug): for data/mgmt connections in zrepl (mayadata-io#194
Browse files Browse the repository at this point in the history
…) (mayadata-io#196)

Signed-off-by: mayank <mayank.patel@cloudbyte.com>
  • Loading branch information
mynktl authored and vishnuitta committed Feb 13, 2019
1 parent 92f660f commit 66a84f1
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 42 deletions.
90 changes: 53 additions & 37 deletions lib/libzrepl/data_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,11 @@ uzfs_zvol_socket_read(int fd, char *buf, uint64_t nbytes)
if (count < 0) {
if (errno == EINTR)
continue;
LOG_ERRNO("Socket read error");
LOG_ERRNO("Socket(%d) read error", fd);
return (-1);
} else if (count == 0) {
LOG_INFO("Connection closed by the peer");
LOG_INFO("Connection closed by the peer for socket(%d)",
fd);
return (-1);
}
p += count;
Expand Down Expand Up @@ -566,7 +567,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)

if ((rc = setsockopt(sfd, SOL_SOCKET, SO_LINGER, &lo, sizeof (lo)))
!= 0) {
LOG_ERRNO("setsockopt failed");
LOG_ERRNO("setsockopt failed for sock(%d)", sfd);
goto exit;
}

Expand All @@ -577,7 +578,8 @@ uzfs_zvol_rebuild_dw_replica(void *arg)

if ((rc = connect(sfd, (struct sockaddr *)&replica_ip,
sizeof (replica_ip))) != 0) {
LOG_ERRNO("connect failed");
LOG_ERRNO("connect failed on fd(%d) to %s:%u", sfd,
rebuild_args->ip, rebuild_args->port);
perror("connect");
goto exit;
}
Expand Down Expand Up @@ -605,21 +607,24 @@ uzfs_zvol_rebuild_dw_replica(void *arg)

rc = uzfs_zvol_socket_write(sfd, (char *)&hdr, sizeof (hdr));
if (rc != 0) {
LOG_ERR("Socket hdr write failed");
LOG_ERR("Socket hdr write failed for fd(%d) during %s "
"rebuilding", sfd, zinfo->name);
goto exit;
}

rc = uzfs_zvol_socket_write(sfd, (void *)rebuild_args->zvol_name,
hdr.len);
if (rc != 0) {
LOG_ERR("Socket handshake write failed");
LOG_ERR("Socket handshake write failed for fd(%d) during %s "
"rebuilding", sfd, zinfo->name);
goto exit;
}

next_step:

if (ZVOL_IS_REBUILDING_ERRORED(zinfo->main_zv)) {
LOG_ERR("rebuilding errored.. for %s..", zinfo->name);
LOG_ERR("rebuilding errored.. for %s.. on sock(%d)",
zinfo->name, sfd);
rc = -1;
goto exit;
}
Expand All @@ -634,8 +639,8 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
rc = uzfs_zvol_handle_rebuild_snap_done(&hdr,
sfd, zinfo);
if (rc != 0) {
LOG_ERR("Rebuild snap_done failed.. for %s",
zinfo->name);
LOG_ERR("Rebuild snap_done failed.. for %s on fd(%d)",
zinfo->name, sfd);
goto exit;
}
offset = 0;
Expand All @@ -653,7 +658,8 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
rc = uzfs_zvol_socket_write(sfd, (char *)&hdr, sizeof (hdr));
if (rc != 0) {
LOG_ERRNO("Socket rebuild_complete write failed, but,"
"counting as success with this replica");
"counting as success with this replica(%s) on "
"fd(%d)", zinfo->name, sfd);
rc = 0;
goto exit;
} else if (all_snap_done == B_TRUE) {
Expand Down Expand Up @@ -681,7 +687,8 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
hdr.len = zvol_rebuild_step_size;
rc = uzfs_zvol_socket_write(sfd, (char *)&hdr, sizeof (hdr));
if (rc != 0) {
LOG_ERR("Socket rebuild_step write failed");
LOG_ERR("Socket rebuild_step write failed for %s on "
"fd(%d)", zvol_state->zv_name, sfd);
goto exit;
}
}
Expand Down Expand Up @@ -775,12 +782,13 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
rc = uzfs_zinfo_rebuild_from_clone(zinfo);
if (rc != 0) {
LOG_ERR("Rebuild from clone for vol %s "
"failed", zinfo->name);
"failed on fd(%d)", zinfo->name,
sfd);
rc = -1;
goto exit;
}
LOG_INFO("Rebuild started from clone for vol "
"%s", zinfo->name);
"%s on fd(%d)", zinfo->name, sfd);
}
continue;
}
Expand All @@ -802,7 +810,8 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
atomic_inc_64(&zinfo->dispatched_io_cnt);
uzfs_zvol_worker(zio_cmd);
if (zio_cmd->hdr.status != ZVOL_OP_STATUS_OK) {
LOG_ERR("rebuild IO failed.. for %s..", zinfo->name);
LOG_ERR("rebuild IO failed.. for %s.. on fd(%d)",
zinfo->name, sfd);
rc = -1;
goto exit;
}
Expand All @@ -818,7 +827,7 @@ uzfs_zvol_rebuild_dw_replica(void *arg)
ZVOL_REBUILDING_ERRORED);
(zinfo->main_zv->rebuild_info.rebuild_failed_cnt) += 1;
LOG_ERR("uzfs_zvol_rebuild_dw_replica thread exiting, "
"rebuilding failed zvol: %s", zinfo->name);
"rebuilding failed zvol: %s on fd(%d)", zinfo->name, sfd);
}

uint8_t wquiesce = 0;
Expand Down Expand Up @@ -1169,7 +1178,7 @@ uzfs_zvol_io_conn_acceptor(void *arg)
* (why were we notified then?)
*/
if ((events[i].events & (~EPOLLIN)) != 0) {
LOG_ERRNO("epoll failed");
LOG_ERRNO("epoll failed for io acceptor");
if (events[i].data.fd == io_sfd) {
io_sfd = -1;
} else {
Expand All @@ -1190,7 +1199,7 @@ uzfs_zvol_io_conn_acceptor(void *arg)
in_len = sizeof (in_addr);
new_fd = accept(events[i].data.fd, &in_addr, &in_len);
if (new_fd == -1) {
LOG_ERRNO("accept failed");
LOG_ERRNO("accept failed for io acceptor");
continue;
}
#ifdef DEBUG
Expand Down Expand Up @@ -1222,7 +1231,8 @@ uzfs_zvol_io_conn_acceptor(void *arg)
(void *)new_fd, 0, NULL, TS_RUN, 0,
PTHREAD_CREATE_DETACHED);
} else {
LOG_INFO("New rebuild connection");
LOG_INFO("New rebuild connection on fd %d",
new_fd);
thrd_info = zk_thread_create(NULL, 0,
(thread_func_t)rebuild_scanner,
(void *)new_fd, 0, NULL, TS_RUN, 0,
Expand Down Expand Up @@ -1355,8 +1365,8 @@ uzfs_zvol_create_internal_snapshot(zvol_state_t *zv, zvol_state_t **snap_zv,
goto again;
}
if (ret != 0)
LOG_ERR("Failed to get info about %s@%s",
zv->zv_name, snap_name);
LOG_ERR("Failed to get info about %s@%s err(%d)",
zv->zv_name, snap_name, ret);
strfree(snap_name);
return (ret);
}
Expand Down Expand Up @@ -1388,7 +1398,7 @@ uzfs_zvol_rebuild_scanner(void *arg)

if ((rc = setsockopt(fd, SOL_SOCKET, SO_LINGER, &lo, sizeof (lo)))
!= 0) {
LOG_ERRNO("setsockopt failed");
LOG_ERRNO("setsockopt failed for fd(%d)", fd);
goto exit;
}
read_socket:
Expand Down Expand Up @@ -1421,30 +1431,33 @@ uzfs_zvol_rebuild_scanner(void *arg)
name = kmem_alloc(hdr.len, KM_SLEEP);
rc = uzfs_zvol_socket_read(fd, name, hdr.len);
if (rc != 0) {
LOG_ERR("Error reading zvol name");
LOG_ERR("Error reading zvol name from fd(%d)",
fd);
kmem_free(name, hdr.len);
goto exit;
}

/* Handshake already happened */
if (zinfo != NULL) {
LOG_ERR("Second handshake on %s connection for "
"zvol %s",
zinfo->name, name);
"zvol %s on fd(%d)",
zinfo->name, name, fd);
kmem_free(name, hdr.len);
rc = -1;
goto exit;
}

zinfo = uzfs_zinfo_lookup(name);
if (zinfo == NULL) {
LOG_ERR("zvol %s not found", name);
LOG_ERR("zvol %s not found for fd(%d)", name,
fd);
kmem_free(name, hdr.len);
rc = -1;
goto exit;
}

LOG_INFO("Rebuild scanner started on zvol %s", name);
LOG_INFO("Rebuild scanner started on zvol %s from "
"sock(%d)", name, fd);

uzfs_zvol_append_to_fd_list(zinfo, fd);
zinfo->rebuild_cmd_queued_cnt =
Expand All @@ -1462,9 +1475,10 @@ uzfs_zvol_rebuild_scanner(void *arg)
rebuild_req_len = hdr.len;

LOG_INFO("Checkpointed IO_seq: %ld, "
"Rebuild Req offset: %ld, Rebuild Req length: %ld",
"Rebuild Req offset: %ld, Rebuild Req length: %ld "
"for zvol(%s)",
metadata.io_num, rebuild_req_offset,
rebuild_req_len);
rebuild_req_len, zinfo->name);
#if DEBUG
if (inject_error.delay.helping_replica_rebuild_step
== 1)
Expand Down Expand Up @@ -1568,8 +1582,8 @@ uzfs_zvol_rebuild_scanner(void *arg)
rc = dsl_destroy_snapshot(snap_name,
B_FALSE);
if (rc != 0)
LOG_ERR("snapdestroyfail %s %d",
snap_name, rc);
LOG_ERR("snap destroy failed %s"
" err(%d)", snap_name, rc);
strfree(snap_name);
}
snap_zv = NULL;
Expand All @@ -1579,7 +1593,8 @@ uzfs_zvol_rebuild_scanner(void *arg)
}

default:
LOG_ERR("Wrong opcode: %d", hdr.opcode);
LOG_ERR("Wrong opcode: %d during rebuild on sock(%d)",
hdr.opcode, fd);
goto exit;
}

Expand All @@ -1600,21 +1615,22 @@ uzfs_zvol_rebuild_scanner(void *arg)
#endif
rc = dsl_destroy_snapshot(snap_name, B_FALSE);
if (rc != 0)
LOG_ERR("snap destroy fail %s err: %d",
LOG_ERR("snap destroy failed %s err: %d",
snap_name, rc);
strfree(snap_name);
}
snap_zv = NULL;
}

if (zinfo != NULL) {
LOG_INFO("Closing rebuild connection for zvol %s", zinfo->name);
LOG_INFO("Closing rebuild connection for zvol %s from sock(%d)",
zinfo->name, fd);
remove_pending_cmds_to_ack(fd, zinfo);
uzfs_zvol_remove_from_fd_list(zinfo, fd);

uzfs_zinfo_drop_refcnt(zinfo);
} else {
LOG_INFO("Closing rebuild connection");
LOG_INFO("Closing rebuild connection on sock(%d)", fd);
}

shutdown(fd, SHUT_RDWR);
Expand Down Expand Up @@ -2138,9 +2154,9 @@ uzfs_zvol_io_receiver(void *arg)
*/
while (zinfo->conn_closed || zinfo->is_io_ack_sender_created) {
(void) pthread_mutex_unlock(&zinfo->zinfo_mutex);
LOG_INFO("Waiting for conn_closed (%d) and ack_sender (%d)"
"to be false for %s", zinfo->conn_closed,
zinfo->is_io_ack_sender_created, zinfo->name);
LOG_INFO("Waiting for conn_closed(%d) and ack_sender (%d)"
"for fd(%d) to be false for %s", zinfo->conn_closed,
zinfo->is_io_ack_sender_created, fd, zinfo->name);
sleep(1);
(void) pthread_mutex_lock(&zinfo->zinfo_mutex);
}
Expand Down
18 changes: 13 additions & 5 deletions lib/libzrepl/mgmt_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -840,8 +840,11 @@ finish_async_tasks(void)
}
SLIST_REMOVE(&async_tasks, async_task, async_task, task_next);
free_async_task(async_task);
if (rc != 0)
if (rc != 0) {
LOG_ERR("replying to client returned error(%d)",
async_task->conn->conn_fd);
break;
}
}
mutex_exit(&async_tasks_mtx);
return (rc);
Expand Down Expand Up @@ -1706,10 +1709,12 @@ uzfs_zvol_mgmt_thread(void *arg)

if (events[i].events & EPOLLERR) {
if (conn->conn_state == CS_CONNECT) {
LOG_ERR("Failed to connect to %s:%d",
conn->conn_host, conn->conn_port);
LOG_ERR("Failed to connect to %s:%d "
"fd(%d)", conn->conn_host,
conn->conn_port, conn->conn_fd);
} else {
LOGERRCONN(conn, "Error on connection");
LOGERRCONN(conn, "Error on connection "
"for sock(%d)", conn->conn_fd);
}
if (close_conn(conn) != 0) {
goto exit;
Expand Down Expand Up @@ -1739,6 +1744,8 @@ uzfs_zvol_mgmt_thread(void *arg)
if (cnt == 0) {
/* the other peer closed the conn */
if (events[i].events & EPOLLIN) {
LOG_ERR("connection closed for "
"fd(%d)", conn->conn_fd);
if (close_conn(conn) != 0) {
goto exit;
}
Expand All @@ -1749,7 +1756,8 @@ uzfs_zvol_mgmt_thread(void *arg)
errno == EINTR) {
continue;
}
perror("read/write");
LOG_ERR("Read/Write error(%d) on "
"fd(%d)", errno, conn->conn_fd);
if (close_conn(conn) != 0) {
goto exit;
}
Expand Down

0 comments on commit 66a84f1

Please sign in to comment.