Skip to content

Commit

Permalink
u3: better recovery from crashes during snapshot patch application (#651)
Browse files Browse the repository at this point in the history

@Fang- discovered this longstanding problem of overeager snapshot
validation preventing crash recovery.
  • Loading branch information
joemfb authored Jul 15, 2024
2 parents 1fd6a16 + ecf3c55 commit 58aa31c
Showing 1 changed file with 34 additions and 21 deletions.
55 changes: 34 additions & 21 deletions pkg/noun/events.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,37 +324,43 @@ u3e_fault(u3_post low_p, u3_post hig_p, u3_post off_p)
return u3e_flaw_good;
}

/* _ce_img_stat: outcome of measuring or opening a snapshot image file.
**
**   Values are numbered consecutively from zero; callers compare
**   against the names, never the raw integers.
*/
typedef enum {
  //  image measured successfully (an empty file counts as good)
  //
  _ce_img_good = 0,

  //  hard failure: open/fstat error or page count overflow
  //
  _ce_img_fail,

  //  file size is not the expected length for its page count
  //
  _ce_img_size
} _ce_img_stat;

/* _ce_image_stat(): measure image.
**
**   Reads the size of the file behind [img_u->fid_i] and converts it
**   to a page count, which is stored in [*pgs_w] on success.
**
**   Returns:
**     _ce_img_good  size is zero (page count 0) or a valid page count
**     _ce_img_size  size does not equal _ce_len() of the computed page
**                   count; [*pgs_w] is left untouched
**     _ce_img_fail  fstat() failed, or the page count exceeds UINT32_MAX;
**                   [*pgs_w] is left untouched
*/
static _ce_img_stat
_ce_image_stat(u3e_image* img_u, c3_w* pgs_w)
{
  struct stat buf_u;

  if ( -1 == fstat(img_u->fid_i, &buf_u) ) {
    fprintf(stderr, "loom: stat %s: %s\r\n", img_u->nam_c, strerror(errno));
    //  NOTE(review): u3_assert(0) presumably aborts; the return below
    //  only matters if assertions are compiled out -- confirm
    //
    u3_assert(0);
    return _ce_img_fail;
  }
  else {
    c3_z siz_z = buf_u.st_size;
    //  round the byte size up to whole pages
    //  (assumes _ce_page == 1 << (u3a_page + 2) -- TODO confirm)
    //
    c3_z pgs_z = (siz_z + (_ce_page - 1)) >> (u3a_page + 2);

    if ( !siz_z ) {
      //  an empty image is valid and holds no pages
      //
      *pgs_w = 0;
      return _ce_img_good;
    }
    else if ( siz_z != _ce_len(pgs_z) ) {
      //  reported separately from hard failures so that callers can
      //  tell a bad size apart from an unusable image
      //
      fprintf(stderr, "loom: %s corrupt size %zu\r\n", img_u->nam_c, siz_z);
      return _ce_img_size;
    }
    else if ( pgs_z > UINT32_MAX ) {
      //  page count would not fit in the 32-bit [*pgs_w]
      //
      fprintf(stderr, "loom: %s overflow %zu\r\n", img_u->nam_c, siz_z);
      return _ce_img_fail;
    }
    else {
      *pgs_w = (c3_w)pgs_z;
      return _ce_img_good;
    }
  }
}
Expand Down Expand Up @@ -397,7 +403,7 @@ _ce_ephemeral_open(c3_i* eph_i)

/* _ce_image_open(): open or create image.
*/
static c3_o
static _ce_img_stat
_ce_image_open(u3e_image* img_u, c3_c* ful_c)
{
c3_i mod_i = O_RDWR | O_CREAT;
Expand All @@ -406,14 +412,10 @@ _ce_image_open(u3e_image* img_u, c3_c* ful_c)
snprintf(pax_c, 8192, "%s/%s.bin", ful_c, img_u->nam_c);
if ( -1 == (img_u->fid_i = c3_open(pax_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", pax_c, strerror(errno));
return c3n;
}
else if ( c3n == _ce_image_stat(img_u, &img_u->pgs_w) ) {
return c3n;
}
else {
return c3y;
return _ce_img_fail;
}

return _ce_image_stat(img_u, &img_u->pgs_w);
}

/* _ce_patch_write_control(): write control block file.
Expand Down Expand Up @@ -1371,13 +1373,18 @@ u3e_backup(c3_c* pux_c, c3_c* pax_c, c3_o ovw_o)
//
c3_c nux_c[8193];
snprintf(nux_c, 8192, "%s/%s.bin", pux_c, nux_u.nam_c);
if ( (0 != access(nux_c, F_OK)) || (c3n == _ce_image_open(&nux_u, pux_c)) ) {
if ( (0 != access(nux_c, F_OK))
|| (_ce_img_good != _ce_image_open(&nux_u, pux_c)) )
{
fprintf(stderr, "loom: couldn't open north image at %s\r\n", pux_c);
return c3n;
}

c3_c sux_c[8193];
snprintf(sux_c, 8192, "%s/%s.bin", pux_c, sux_u.nam_c);
if ( (0 != access(sux_c, F_OK)) || (c3n == _ce_image_open(&sux_u, pux_c)) ) {
if ( (0 != access(sux_c, F_OK))
|| (_ce_img_good != _ce_image_open(&sux_u, pux_c)) )
{
fprintf(stderr, "loom: couldn't open south image at %s\r\n", pux_c);
return c3n;
}
Expand Down Expand Up @@ -1487,9 +1494,9 @@ u3e_save(u3_post low_p, u3_post hig_p)
#ifdef U3_SNAPSHOT_VALIDATION
{
c3_w pgs_w;
u3_assert( c3y == _ce_image_stat(&u3P.nor_u, &pgs_w) );
u3_assert( _ce_img_good == _ce_image_stat(&u3P.nor_u, &pgs_w) );
u3_assert( pgs_w == u3P.nor_u.pgs_w );
u3_assert( c3y == _ce_image_stat(&u3P.sou_u, &pgs_w) );
u3_assert( _ce_img_good == _ce_image_stat(&u3P.sou_u, &pgs_w) );
u3_assert( pgs_w == u3P.sou_u.pgs_w );
}
#endif
Expand Down Expand Up @@ -1590,9 +1597,11 @@ u3e_live(c3_o nuu_o, c3_c* dir_c)
//
c3_c chk_c[8193];
snprintf(chk_c, 8193, "%s/.urb/chk", u3P.dir_c);
if ( (c3n == _ce_image_open(&u3P.nor_u, chk_c)) ||
(c3n == _ce_image_open(&u3P.sou_u, chk_c)) )
{

_ce_img_stat nor_e = _ce_image_open(&u3P.nor_u, chk_c);
_ce_img_stat sou_e = _ce_image_open(&u3P.sou_u, chk_c);

if ( (_ce_img_fail == nor_e) || (_ce_img_fail == sou_e) ) {
fprintf(stderr, "boot: image failed\r\n");
exit(1);
}
Expand All @@ -1609,6 +1618,10 @@ u3e_live(c3_o nuu_o, c3_c* dir_c)
_ce_patch_free(pat_u);
_ce_patch_delete();
}
else if ( (_ce_img_size == nor_e) || (_ce_img_size == sou_e) ) {
fprintf(stderr, "boot: image failed (size)\r\n");
exit(1);
}

nor_w = u3P.nor_u.pgs_w;
sou_w = u3P.sou_u.pgs_w;
Expand Down

0 comments on commit 58aa31c

Please sign in to comment.