Skip to content

Commit

Permalink
ostree repo commit: Speed up composing trees with --tree=ref
Browse files Browse the repository at this point in the history
Running `ostree commit --tree=ref=a --tree=dir=b` involves reading the
whole of a into an `OstreeMutableTree` before composing `b` on top.  This
is inefficient if a is a complete rootfs and b is just touching one file.
We process O(size of a + size of b) directories rather than
O(number of touched dirs).

This commit makes `ostree commit` more efficient at composing multiple
directories together.  With `ostree_mutable_tree_fill_empty_from_dirtree`
we create a lazy `OstreeMutableTree` which only reads the underlying
information from disk when needed.  We don't need to read all the
subdirectories just to get the checksum of a tree already checked into the
repo.

This provides great speedups when composing a rootfs out of multiple other
rootfs as we do in our build system.  We compose multiple containers
together with:

    ostree commit --tree=ref=base-rootfs --tree=ref=container1 --tree=ref=container2

and it is much faster now.

As a test I ran

    time ostree --repo=... commit --orphan --tree=ref=big-rootfs --tree=dir=modified_etc

Where modified_etc contained a modified sudoers file under /etc.  I used
`strace` to count syscalls and I seperatly took timing measurements.  To
test with a cold cache I ran

    sync && echo 3 | sudo tee /proc/sys/vm/drop_caches

Results:

|                      | Before | After |
| -------------------- | ------ | ----- |
| Time (cold cache)    |   8.1s | 0.12s |
| Time (warm cache)    |   3.7s | 0.08s |
| `openat` calls       |  53589 |   246 |
| `fgetxattr` calls    |  78916 |     0 |

I'm not sure if this will have some negative interaction with the
`_ostree_repo_commit_modifier_apply` which is short-circuited here.  I
think it was disabled for `--tree=ref=` anyway, but I'm not certain.  All
the tests pass anyway.

I originally implemented this in terms of the `OstreeRepoFile` APIs, but
it was *way* less efficient, opening and reading many files unnecessarily.
  • Loading branch information
wmanley committed Jun 30, 2018
1 parent 488365f commit 3d8a8ac
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 4 deletions.
2 changes: 2 additions & 0 deletions apidoc/ostree-sections.txt
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ OstreeLzmaDecompressorClass
<FILE>ostree-mutable-tree</FILE>
OstreeMutableTree
ostree_mutable_tree_new
ostree_mutable_tree_new_from_checksum
ostree_mutable_tree_set_metadata_checksum
ostree_mutable_tree_get_metadata_checksum
ostree_mutable_tree_set_contents_checksum
Expand All @@ -261,6 +262,7 @@ ostree_mutable_tree_ensure_parent_dirs
ostree_mutable_tree_walk
ostree_mutable_tree_get_subdirs
ostree_mutable_tree_get_files
ostree_mutable_tree_fill_empty_from_dirtree
<SUBSECTION Standard>
OSTREE_MUTABLE_TREE
OSTREE_IS_MUTABLE_TREE
Expand Down
3 changes: 3 additions & 0 deletions src/libostree/libostree-devel.sym
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@

/* Add new symbols here. Release commits should copy this section into -released.sym. */
LIBOSTREE_2018.7 {
global:
ostree_mutable_tree_fill_empty_from_dirtree;
ostree_mutable_tree_new_from_checksum;
} LIBOSTREE_2018.6;

/* Stub section for the stable release *after* this development one; don't
Expand Down
214 changes: 210 additions & 4 deletions src/libostree/ostree-mutable-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include "otutil.h"
#include "ostree.h"

#include "ostree-core-private.h"

/**
* SECTION:ostree-mutable-tree
* @title: In-memory modifiable filesystem tree
Expand All @@ -38,6 +40,15 @@
* programmatically.
*/

typedef enum {
MTREE_STATE_WHOLE,

/* MTREE_STATE_LAZY allows us to not read files and subdirs from the objects
* on disk until they're actually needed - often they won't be needed at
* all. */
MTREE_STATE_LAZY
} OstreeMutableTreeState;

/**
* OstreeMutableTree:
*
Expand All @@ -55,6 +66,8 @@ struct OstreeMutableTree
* it sets parent = NULL on all its children (see remove_child_mtree) */
OstreeMutableTree *parent;

OstreeMutableTreeState state;

/* This is the checksum of the Dirtree object that corresponds to the current
* contents of this directory. contents_checksum can be NULL if the SHA was
* never calculated or contents of this mtree or any subdirectory has been
Expand All @@ -71,7 +84,14 @@ struct OstreeMutableTree
* and xattrs of this directory. This can be NULL. */
char *metadata_checksum;

/* const char* filename -> const char* checksum */
/* ======== Valid for state LAZY: =========== */

/* The repo so we can look up the checksums. */
OstreeRepo *repo;

/* ======== Valid for state WHOLE: ========== */

/* const char* filename -> const char* checksum. */
GHashTable *files;

/* const char* filename -> OstreeMutableTree* subtree */
Expand All @@ -80,8 +100,6 @@ struct OstreeMutableTree

G_DEFINE_TYPE (OstreeMutableTree, ostree_mutable_tree, G_TYPE_OBJECT)

static void invalidate_contents_checksum (OstreeMutableTree *self);

static void
ostree_mutable_tree_finalize (GObject *object)
{
Expand All @@ -95,6 +113,8 @@ ostree_mutable_tree_finalize (GObject *object)
g_hash_table_destroy (self->files);
g_hash_table_destroy (self->subdirs);

g_clear_object (&self->repo);

G_OBJECT_CLASS (ostree_mutable_tree_parent_class)->finalize (object);
}

Expand All @@ -117,7 +137,6 @@ insert_child_mtree (OstreeMutableTree *self, const gchar* name,
g_assert_null (child->parent);
g_hash_table_insert (self->subdirs, g_strdup (name), child);
child->parent = self;
invalidate_contents_checksum (self);
}

static void
Expand All @@ -139,6 +158,7 @@ ostree_mutable_tree_init (OstreeMutableTree *self)
g_free, g_free);
self->subdirs = g_hash_table_new_full (g_str_hash, g_str_equal,
g_free, remove_child_mtree);
self->state = MTREE_STATE_WHOLE;
}

static void
Expand All @@ -153,6 +173,83 @@ invalidate_contents_checksum (OstreeMutableTree *self)
}
}

/* Go from state LAZY to state WHOLE by reading the tree from disk */
static gboolean
_ostree_mutable_tree_make_whole (OstreeMutableTree *self,
GCancellable *cancellable,
GError **error)
{
if (self->state == MTREE_STATE_WHOLE)
return TRUE;

g_assert_cmpuint (self->state, ==, MTREE_STATE_LAZY);
g_assert_nonnull (self->repo);
g_assert_nonnull (self->contents_checksum);
g_assert_nonnull (self->metadata_checksum);
g_assert_cmpuint (g_hash_table_size (self->files), ==, 0);
g_assert_cmpuint (g_hash_table_size (self->subdirs), ==, 0);

g_autoptr(GVariant) dirtree = NULL;
if (!ostree_repo_load_variant (self->repo, OSTREE_OBJECT_TYPE_DIR_TREE,
self->contents_checksum, &dirtree, error))
return FALSE;

{
g_autoptr(GVariant) dir_file_contents = g_variant_get_child_value (dirtree, 0);
GVariantIter viter;
g_variant_iter_init (&viter, dir_file_contents);
const char *fname;
GVariant *contents_csum_v = NULL;
while (g_variant_iter_loop (&viter, "(&s@ay)", &fname, &contents_csum_v))
{
char tmp_checksum[OSTREE_SHA256_STRING_LEN + 1];
_ostree_checksum_inplace_from_bytes_v (contents_csum_v, tmp_checksum);
g_hash_table_insert (self->files, g_strdup (fname),
g_strdup (tmp_checksum));
}
}

/* Process subdirectories */
{
g_autoptr(GVariant) dir_subdirs = g_variant_get_child_value (dirtree, 1);
const char *dname;
GVariant *subdirtree_csum_v = NULL;
GVariant *subdirmeta_csum_v = NULL;
GVariantIter viter;
g_variant_iter_init (&viter, dir_subdirs);
while (g_variant_iter_loop (&viter, "(&s@ay@ay)", &dname,
&subdirtree_csum_v, &subdirmeta_csum_v))
{
char subdirtree_checksum[OSTREE_SHA256_STRING_LEN+1];
_ostree_checksum_inplace_from_bytes_v (subdirtree_csum_v, subdirtree_checksum);
char subdirmeta_checksum[OSTREE_SHA256_STRING_LEN+1];
_ostree_checksum_inplace_from_bytes_v (subdirmeta_csum_v, subdirmeta_checksum);
insert_child_mtree (self, dname, ostree_mutable_tree_new_from_checksum (
self->repo, subdirtree_checksum, subdirmeta_checksum));
}
}

g_clear_object (&self->repo);
self->state = MTREE_STATE_WHOLE;
return TRUE;
}

/* _ostree_mutable_tree_make_whole can fail if state == MTREE_STATE_LAZY, but
* we have getters that preceed the existence of MTREE_STATE_LAZY which can't
* return errors. So instead this function will fail and print a warning. */
static void
_assert_ostree_mutable_tree_make_whole (OstreeMutableTree *self)
{
GError *err = NULL;
if (!_ostree_mutable_tree_make_whole (self, NULL, &err))
{
g_warning ("Failed to make lazy OstreeMutableTree whole and no way to "
"report error to caller. Error was: %s",
err ? err->message : "unknown");
g_return_if_reached ();
}
}

void
ostree_mutable_tree_set_metadata_checksum (OstreeMutableTree *self,
const char *checksum)
Expand Down Expand Up @@ -183,6 +280,8 @@ ostree_mutable_tree_set_contents_checksum (OstreeMutableTree *self,
"already has a checksum set. Old checksum %s, new checksum %s",
self->contents_checksum, checksum);

_assert_ostree_mutable_tree_make_whole (self);

g_free (self->contents_checksum);
self->contents_checksum = g_strdup (checksum);
}
Expand Down Expand Up @@ -215,6 +314,9 @@ ostree_mutable_tree_replace_file (OstreeMutableTree *self,
if (!ot_util_filename_validate (name, error))
goto out;

if (!_ostree_mutable_tree_make_whole (self, NULL, error))
goto out;

if (g_hash_table_lookup (self->subdirs, name))
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
Expand Down Expand Up @@ -256,6 +358,9 @@ ostree_mutable_tree_ensure_dir (OstreeMutableTree *self,
if (!ot_util_filename_validate (name, error))
goto out;

if (!_ostree_mutable_tree_make_whole (self, NULL, error))
goto out;

if (g_hash_table_lookup (self->files, name))
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
Expand All @@ -267,6 +372,7 @@ ostree_mutable_tree_ensure_dir (OstreeMutableTree *self,
if (!ret_dir)
{
ret_dir = ostree_mutable_tree_new ();
invalidate_contents_checksum (self);
insert_child_mtree (self, name, g_object_ref (ret_dir));
}

Expand All @@ -287,6 +393,9 @@ ostree_mutable_tree_lookup (OstreeMutableTree *self,
g_autoptr(OstreeMutableTree) ret_subdir = NULL;
g_autofree char *ret_file_checksum = NULL;

if (!_ostree_mutable_tree_make_whole (self, NULL, error))
goto out;

ret_subdir = ot_gobject_refz (g_hash_table_lookup (self->subdirs, name));
if (!ret_subdir)
{
Expand Down Expand Up @@ -328,6 +437,9 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self,
OstreeMutableTree *subdir = self; /* nofree */
g_autoptr(OstreeMutableTree) ret_parent = NULL;

if (!_ostree_mutable_tree_make_whole (self, NULL, error))
goto out;

g_assert (metadata_checksum != NULL);

if (!self->metadata_checksum)
Expand All @@ -348,6 +460,7 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self,
next = g_hash_table_lookup (subdir->subdirs, name);
if (!next)
{
invalidate_contents_checksum (subdir);
next = ostree_mutable_tree_new ();
ostree_mutable_tree_set_metadata_checksum (next, metadata_checksum);
insert_child_mtree (subdir, g_strdup (name), next);
Expand All @@ -364,6 +477,71 @@ ostree_mutable_tree_ensure_parent_dirs (OstreeMutableTree *self,
return ret;
}

const char empty_tree_csum[] = "6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d";

/**
* ostree_mutable_tree_fill_empty_from_dirtree:
*
* Merges @self with the tree given by @contents_checksum and
* @metadata_checksum, but only if it's possible without writing new objects to
* the @repo. We can do this if either @self is empty, the tree given by
* @contents_checksum is empty or if both trees already have the same
* @contents_checksum.
*
* Returns: @TRUE if merge was successful, @FALSE if it was not possible.
*
* This function enables optimisations when composing trees. The provided
* checksums are not loaded or checked when this function is called. Instead
* the contents will be loaded only when needed.
*/
gboolean
ostree_mutable_tree_fill_empty_from_dirtree (OstreeMutableTree *self,
OstreeRepo *repo,
const char *contents_checksum,
const char *metadata_checksum)
{
g_return_val_if_fail (repo, FALSE);
g_return_val_if_fail (contents_checksum, FALSE);
g_return_val_if_fail (metadata_checksum, FALSE);

switch (self->state)
{
case MTREE_STATE_LAZY:
{
if (g_strcmp0 (contents_checksum, self->contents_checksum) == 0 ||
g_strcmp0 (empty_tree_csum, self->contents_checksum) == 0)
break;

if (g_strcmp0 (empty_tree_csum, contents_checksum) == 0)
{
/* Adding an empty tree to a full one - stick with the old contents */
contents_checksum = self->contents_checksum;
break;
}
else
return FALSE;
}
case MTREE_STATE_WHOLE:
if (g_hash_table_size (self->files) == 0 &&
g_hash_table_size (self->subdirs) == 0)
break;
/* We're not empty - can't convert to a LAZY tree */
return FALSE;
default:
g_assert_not_reached ();
}

self->state = MTREE_STATE_LAZY;
g_set_object (&self->repo, repo);
ostree_mutable_tree_set_metadata_checksum (self, metadata_checksum);
if (g_strcmp0 (self->contents_checksum, contents_checksum) != 0)
{
invalidate_contents_checksum (self);
self->contents_checksum = g_strdup (contents_checksum);
}
return TRUE;
}

/**
* ostree_mutable_tree_walk:
* @self: Tree
Expand Down Expand Up @@ -392,6 +570,8 @@ ostree_mutable_tree_walk (OstreeMutableTree *self,
else
{
OstreeMutableTree *subdir;
if (!_ostree_mutable_tree_make_whole (self, NULL, error))
return FALSE;

subdir = g_hash_table_lookup (self->subdirs, split_path->pdata[start]);
if (!subdir)
Expand All @@ -410,6 +590,7 @@ ostree_mutable_tree_walk (OstreeMutableTree *self,
GHashTable *
ostree_mutable_tree_get_subdirs (OstreeMutableTree *self)
{
_assert_ostree_mutable_tree_make_whole (self);
return self->subdirs;
}

Expand All @@ -422,6 +603,7 @@ ostree_mutable_tree_get_subdirs (OstreeMutableTree *self)
GHashTable *
ostree_mutable_tree_get_files (OstreeMutableTree *self)
{
_assert_ostree_mutable_tree_make_whole (self);
return self->files;
}

Expand All @@ -435,3 +617,27 @@ ostree_mutable_tree_new (void)
{
return (OstreeMutableTree*)g_object_new (OSTREE_TYPE_MUTABLE_TREE, NULL);
}

/**
* ostree_mutable_tree_new_from_checksum:
* @repo: The repo which contains the objects refered by the checksums.
* @contents_checksum: dirtree checksum
* @metadata_checksum: dirmeta checksum
*
* Creates a new OstreeMutableTree with the contents taken from the given repo
* and checksums. The data will be loaded from the repo lazily as needed.
*
* Returns: (transfer full): A new tree
*/
OstreeMutableTree *
ostree_mutable_tree_new_from_checksum (OstreeRepo *repo,
const char *contents_checksum,
const char *metadata_checksum)
{
OstreeMutableTree* out = (OstreeMutableTree*)g_object_new (OSTREE_TYPE_MUTABLE_TREE, NULL);
out->state = MTREE_STATE_LAZY;
out->repo = g_object_ref (repo);
out->contents_checksum = g_strdup (contents_checksum);
out->metadata_checksum = g_strdup (metadata_checksum);
return out;
}
Loading

0 comments on commit 3d8a8ac

Please sign in to comment.