From 77244efe03bce57052d7f63c32fa242dde087e8e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 20 Jul 2018 12:08:50 -0400 Subject: [PATCH] serialize-status: serialize global and repo-local exclude file metadata Changes to the global or repo-local excludes files can change the results returned by "git status" for untracked files. Therefore, it is important that the exclude-file values used during serialization are still current at the time of deserialization. Teach "git status --serialize" to report metadata on the user's global exclude file (which defaults to "$XDG_HOME/git/ignore") and for the repo-local excludes file (which is in ".git/info/excludes"). Serialize will record the pathnames and mtimes for these files in the serialization header (next to the mtime data for the .git/index file). Teach "git status --deserialize" to validate this new metadata. If either exclude file has changed since the serialization-cache-file was written, then deserialize will reject the cache file and force a full/normal status run. Signed-off-by: Jeff Hostetler --- wt-status-deserialize.c | 85 ++++++++++++++++++++++++++++ wt-status-serialize.c | 120 ++++++++++++++++++++++++++++++++++++++++ wt-status.h | 8 +++ 3 files changed, 213 insertions(+) diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index fb5bbd19effdc5..bdbb43c3b112fe 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -6,6 +6,7 @@ #include "pkt-line.h" #include "trace.h" #include "statinfo.h" +#include "path.h" static struct trace_key trace_deserialize = TRACE_KEY_INIT(DESERIALIZE); @@ -69,12 +70,69 @@ static int my_validate_index(const struct cache_time *mtime_reported) return DESERIALIZE_OK; } +/* + * Use the given key and exclude pathname to compute a serialization header + * reflecting the current contents on disk. See if that matches the value + * computed for this key when the cache was written. Reject the cache if + * anything has changed. + */ +static int my_validate_excludes(const char *path, const char *key, const char *line) +{ + struct strbuf sb = STRBUF_INIT; + int r; + + wt_serialize_compute_exclude_header(&sb, key, path); + + r = (strcmp(line, sb.buf) ? DESERIALIZE_ERR : DESERIALIZE_OK); + + if (r == DESERIALIZE_ERR) + trace_printf_key(&trace_deserialize, + "%s changed [cached '%s'][observed '%s']", + key, line, sb.buf); + + strbuf_release(&sb); + return r; +} + +static int my_parse_core_excludes(const char *line) +{ + /* + * In dir.c:setup_standard_excludes() they use either the value of + * the "core.excludefile" variable (stored in the global "excludes_file" + * variable) -or- the default value "$XDG_HOME/git/ignore". This is done + * during wt_status_collect_untracked() which we are hoping to not call. + * + * Fake the setup here. + */ + + if (excludes_file) { + return my_validate_excludes(excludes_file, "core_excludes", line); + } else { + char *path = xdg_config_home("ignore"); + int r = my_validate_excludes(path, "core_excludes", line); + free(path); + return r; + } +} + +static int my_parse_repo_excludes(const char *line) +{ + char *path = git_pathdup("info/exclude"); + int r = my_validate_excludes(path, "repo_excludes", line); + free(path); + + return r; +} + static int wt_deserialize_v1_header(struct wt_status *s, int fd) { struct cache_time index_mtime; int line_len, nr_fields; const char *line; const char *arg; + int have_required_index_mtime = 0; + int have_required_core_excludes = 0; + int have_required_repo_excludes = 0; /* * parse header lines up to the first flush packet. @@ -90,6 +148,20 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd) nr_fields, line); return DESERIALIZE_ERR; } + have_required_index_mtime = 1; + continue; + } + + if (skip_prefix(line, "core_excludes ", &arg)) { + if (my_parse_core_excludes(line) != DESERIALIZE_OK) + return DESERIALIZE_ERR; + have_required_core_excludes = 1; + continue; + } + if (skip_prefix(line, "repo_excludes ", &arg)) { + if (my_parse_repo_excludes(line) != DESERIALIZE_OK) + return DESERIALIZE_ERR; + have_required_repo_excludes = 1; continue; } @@ -174,6 +246,19 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd) return DESERIALIZE_ERR; } + if (!have_required_index_mtime) { + trace_printf_key(&trace_deserialize, "missing '%s'", "index_mtime"); + return DESERIALIZE_ERR; + } + if (!have_required_core_excludes) { + trace_printf_key(&trace_deserialize, "missing '%s'", "core_excludes"); + return DESERIALIZE_ERR; + } + if (!have_required_repo_excludes) { + trace_printf_key(&trace_deserialize, "missing '%s'", "repo_excludes"); + return DESERIALIZE_ERR; + } + return my_validate_index(&index_mtime); } diff --git a/wt-status-serialize.c b/wt-status-serialize.c index 7af50341619ed0..036979f30ad026 100644 --- a/wt-status-serialize.c +++ b/wt-status-serialize.c @@ -1,13 +1,131 @@ #include "git-compat-util.h" +#include "environment.h" #include "hex.h" #include "repository.h" #include "wt-status.h" #include "pkt-line.h" #include "trace.h" #include "read-cache-ll.h" +#include "path.h" static struct trace_key trace_serialize = TRACE_KEY_INIT(SERIALIZE); +/* + * Compute header record for exclude file using format: + * SP SP LF + */ +void wt_serialize_compute_exclude_header(struct strbuf *sb, + const char *key, + const char *path) +{ + struct stat st; + struct stat_data sd; + + memset(&sd, 0, sizeof(sd)); + + strbuf_setlen(sb, 0); + + if (!path || !*path) { + strbuf_addf(sb, "%s U (unset)", key); + } else if (lstat(path, &st) == -1) { + if (is_missing_file_error(errno)) + strbuf_addf(sb, "%s E (not-found) %s", key, path); + else + strbuf_addf(sb, "%s E (other) %s", key, path); + } else { + fill_stat_data(&sd, &st); + strbuf_addf(sb, "%s F %d %d %s", + key, sd.sd_mtime.sec, sd.sd_mtime.nsec, path); + } +} + +static void append_exclude_info(int fd, const char *path, const char *key) +{ + struct strbuf sb = STRBUF_INIT; + + wt_serialize_compute_exclude_header(&sb, key, path); + + packet_write_fmt(fd, "%s\n", sb.buf); + + strbuf_release(&sb); +} + +static void append_core_excludes_file_info(int fd) +{ + /* + * Write pathname and mtime of the core/global excludes file to + * the status cache header. Since a change in the global excludes + * will/may change the results reported by status, the deserialize + * code should be able to reject the status cache if the excludes + * file changes since when the cache was written. + * + * The "core.excludefile" setting defaults to $XDG_HOME/git/ignore + * and uses a global variable which should have been set during + * wt_status_collect_untracked(). + * + * See dir.c:setup_standard_excludes() + */ + append_exclude_info(fd, excludes_file, "core_excludes"); +} + +static void append_repo_excludes_file_info(int fd) +{ + /* + * Likewise, there is a per-repo excludes file in .git/info/excludes + * that can change the results reported by status. And the deserialize + * code needs to be able to reject the status cache if this file + * changes. + * + * See dir.c:setup_standard_excludes() and git_path_info_excludes(). + * We replicate the pathname construction here because of the static + * variables/functions used in dir.c. + */ + char *path = git_pathdup("info/exclude"); + + append_exclude_info(fd, path, "repo_excludes"); + + free(path); +} + +/* + * WARNING: The status cache attempts to preserve the essential in-memory + * status data after a status scan into a "serialization" (aka "status cache") + * file. It allows later "git status --deserialize=" instances to + * just print the cached status results without scanning the workdir (and + * without reading the index). + * + * The status cache file is valid as long as: + * [1] the set of functional command line options are the same (think "-u"). + * [2] repo-local and user-global configuration settings are compatible. + * [3] nothing in the workdir has changed. + * + * We rely on: + * [1.a] We remember the relevant (functional, non-display) command line + * arguments in the status cache header. + * [2.a] We use the mtime of the .git/index to detect staging changes. + * [2.b] We use the mtimes of the excludes files to detect changes that + * might affect untracked file reporting. + * + * But we need external help to verify [3]. + * [] This includes changes to tracked files. + * [] This includes changes to tracked .gitignore files that might change + * untracked file reporting. + * [] This includes the creation of new, untracked per-directory .gitignore + * files that might change untracked file reporting. + * + * [3.a] On GVFS repos, we rely on the GVFS service (mount) daemon to + * watch the filesystem and invalidate (delete) the status cache + * when anything changes inside the workdir. + * + * [3.b] TODO This problem is not solved for non-GVFS repos. + * [] It is possible that the untracked-cache index extension + * could help with this but that requires status to read the + * index to load the extension. + * [] It is possible that the new fsmonitor facility could also + * provide this information, but that to requires reading the + * index. + */ + /* * Write V1 header fields. */ @@ -20,6 +138,8 @@ static void wt_serialize_v1_header(struct wt_status *s, int fd) packet_write_fmt(fd, "index_mtime %d %d\n", s->repo->index->timestamp.sec, s->repo->index->timestamp.nsec); + append_core_excludes_file_info(fd); + append_repo_excludes_file_info(fd); /* * Write data from wt_status to qualify this status report. diff --git a/wt-status.h b/wt-status.h index 31cfe5003e052b..d7935b2d80d06b 100644 --- a/wt-status.h +++ b/wt-status.h @@ -233,4 +233,12 @@ void wt_status_serialize_v1(int fd, struct wt_status *s); int wt_status_deserialize(const struct wt_status *cmd_s, const char *path); +/* + * A helper routine for serialize and deserialize to compute + * metadata for the user-global and repo-local excludes files. + */ +void wt_serialize_compute_exclude_header(struct strbuf *sb, + const char *key, + const char *path); + #endif /* STATUS_H */