-
Notifications
You must be signed in to change notification settings - Fork 71
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(fs): add dirs caching, make it a bit more robust #322
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -261,8 +261,8 @@ def _ids_cache(self): | |
def _cache_path_id(self, path, *item_ids, cache=None): | ||
cache = cache or self._ids_cache | ||
for item_id in item_ids: | ||
cache["dirs"][path].append(item_id) | ||
cache["ids"][item_id] = path | ||
cache["dirs"][path].append(item_id) | ||
|
||
@cached_property | ||
def _list_params(self): | ||
|
@@ -316,7 +316,9 @@ def _gdrive_list_ids(self, query_ids): | |
query = f"({query}) and trashed=false" | ||
return self._gdrive_list(query) | ||
|
||
def _get_remote_item_ids(self, parent_ids, title): | ||
def _get_remote_item_ids( | ||
self, parent_ids, parent_path, title, use_cache=True | ||
): | ||
if not parent_ids: | ||
return None | ||
query = "trashed=false and ({})".format( | ||
|
@@ -326,13 +328,19 @@ def _get_remote_item_ids(self, parent_ids, title): | |
) | ||
query += " and title='{}'".format(title.replace("'", "\\'")) | ||
|
||
# GDrive list API is case insensitive, we need to compare | ||
# all results and pick the ones with the right title | ||
return [ | ||
item["id"] | ||
for item in self._gdrive_list(query) | ||
if item["title"] == title | ||
] | ||
res = [] | ||
for item in self._gdrive_list(query): | ||
# GDrive list API is case insensitive, we need to compare | ||
# all results and pick the ones with the right title | ||
if item["title"] == title: | ||
res.append(item["id"]) | ||
|
||
if item["mimeType"] == FOLDER_MIME_TYPE and use_cache: | ||
self._cache_path_id( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. C: This is the main change here. It was missing and DVC kept hitting |
||
posixpath.join(parent_path, item["title"]), item["id"] | ||
) | ||
|
||
return res | ||
|
||
def _get_cached_item_ids(self, path, use_cache): | ||
if not path: | ||
|
@@ -348,7 +356,9 @@ def _path_to_item_ids(self, path, create=False, use_cache=True): | |
|
||
parent_path, title = posixpath.split(path) | ||
parent_ids = self._path_to_item_ids(parent_path, create, use_cache) | ||
item_ids = self._get_remote_item_ids(parent_ids, title) | ||
item_ids = self._get_remote_item_ids( | ||
parent_ids, parent_path, title, use_cache | ||
) | ||
if item_ids: | ||
return item_ids | ||
|
||
|
@@ -418,11 +428,7 @@ def info(self, path): | |
def ls(self, path, detail=False): | ||
bucket, base = self.split_path(path) | ||
|
||
cached = base in self._ids_cache["dirs"] | ||
if cached: | ||
dir_ids = self._ids_cache["dirs"][base] | ||
else: | ||
dir_ids = self._path_to_item_ids(base) | ||
dir_ids = self._path_to_item_ids(base) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. C: since now |
||
|
||
if not dir_ids: | ||
raise FileNotFoundError( | ||
|
@@ -452,9 +458,6 @@ def ls(self, path, detail=False): | |
} | ||
) | ||
|
||
if not cached: | ||
self._cache_path_id(root_path, *dir_ids) | ||
|
||
if detail: | ||
return contents | ||
else: | ||
|
@@ -464,10 +467,10 @@ def find(self, path, detail=False, **kwargs): | |
bucket, base = self.split_path(path) | ||
|
||
seen_paths = set() | ||
cached = base in self._ids_cache["dirs"] | ||
if not cached: | ||
dir_ids = self._path_to_item_ids(base) | ||
self._cache_path_id(base, *dir_ids) | ||
|
||
# Make sure the base path is cached and dir_ids below has some | ||
# dirs revelant to this call | ||
self._path_to_item_ids(base) | ||
|
||
dir_ids = [self._ids_cache["ids"].copy()] | ||
contents = [] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
C: This is better to do in this order since we always refer first to
dirs
to decide id item is cached or not. Still looks fragile, and we still ideally should be using proper locking. I'll looking into this as a follow up.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIRC, this is not true in
find
where we useids
first.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ahh, I see that you also changed
find
below.