Skip to content

Commit

Permalink
Implement maxdepth in GCSFileSystem._find (#566)
Browse files Browse the repository at this point in the history
  • Loading branch information
ianthomas23 committed Jul 4, 2023
1 parent 27e681f commit 1961f9f
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 3 deletions.
17 changes: 16 additions & 1 deletion gcsfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1225,11 +1225,21 @@ async def _isdir(self, path):
return False

async def _find(
self, path, withdirs=False, detail=False, prefix="", versions=False, **kwargs
self,
path,
withdirs=False,
detail=False,
prefix="",
versions=False,
maxdepth=None,
**kwargs,
):
path = self._strip_protocol(path)
bucket, key, generation = self.split_path(path)

if maxdepth is not None and maxdepth < 1:
raise ValueError("maxdepth must be at least 1")

if prefix:
_path = "" if not key else key.rstrip("/") + "/"
_prefix = f"{_path}{prefix}"
Expand Down Expand Up @@ -1276,6 +1286,11 @@ async def _find(
if withdirs:
objects = sorted(objects + list(dirs.values()), key=lambda x: x["name"])

if maxdepth:
# Filter returned objects based on requested maxdepth
depth = path.count("/") + maxdepth
objects = list(filter(lambda o: o["name"].count("/") <= depth, objects))

if detail:
if versions:
return {f"{o['name']}#{o['generation']}": o for o in objects}
Expand Down
43 changes: 41 additions & 2 deletions gcsfs/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,10 +1006,16 @@ def test_put_small_cache_validity(gcs):
def test_pseudo_dir_find(gcs):
gcs.rm(f"{TEST_BUCKET}/*", recursive=True)
gcs.touch(f"{TEST_BUCKET}/a/b/file")

c = gcs.glob(f"{TEST_BUCKET}/a/b/*")
assert c == [f"{TEST_BUCKET}/a/b/file"]

b = set(gcs.glob(f"{TEST_BUCKET}/a/*"))
assert f"{TEST_BUCKET}/a/b" in b
assert b == {f"{TEST_BUCKET}/a/b"}

a = set(gcs.glob(f"{TEST_BUCKET}/*"))
assert f"{TEST_BUCKET}/a" in a
assert a == {f"{TEST_BUCKET}/a"}

assert gcs.find(TEST_BUCKET) == [f"{TEST_BUCKET}/a/b/file"]
assert gcs.find(f"{TEST_BUCKET}/a", withdirs=True) == [
f"{TEST_BUCKET}/a",
Expand Down Expand Up @@ -1395,3 +1401,36 @@ def test_copy_cache_invalidated(gcs):

# Prior to fix the following failed as cache stale
assert gcs.isfile(target_file2)


def test_find_maxdepth(gcs):
    """Exercise ``find`` with and without a ``maxdepth`` limit.

    Relies on the bucket already holding a ``nested`` directory with two
    files and a ``nested2`` sub-directory with two more files (presumably
    created by the ``gcs`` fixture -- confirm against the test setup).
    """
    root = f"{TEST_BUCKET}/nested"
    subdir = f"{root}/nested2"
    top_level_files = [f"{root}/file1", f"{root}/file2"]
    sub_level_files = [f"{subdir}/file1", f"{subdir}/file2"]

    # No depth limit: every file below the root is expected.
    unlimited = gcs.find(root, maxdepth=None)
    assert unlimited == top_level_files + sub_level_files

    # No depth limit, directories included alongside the files.
    unlimited_with_dirs = gcs.find(root, maxdepth=None, withdirs=True)
    assert unlimited_with_dirs == [
        root,
        *top_level_files,
        subdir,
        *sub_level_files,
    ]

    # Depth of one: only the direct children of the root are expected.
    shallow = gcs.find(root, maxdepth=1)
    assert shallow == top_level_files

    # Depth of one with directories: the sub-directory is listed, but
    # its contents are not.
    shallow_with_dirs = gcs.find(root, maxdepth=1, withdirs=True)
    assert shallow_with_dirs == [root, *top_level_files, subdir]

    # A depth below one is expected to be rejected.
    with pytest.raises(ValueError, match="maxdepth must be at least 1"):
        gcs.find(root, maxdepth=0)

0 comments on commit 1961f9f

Please sign in to comment.