Skip to content

Commit

Permalink
Merge remote-tracking branch 'ghall/just_gene_sets'
Browse files Browse the repository at this point in the history
  • Loading branch information
Alok Saldanha authored and Alok Saldanha committed Mar 2, 2024
2 parents f046e7c + 19caa6c commit 66cca86
Showing 1 changed file with 22 additions and 5 deletions.
27 changes: 22 additions & 5 deletions cellxgene_gateway/items/file/fileitem_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,21 @@ def __init__(
h5ad_suffix=dir_util.h5ad_suffix,
annotation_dir_suffix=dir_util.annotations_suffix,
annotation_file_suffix=".csv",
gene_set_file_suffix="_gene_sets.csv",
):
self._name = name
self.base_path = base_path
self.h5ad_suffix = h5ad_suffix
self.annotation_dir_suffix = annotation_dir_suffix
self.annotation_file_suffix = annotation_file_suffix
self.gene_set_file_suffix = gene_set_file_suffix

@property
def name(self):
return self._name or f"Files:{self.base_path}"

def is_gene_set(self, path: str) -> bool:
return ("_gene_sets" in path or "-gene-sets" in path) and path.endswith(
self.annotation_file_suffix
)
return path.endswith(self.gene_set_file_suffix)

def is_h5ad_file(self, path: str) -> bool:
return path.endswith(self.h5ad_suffix) and os.path.isfile(path)
Expand Down Expand Up @@ -186,12 +186,29 @@ def make_annotations_for_fileitem(self, item: FileItem) -> List[FileItem]:
annotations_subpath = self.get_annotations_subpath(item)
annotations_fullpath = self.full_path(annotations_subpath)
if os.path.isdir(annotations_fullpath):
return [
sorted_files = sorted(os.listdir(annotations_fullpath))
annotation_files = [
self.make_fileitem_from_path(annotation, annotations_subpath, True)
for annotation in sorted(os.listdir(annotations_fullpath))
for annotation in sorted_files
if annotation.endswith(self.annotation_file_suffix)
and not self.is_gene_set(annotation)
and os.path.isfile(os.path.join(annotations_fullpath, annotation))
]

# Catch gene sets without accompanying [annotations].csv
gene_sets_files = [
self.make_fileitem_from_path(
annotation[: -len(self.gene_set_file_suffix)] + ".csv",
annotations_subpath,
True,
)
for annotation in sorted_files
if self.is_gene_set(annotation)
and annotation[: -len(self.gene_set_file_suffix)]
not in [a.name for a in annotation_files]
and os.path.isfile(os.path.join(annotations_fullpath, annotation))
]

return sorted(annotation_files + gene_sets_files, key=lambda x: x.name)
else:
return None

0 comments on commit 66cca86

Please sign in to comment.