Skip to content

Commit

Permalink
Merge branch 'develop' into backports/96cd562
Browse files: browse the repository at this point in the history
  • Loading branch information
dsotirho-ucsc committed Sep 20, 2024
2 parents: 96cd562 + 34a12fb; commit b34c9c6
Show file tree
Hide file tree
Showing 19 changed files with 241 additions and 76 deletions.
2 changes: 1 addition & 1 deletion lambdas/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@
# changes and reset the minor version to zero. Otherwise, increment only
# the minor version for backwards compatible changes. A backwards
# compatible change is one that does not require updates to clients.
'version': '9.1'
'version': '9.2'
},
'tags': [
{
Expand Down
131 changes: 123 additions & 8 deletions lambdas/service/openapi.json

Large diffs are not rendered by default.

16 changes: 2 additions & 14 deletions scripts/post_deploy_tdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
TDRPlugin,
)
from azul.terra import (
SourceRef as TDRSourceRef,
TDRClient,
TDRSourceSpec,
)
Expand Down Expand Up @@ -91,20 +90,9 @@ def verify_source(self,
catalog: CatalogName,
source_spec: TDRSourceSpec
) -> None:
source = self.tdr.lookup_source(source_spec)
log.info('TDR client is authorized for API access to %s.', source_spec)
require(source.project == source_spec.subdomain,
'Actual Google project of TDR source differs from configured one',
source.project, source_spec.subdomain)
# Uppercase is standard for multi-regions in the documentation but TDR
# returns 'us' in lowercase
require(source.location.lower() == config.tdr_source_location.lower(),
'Actual storage location of TDR source differs from configured one',
source.location, config.tdr_source_location)
# FIXME: Eliminate azul.terra.TDRClient.TDRSource
# https://github.com/DataBiosphere/azul/issues/5524
ref = TDRSourceRef(id=source.id, spec=source_spec)
plugin = self.repository_plugin(catalog)
ref = plugin.resolve_source(str(source_spec))
log.info('TDR client is authorized for API access to %s.', source_spec)
subgraph_count = sum(plugin.list_partitions(ref).values())
require(subgraph_count > 0,
'Source spec is empty (bad prefix?)', source_spec)
Expand Down
2 changes: 1 addition & 1 deletion scripts/update_subgraph_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
RepositoryPlugin,
)
from azul.terra import (
SourceRef as TDRSourceRef,
TDRClient,
TDRSourceRef,
)

environment = load_module(module_name='environment',
Expand Down
6 changes: 4 additions & 2 deletions src/azul/plugins/metadata/hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'estimated_cell_count': 'projectEstimatedCellCount',
'is_tissue_atlas_project': 'isTissueAtlasProject',
'tissue_atlas': 'tissueAtlas',
'bionetwork_name': 'bionetworkName'
'bionetwork_name': 'bionetworkName',
'data_use_restriction': 'dataUseRestriction'
},
'sequencing_protocols': {
'instrument_manufacturer_model': 'instrumentManufacturerModel',
Expand Down Expand Up @@ -332,7 +333,8 @@ def facets(self) -> Sequence[str]:
'publicationTitle',
'isTissueAtlasProject',
'tissueAtlas',
'bionetworkName'
'bionetworkName',
'dataUseRestriction'
]

@property
Expand Down
6 changes: 4 additions & 2 deletions src/azul/plugins/metadata/hca/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,8 @@ def _project_types(cls) -> FieldTypes:
'is_tissue_atlas_project': null_bool,
'tissue_atlas': [tissue_atlas],
'bionetwork_name': [null_str],
'estimated_cell_count': null_int
'estimated_cell_count': null_int,
'data_use_restriction': null_str
}

def _project(self, project: api.Project) -> MutableJSON:
Expand Down Expand Up @@ -733,7 +734,8 @@ def _project(self, project: api.Project) -> MutableJSON:
for bionetwork in project.bionetworks),
'tissue_atlas': list(map(self._tissue_atlas, project.bionetworks)),
'bionetwork_name': sorted(bionetwork.name for bionetwork in project.bionetworks),
'estimated_cell_count': project.estimated_cell_count
'estimated_cell_count': project.estimated_cell_count,
'data_use_restriction': project.data_use_restriction
}

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/hca/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ def make_projects(self, entry) -> MutableJSONs:
'estimatedCellCount': project['estimated_cell_count'],
'isTissueAtlasProject': project['is_tissue_atlas_project'],
'tissueAtlas': project.get('tissue_atlas'),
'bionetworkName': project['bionetwork_name']
'bionetworkName': project['bionetwork_name'],
'dataUseRestriction': project.get('data_use_restriction')
}
if self.entity_type == 'projects':
translated_project['projectDescription'] = project.get('project_description', [])
Expand Down
4 changes: 2 additions & 2 deletions src/azul/plugins/repository/tdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@
longest_common_prefix,
)
from azul.terra import (
SourceRef as TDRSourceRef,
TDRClient,
TDRSourceRef,
TDRSourceSpec,
)
from azul.time import (
Expand Down Expand Up @@ -193,7 +193,7 @@ def _drs_client(cls,
return cls._user_authenticated_tdr(authentication).drs_client()

def _lookup_source_id(self, spec: TDRSourceSpec) -> str:
return self.tdr.lookup_source(spec).id
return self.tdr.lookup_source(spec)

def list_bundles(self,
source: TDRSourceRef,
Expand Down
2 changes: 1 addition & 1 deletion src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@
TDRBundle,
TDRBundleFQID,
TDRPlugin,
TDRSourceRef,
)
from azul.terra import (
TDRSourceRef,
TDRSourceSpec,
)
from azul.types import (
Expand Down
2 changes: 1 addition & 1 deletion src/azul/plugins/repository/tdr_hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
TDRPlugin,
)
from azul.terra import (
SourceRef as TDRSourceRef,
TDRSourceRef,
TDRSourceSpec,
)
from azul.types import (
Expand Down
47 changes: 25 additions & 22 deletions src/azul/terra.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def contains(self, other: 'SourceSpec') -> bool:
)


class SourceRef(BaseSourceRef[TDRSourceSpec, 'TDRSourceRef']):
class TDRSourceRef(BaseSourceRef[TDRSourceSpec, 'TDRSourceRef']):
pass


Expand Down Expand Up @@ -400,28 +400,32 @@ class TDRClient(SAMClient):
A client for the Broad Institute's Terra Data Repository aka "Jade".
"""

# FIXME: Eliminate azul.terra.TDRClient.TDRSource
# https://github.com/DataBiosphere/azul/issues/5524
@attrs.frozen(kw_only=True)
class TDRSource:
project: str
id: str
location: str

@cache
def lookup_source(self, source_spec: TDRSourceSpec) -> TDRSource:
def lookup_source(self, source_spec: TDRSourceSpec) -> str:
"""
Validate that the repository's reported values for the snapshot's Google
project name and storage location match our expectations, and return the
snapshot's UUID.
"""
source = self._lookup_source(source_spec)
actual_project = source['dataProject']
require(actual_project == source_spec.subdomain,
'Actual Google project of TDR source differs from configured one',
actual_project, source_spec.subdomain)
storage = one(
storage
for dataset in (s['dataset'] for s in source['source'])
for storage in dataset['storage']
if storage['cloudResource'] == 'bigquery'
resource
for resource in source['storage']
if resource['cloudResource'] == 'bigquery'
)
return self.TDRSource(project=source['dataProject'],
id=source['id'],
location=storage['region'])

def _retrieve_source(self, source: SourceRef) -> MutableJSON:
actual_location = storage['region']
# Uppercase is standard for multi-regions in the documentation but TDR
# returns 'us' in lowercase
require(actual_location.lower() == config.tdr_source_location.lower(),
'Actual storage location of TDR source differs from configured one',
actual_location, config.tdr_source_location)
return source['id']

def _retrieve_source(self, source: TDRSourceRef) -> MutableJSON:
endpoint = self._repository_endpoint('snapshots', source.id)
response = self._request('GET', endpoint)
response = self._check_response(endpoint, response)
Expand All @@ -438,8 +442,7 @@ def _lookup_source(self, source: TDRSourceSpec) -> MutableJSON:
if total == 0:
raise self._insufficient_access(str(endpoint))
elif total == 1:
source_id = one(response['items'])['id']
return self._retrieve_source(SourceRef(id=source_id, spec=source))
return one(response['items'])
else:
raise TerraNameConflictException(endpoint, source.name, response)

Expand Down Expand Up @@ -632,7 +635,7 @@ def for_registered_user(cls, authentication: OAuth2) -> 'TDRClient':
def drs_client(self) -> DRSClient:
return DRSClient(http_client=self._http_client)

def get_duos(self, source: SourceRef) -> Optional[MutableJSON]:
def get_duos(self, source: TDRSourceRef) -> Optional[MutableJSON]:
response = self._retrieve_source(source)
try:
duos_id = response['duosFirecloudGroup']['duosId']
Expand Down
2 changes: 2 additions & 0 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ class Project(Entity):
supplementary_links: OrderedSet[str]
estimated_cell_count: int | None
bionetworks: OrderedSet[Bionetwork]
data_use_restriction: str | None

def __init__(self, json: JSON) -> None:
super().__init__(json)
Expand All @@ -317,6 +318,7 @@ def __init__(self, json: JSON) -> None:
self.bionetworks = OrderedSet(Bionetwork(**bionetwork)
for bionetwork in content.get('hca_bionetworks', ())
if bionetwork)
self.data_use_restriction = content.get('data_use_restriction')

def _accessions(self, namespace: str) -> set[str]:
return {a.accession for a in self.accessions if a.namespace == namespace}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions test/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,6 @@
from azul.plugins.metadata.anvil.bundle import (
Link,
)
from azul.plugins.repository.tdr import (
TDRSourceRef,
)
from azul.plugins.repository.tdr_anvil import (
BundleEntityType,
TDRAnvilBundleFQID,
Expand All @@ -171,6 +168,7 @@
from azul.terra import (
ServiceAccountCredentialsProvider,
TDRClient,
TDRSourceRef,
TDRSourceSpec,
UserCredentialsProvider,
)
Expand Down
Loading

0 comments on commit b34c9c6

Please sign in to comment.