-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Pulled over a lot of the wrappers from scRNAseq.
- Loading branch information
Showing
12 changed files
with
389 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#' List available references | ||
#' | ||
#' List the available reference datasets and the associated versions in \pkg{celldex}. | ||
#' | ||
#' @param name String containing the name of the reference dataset. | ||
#' | ||
#' @return | ||
#' For \code{listReferences}, a character vector containing the names of the available references. | ||
#' | ||
#' For \code{listVersions}, a character vector containing the names of the available versions of the \code{name} reference. | ||
#' | ||
#' For \code{fetchLatestVersion}, a string containing the name of the latest version. | ||
#' | ||
#' @author Aaron Lun | ||
#' | ||
#' @examples | ||
#' listReferences() | ||
#' listVersions("immgen") | ||
#' fetchLatestVersion("immgen") | ||
#' | ||
#' @export | ||
#' @importFrom gypsum listAssets | ||
listReferences <- function() { | ||
listAssets("celldex") | ||
} | ||
|
||
#' @export | ||
#' @rdname listVersions | ||
listVersions <- function(name) { | ||
gypsum::listVersions("celldex", name) | ||
} | ||
|
||
#' @export | ||
#' @rdname listVersions | ||
#' @importFrom gypsum fetchLatest | ||
fetchLatestVersion <- function(name) { | ||
fetchLatest("celldex", name) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#' Search reference metadata | ||
#' | ||
#' Search for reference datasets of interest based on matching text in the associated metadata. | ||
#' | ||
#' @param query String or a \code{gypsum.search.object}, see Examples. | ||
#' @inheritParams surveyReferences | ||
#' | ||
#' @return | ||
#' A \linkS4class{DataFrame} where each row corresponds to a dataset, containing various columns of metadata. | ||
#' Some columns may be lists to capture 1:many mappings. | ||
#' | ||
#' @details | ||
#' The returned DataFrame contains the usual suspects like the title and description for each dataset, | ||
#' the number of rows and columns, the organisms and genome builds involved, | ||
#' whether the dataset has any pre-computed reduced dimensions, and so on. | ||
#' More details can be found in the Bioconductor metadata schema at \url{https://github.com/ArtifactDB/bioconductor-metadata-index}. | ||
#' | ||
#' @author Aaron Lun | ||
#' | ||
#' @examples | ||
#' searchReferences(defineTextQuery("immun%", partial=TRUE))[,c("name", "title")] | ||
#' searchReferences(defineTextQuery("10090", field="taxonomy_id"))[,c("name", "title")] | ||
#' searchReferences( | ||
#' defineTextQuery("10090", field="taxonomy_id") & | ||
#' defineTextQuery("immun%", partial=TRUE) | ||
#' )[,c("name", "title")] | ||
#' | ||
#' @seealso | ||
#' \code{\link{surveyReferences}}, to easily obtain a listing of all available datasets. | ||
#' @export | ||
#' @importFrom S4Vectors DataFrame | ||
#' @importFrom gypsum cacheDirectory fetchMetadataDatabase searchMetadataTextFilter | ||
#' @importFrom DBI dbConnect dbDisconnect dbGetQuery | ||
#' @importFrom RSQLite SQLite | ||
searchReferences <- function(query, cache=cacheDirectory(), overwrite=FALSE, latest=TRUE) { | ||
filter <- searchMetadataTextFilter(query) | ||
|
||
bpath <- fetchMetadataDatabase(cache=cache, overwrite=overwrite) | ||
con <- dbConnect(SQLite(), bpath) | ||
on.exit(dbDisconnect(con)) | ||
|
||
stmt <- "SELECT json_extract(metadata, '$') AS meta, versions.asset AS asset, versions.version AS version, path"; | ||
if (!latest) { | ||
stmt <- paste0(stmt, ", versions.latest AS latest") | ||
} | ||
stmt <- paste0(stmt, " FROM paths LEFT JOIN versions ON paths.vid = versions.vid WHERE versions.project = 'scRNAseq'") | ||
if (latest) { | ||
stmt <- paste0(stmt, " AND versions.latest = 1") | ||
} | ||
if (!is.null(filter)) { | ||
stmt <- paste0(stmt, " AND ", filter$where) | ||
everything <- dbGetQuery(con, stmt, params=filter$parameters) | ||
} else { | ||
everything <- dbGetQuery(con, stmt) | ||
} | ||
|
||
sanitize_query_to_output(everything, latest) | ||
} | ||
|
||
#' @export | ||
gypsum::defineTextQuery |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#' Survey reference metadata | ||
#' | ||
#' Metadata survey for all available reference datasets in the \pkg{celldex} package. | ||
#' | ||
#' @param cache,overwrite Arguments to pass to \code{\link{fetchMetadataDatabase}}. | ||
#' @param latest Whether to only consider the latest version of each dataset. | ||
#' | ||
#' @return | ||
#' A \linkS4class{DataFrame} where each row corresponds to a dataset, containing various columns of metadata. | ||
#' Some columns may be lists to capture 1:many mappings. | ||
#' | ||
#' @details | ||
#' The returned DataFrame contains the usual suspects like the title and description for each dataset, | ||
#' the number of samples and types of labels, the organisms and genome builds involved, and so on. | ||
#' More details can be found in the Bioconductor metadata schema at \url{https://github.com/ArtifactDB/bioconductor-metadata-index}. | ||
#' | ||
#' @author Aaron Lun | ||
#' | ||
#' @examples | ||
#' surveyReferences() | ||
#' | ||
#' @seealso | ||
#' \code{\link{searchReferences}}, to search on the metadata for specific datasets. | ||
#' | ||
#' @export | ||
#' @importFrom S4Vectors DataFrame | ||
#' @importFrom gypsum cacheDirectory fetchMetadataDatabase | ||
#' @importFrom DBI dbConnect dbDisconnect dbGetQuery | ||
#' @importFrom RSQLite SQLite | ||
surveyReferences <- function(cache=cacheDirectory(), overwrite=FALSE, latest=TRUE) { | ||
bpath <- fetchMetadataDatabase(cache=cache, overwrite=overwrite) | ||
con <- dbConnect(SQLite(), bpath) | ||
on.exit(dbDisconnect(con)) | ||
|
||
stmt <- "SELECT json_extract(metadata, '$') AS meta, versions.asset AS asset, versions.version AS version, path"; | ||
if (!latest) { | ||
stmt <- paste0(stmt, ", versions.latest AS latest") | ||
} | ||
stmt <- paste0(stmt, " FROM paths LEFT JOIN versions ON paths.vid = versions.vid WHERE versions.project = 'celldex'") | ||
if (latest) { | ||
stmt <- paste0(stmt, " AND versions.latest = 1") | ||
} | ||
everything <- dbGetQuery(con, stmt) | ||
|
||
sanitize_query_to_output(everything, latest) | ||
} | ||
|
||
#' @importFrom S4Vectors DataFrame | ||
#' @importFrom jsonlite fromJSON | ||
sanitize_query_to_output <- function(results, latest, meta.name="meta") { | ||
path <- results$path | ||
has.slash <- grepl("/", path) | ||
path[!has.slash] <- NA_character_ | ||
path[has.slash] <- sub("/[^/]+$", "", path[has.slash]) | ||
df <- DataFrame(name = results$asset, version = results$version, path = path) | ||
if (!latest) { | ||
df$latest <- results$latest == 1 | ||
} | ||
|
||
all_meta <- lapply(results[[meta.name]], fromJSON, simplifyVector=FALSE) | ||
df$title <- extract_atomic_from_json(all_meta, function(x) x$title, "character") | ||
df$description <- extract_atomic_from_json(all_meta, function(x) x$title, "character") | ||
df$taxonomy_id <- extract_charlist_from_json(all_meta, function(x) x$taxonomy_id) | ||
df$genome <- extract_charlist_from_json(all_meta, function(x) x$genome) | ||
|
||
df$samples <- extract_atomic_from_json(all_meta, function(x) x$applications$takane$summarized_experiment$columns, "integer") | ||
df$labels <- extract_charlist_from_json(all_meta, function(x) x$applications$takane$summarized_experiment$column_annotations) | ||
|
||
df$bioconductor_version < extract_atomic_from_json(all_meta, function(x) x$bioconductor_version, "character") | ||
df$maintainer_name < extract_atomic_from_json(all_meta, function(x) x$maintainer_name, "character") | ||
df$maintainer_email < extract_atomic_from_json(all_meta, function(x) x$maintainer_email, "character") | ||
|
||
sources <- vector("list", length(all_meta)) | ||
for (i in seq_along(all_meta)) { | ||
cursources <- all_meta[[i]]$sources | ||
if (is.null(cursources)) { | ||
sources[[i]] <- DataFrame(provider=character(0), id=character(0), version=character(0)) | ||
} else { | ||
sources[[i]] <- DataFrame( | ||
provider = extract_atomic_from_json(cursources, function(x) x$provider, "character"), | ||
id = extract_atomic_from_json(cursources, function(x) x$id, "character"), | ||
version = extract_atomic_from_json(cursources, function(x) x$version, "character") | ||
) | ||
} | ||
} | ||
df$sources <- as(sources, "CompressedList") | ||
|
||
df | ||
} | ||
|
||
extract_atomic_from_json <- function(metadata, extract, type) { | ||
vapply(metadata, function(y) { | ||
x <- extract(y) | ||
if (is.null(x)) { | ||
as(NA, type) | ||
} else { | ||
x | ||
} | ||
}, vector(type, 1)) | ||
} | ||
|
||
extract_charlist_from_json <- function(metadata, extract) { | ||
output <- lapply(metadata, function(y) { | ||
x <- extract(y) | ||
if (is.null(y)) { | ||
character(0) | ||
} else { | ||
x | ||
} | ||
}) | ||
as(output, "CompressedList") | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.