Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,18 @@ jobs:
any::pkgdown

- name: Set up Python
uses: actions/setup-python@v2
id: setup-python
uses: actions/setup-python@v5
with:
python-version: '3.8'
python-version: '3.11'

- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install anndata
pip install numpy
env:
RETICULATE_PYTHON: /opt/hostedtoolcache/Python/3.8.10/x64/bin/python
pip install anndata numpy
# Pin reticulate to this interpreter for all subsequent steps so
# the AnnData <-> Seurat/SCE tests run instead of skipping.
echo "RETICULATE_PYTHON=${{ steps.setup-python.outputs.python-path }}" >> "$GITHUB_ENV"

- name: Build pkgdown site
run: |
Expand Down
20 changes: 14 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
Package: convert2anndata
Type: Package
Title: Convert SingleCellExperiment and Seurat Objects to AnnData
Version: 0.2.0
Title: Bidirectional Conversion Between AnnData and SingleCellExperiment / Seurat
Version: 0.3.0
Author: Dominik Otto [aut, cre] (<dotto@fredhutch.org>)
Maintainer: Dominik Otto <dotto@fredhutch.org>
Description: This package provides functions to convert SingleCellExperiment and Seurat objects to AnnData format. It handles split layers (Seurat), assays, dimensional reductions, metadata, and alternative experiments, ensuring comprehensive conversion.
Description: Bidirectional conversion between AnnData (.h5ad) and either
Seurat or SingleCellExperiment objects. Handles split layers (Seurat),
assays, dimensional reductions (obsm <-> reducedDims), metadata
(obs/var <-> colData/rowData), layers, and alternative experiments,
aiming for a faithful roundtrip.
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.1
URL: https://research.fredhutch.org/setty/en.html
RoxygenNote: 7.3.3
URL: https://github.com/settylab/convert2anndata, https://settylab.github.io/convert2anndata
BugReports: https://github.com/settylab/convert2anndata/issues
Imports:
Seurat,
SeuratObject,
SingleCellExperiment,
anndata,
reticulate,
optparse,
Matrix,
S4Vectors,
Expand All @@ -23,5 +29,7 @@ Imports:
Suggests:
testthat (>= 3.0.0),
covr,
BiocNeighbors
BiocNeighbors,
withr,
R6
biocViews: SingleCellData, DataImport, Transcriptomics, Sequencing, RNASeq
18 changes: 18 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
# Generated by roxygen2: do not edit by hand

export(align_metadata_seurat)
export(anndata_names)
export(attach_alt_experiments_sce)
export(attach_reductions_seurat)
export(check_anndata_python)
export(cli_convert)
export(convert_anndata_to_sce)
export(convert_anndata_to_seurat)
export(convert_commands)
export(convert_graph_to_colPair)
export(convert_seurat_to_sce)
export(convert_to_anndata)
export(create_combined_assay)
export(default_reduction_map)
export(diagnose_anndata_python)
export(ensure_csparse_matrix)
export(extract_anndata_X)
export(extract_anndata_layers)
export(extract_anndata_obsm)
export(extract_anndata_obsp)
export(extract_anndata_raw)
export(extract_counts_matrix)
export(extract_data)
export(extract_pairs)
Expand All @@ -20,6 +32,7 @@ export(process_layers)
export(process_main_assay)
export(process_metadata_and_pairwise)
export(process_other_assays)
export(setup_anndata_python)
export(timestamped_cat)
export(update_seurat_object)
import(Matrix)
Expand All @@ -36,6 +49,8 @@ importFrom(S4Vectors,subjectHits)
importFrom(Seurat,"DefaultAssay<-")
importFrom(Seurat,Cells)
importFrom(Seurat,CreateAssayObject)
importFrom(Seurat,CreateDimReducObject)
importFrom(Seurat,CreateSeuratObject)
importFrom(Seurat,DefaultAssay)
importFrom(Seurat,GetAssayData)
importFrom(Seurat,SetAssayData)
Expand All @@ -54,11 +69,14 @@ importFrom(SingleCellExperiment,reducedDim)
importFrom(SingleCellExperiment,reducedDims)
importFrom(SingleCellExperiment,removeAltExps)
importFrom(SingleCellExperiment,rowPairs)
importFrom(SummarizedExperiment,"colData<-")
importFrom(SummarizedExperiment,"rowData<-")
importFrom(SummarizedExperiment,assay)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(anndata,AnnData)
importFrom(anndata,read_h5ad)
importFrom(anndata,write_h5ad)
importFrom(methods,as)
importFrom(methods,is)
Expand Down
95 changes: 95 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# convert2anndata 0.3.0

## Bidirectional conversion

`convert2anndata` is now a bidirectional converter. In addition to the
existing SingleCellExperiment / Seurat → AnnData direction, it can
convert AnnData (`.h5ad`) objects into Seurat or SingleCellExperiment
objects:

* `convert_anndata_to_seurat()` — AnnData → Seurat.
* `convert_anndata_to_sce()` — AnnData → SingleCellExperiment.

Both accept either an in-memory AnnData object or a path to an `.h5ad`
file. AnnData components are mapped as follows: `X` / `layers` →
assays, `obsm` → dimensional reductions, `obs` / `var` → cell / feature
metadata, and `obsp` → Seurat graphs (for the Seurat target).

## Command-line interface

* `cli_convert()` now dispatches on the input file extension:
`.rds` → `.h5ad` and `.h5ad` → `.rds`.
* The `.h5ad` → `.rds` direction supports `-t seurat` (default) or
`-t sce` to choose the target object type.

## Python / reticulate setup

New helpers make the reticulate ↔ Python `anndata` wiring explicit and
easier to debug:

* `setup_anndata_python()` — resolve and activate a Python environment
that has `anndata` installed.
* `check_anndata_python()` — fail fast with an actionable message when
Python, `anndata`, or `numpy` are not reachable.
* `diagnose_anndata_python()` — print a full diagnostic of the active
Python environment.

The converters call `check_anndata_python()` automatically when they are
given a path to an `.h5ad` file.

## Seurat 5 support and `use_raw`

* Conversions are robust across Seurat 4 / Seurat 5 layer and slot
naming.
* `use_raw = "auto"` (the default) uses `adata.raw` as the counts
matrix only when no requested `counts_layer` is present; `"always"`
and `"never"` force the behaviour, and a logical value is still
accepted for backward compatibility.

## New exported helpers

The full `extract_anndata_*` family is now exported for building custom
conversions: `extract_anndata_X()`, `extract_anndata_layers()`,
`extract_anndata_obsm()`, `extract_anndata_obsp()`, and
`extract_anndata_raw()`. Also new: `attach_reductions_seurat()`.

## Tests, evaluation harness, and validation

* About ten new test files cover the AnnData → Seurat / SCE direction,
edge cases, sparse-matrix handling, `raw` / `obsp`, and realistic
scanpy-style datasets. Tests that need Python `anndata` skip
gracefully when it is unavailable.
* A standalone evaluation harness (`eval/`) exercises full roundtrips,
edge cases, a real pbmc3k dataset, and a quantitative
roundtrip-fidelity check.

### Validation results (0.3.0)

Validated with R 4.4.1, Seurat 5.4.0, SingleCellExperiment 1.28.1,
reticulate 1.45.0, and Python `anndata` 0.12.0:

* `R CMD check`: 0 errors, 0 warnings, 3 (benign) notes.
* `testthat`: 387 passing, 0 failing, 1 skipped (a CLI subprocess test
that needs the package installed on the library path).
* Roundtrip fidelity: `X`, `obs`, `var`, `obsm`, and `obsp` roundtrip
within numeric tolerance through the SCE-mediated path; `obsp` also
roundtrips through the Seurat-mediated path.

### Known limitations

* The Seurat-mediated path (`AnnData → Seurat → ...`) cannot preserve
arbitrary extra `layers` or `var` (feature) metadata, because a
Seurat assay only has `counts` / `data` / `scale.data` slots and no
feature-metadata store. Use the SCE-mediated path when verbatim
layers or `var` columns matter.
* `convert_anndata_to_sce()` does not yet attach `obsp` matrices as
`colPairs` (the Seurat target does attach them as graphs). This is a
planned follow-up.

## Documentation

* `README` rewritten with a direction reference table, Python
environment notes, and a troubleshooting section.
* `_pkgdown.yml` updated for bidirectional conversion with a grouped
reference index.
* `DESCRIPTION` retitled and bumped to 0.3.0.
38 changes: 38 additions & 0 deletions R/anndata_keys.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' Extract dict-like keys from an AnnData mapping (layers, obsm, uns, ...)
#'
#' AnnData's mapping attributes (`layers`, `obsm`, `uns`, ...) expose a
#' `.keys()` method. Different combinations of `reticulate` and `anndata`
#' return that result in different shapes:
#'
#' - newer reticulate auto-converts the Python `KeysView` to an R character
#'   vector (so `as.character(builtins$list(x$keys()))` runs `list()` on a
#'   char-vec and splits each name into individual characters);
#' - older versions hand back a Python object that needs `builtins$list()`
#'   to materialise.
#'
#' This helper handles both cases, plus the edge case where the mapping has
#' no `.keys()` method but `names()` works. Every step, including the import
#' of Python's builtins, is guarded, so a failure at any stage degrades to
#' the `names()` fallback (and finally to `character(0)`) instead of raising.
#'
#' @param mapping The mapping attribute (e.g. `adata$layers`).
#' @return A character vector of keys, or `character(0)` on failure.
#' @keywords internal
anndata_mapping_keys <- function(mapping) {
  raw <- tryCatch(mapping$keys(), error = function(e) NULL)

  # Already converted to an R character vector: use it verbatim. Sending it
  # through Python's list() again is what would split names into characters.
  if (is.character(raw)) {
    return(raw)
  }

  if (!is.null(raw)) {
    # Guard the import as well: without it, a missing reticulate/Python would
    # raise here and the names() fallback below would never be reached.
    builtins <- tryCatch(
      reticulate::import_builtins(),
      error = function(e) NULL
    )
    if (!is.null(builtins)) {
      py_listed <- tryCatch(builtins$list(raw), error = function(e) NULL)
      if (is.character(py_listed)) {
        return(py_listed)
      }
      if (!is.null(py_listed)) {
        return(
          tryCatch(as.character(py_listed), error = function(e) character(0))
        )
      }
    }
  }

  # Last resort: mappings without a usable keys() may still expose names().
  nm <- tryCatch(names(mapping), error = function(e) NULL)
  if (is.character(nm)) {
    return(nm)
  }
  character(0)
}
23 changes: 23 additions & 0 deletions R/anndata_names.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#' Extract obs/var names from an AnnData object
#'
#' Cell and feature names on AnnData objects are Python pandas Indexes;
#' R's `rownames()`/`colnames()` do not return them reliably. This helper
#' coerces them to character vectors:
#'
#' - values that already arrive as R character vectors are returned directly.
#'   Passing them through Python's `list()` again is unsafe: a length-one
#'   character vector is handed to Python as a single string, and `list()`
#'   would split it into individual characters;
#' - anything else is materialised via Python's `list()` builtin.
#'
#' Any failure along the way (attribute access, importing Python's builtins,
#' or the conversion itself) yields `NULL` for that element rather than an
#' error.
#'
#' @param adata An AnnData object (e.g. as returned by `anndata::read_h5ad`).
#' @return A list with elements `obs_names` (cells) and `var_names` (features),
#'   each a character vector or `NULL` if extraction failed.
#' @export
anndata_names <- function(adata) {
  # `get_index` is a thunk so that errors raised while *accessing* the
  # attribute are caught by the same tryCatch as conversion errors.
  index_to_character <- function(get_index) {
    tryCatch(
      {
        index <- get_index()
        if (is.null(index)) {
          NULL
        } else if (is.character(index)) {
          # as.character() also drops names/attributes, matching what the
          # Python round-trip would have produced.
          as.character(index)
        } else {
          builtins <- reticulate::import_builtins()
          as.character(builtins$list(index))
        }
      },
      error = function(e) NULL
    )
  }

  list(
    obs_names = index_to_character(function() adata$obs_names),
    var_names = index_to_character(function() adata$var_names)
  )
}
2 changes: 1 addition & 1 deletion R/attach_alt_experiments_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ attach_alt_experiments_sce <- function(data, sce, altExp_names) {
if (inherits(alt_assay, "Assay5")) {
alt_counts <- GetAssayData(alt_assay, layer = "counts")
} else if (inherits(alt_assay, "Assay")) {
alt_counts <- GetAssayData(alt_assay, slot = "counts")
alt_counts <- GetAssayData(alt_assay, layer = "counts")
} else {
next # Unsupported assay type
}
Expand Down
91 changes: 91 additions & 0 deletions R/attach_reductions_seurat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#' Default mapping from AnnData obsm keys to Seurat reduction (name, key) pairs
#'
#' Supplies the built-in translation table used when attaching obsm
#' embeddings to a Seurat object: for each conventionally named scanpy obsm
#' key it gives the Seurat reduction name and the column-name prefix. To add
#' or override mappings, pass a list of the same shape to
#' `attach_reductions_seurat()` (or its callers).
#'
#' @return A named list. Each name is an obsm key (e.g. `X_pca`); each
#'   element is a list with `name` (Seurat reduction name, e.g. `"pca"`)
#'   and `key` (column-name prefix, e.g. `"PC_"`).
#' @export
default_reduction_map <- function() {
  # obsm key -> Seurat reduction name
  reduction_names <- c(
    X_pca = "pca",
    X_umap = "umap",
    X_tsne = "tsne",
    X_diffmap = "diffmap",
    X_phate = "phate",
    X_harmony = "harmony",
    X_scvi = "scvi",
    X_lsi = "lsi",
    X_spectral = "spectral"
  )
  # Column-name prefixes, in the same order as `reduction_names`.
  key_prefixes <- c(
    "PC_", "UMAP_", "tSNE_", "DM_", "PHATE_",
    "harmony_", "scVI_", "LSI_", "spectral_"
  )
  # Map() carries over the names of its first vector, giving a list keyed by
  # obsm key; each element is the (name, key) pair.
  Map(
    function(name, key) list(name = name, key = key),
    reduction_names,
    key_prefixes
  )
}

#' Attach obsm embeddings to a Seurat object as dimensional reductions
#'
#' Uses an obsm-key -> (reduction-name, key-prefix) mapping. Keys not in the
#' mapping fall back to a derived name: leading `X_` stripped, lowercased,
#' with a sanitized key prefix. Embeddings whose row count does not match the
#' number of cells are skipped with a warning.
#'
#' @param seurat_obj A Seurat object to attach reductions to.
#' @param obsm Named list of numeric matrices (cells x dim), as returned by
#'   `extract_anndata_obsm()`.
#' @param assay Assay name to associate the reductions with. Defaults to "RNA".
#' @param reduction_map Named list overriding or extending
#'   `default_reduction_map()`. Each entry maps an obsm key (e.g. `"X_pca"`)
#'   to a list with `name` and `key`. The supplied map is merged on top of
#'   the defaults; pass an empty list to keep defaults, or pass an entry
#'   that is `NULL` or has `name = NA` to disable that obsm key.
#' @return The Seurat object with reductions attached.
#' @importFrom Seurat CreateDimReducObject
#' @export
attach_reductions_seurat <- function(seurat_obj, obsm, assay = "RNA",
                                     reduction_map = list()) {
  if (length(obsm) == 0) {
    return(seurat_obj)
  }

  merged_map <- default_reduction_map()
  for (k in names(reduction_map)) {
    # Assign through `[<-` with a wrapped value so that an explicit NULL
    # override is stored as a NULL entry. `[[<-` with NULL would delete the
    # key, and the obsm key would then silently take the derived-name path
    # instead of being disabled.
    merged_map[k] <- list(reduction_map[[k]])
  }

  for (k in names(obsm)) {
    mat <- obsm[[k]]
    if (nrow(mat) != ncol(seurat_obj)) {
      warning(sprintf(
        "obsm['%s'] has %d rows but the Seurat object has %d cells; skipping.",
        k, nrow(mat), ncol(seurat_obj)
      ))
      next
    }
    if (k %in% names(merged_map)) {
      entry <- merged_map[[k]]
      # A NULL entry or `name = NA` marks the obsm key as disabled.
      if (is.null(entry) || (length(entry$name) == 1 && is.na(entry$name))) {
        timestamped_cat(
          sprintf("Skipping obsm['%s'] (disabled in reduction_map).\n", k)
        )
        next
      }
      red_name <- entry$name
      red_key <- entry$key %||% paste0(red_name, "_")
    } else {
      # Unmapped key: derive the name from the obsm key itself.
      red_name <- tolower(sub("^X_", "", k))
      red_key <- paste0(red_name, "_")
    }
    # Keep only alphanumerics in the prefix, then restore the single
    # trailing underscore.
    sanitized_key <- gsub("[^[:alnum:]]", "", red_key)
    if (!grepl("_$", sanitized_key)) {
      sanitized_key <- paste0(sanitized_key, "_")
    }
    colnames(mat) <- paste0(sanitized_key, seq_len(ncol(mat)))
    # Seurat may have rewritten cell names during object construction; align
    # the embedding rownames to the seurat object so CreateDimReducObject
    # accepts them.
    rownames(mat) <- colnames(seurat_obj)

    seurat_obj[[red_name]] <- Seurat::CreateDimReducObject(
      embeddings = mat,
      key = sanitized_key,
      assay = assay
    )
    timestamped_cat(
      sprintf("Added obsm['%s'] as reduction '%s'.\n", k, red_name)
    )
  }
  seurat_obj
}

# Null-coalescing operator: yields `a` unless it is NULL, otherwise `b`.
# `b` is only evaluated when it is actually needed.
`%||%` <- function(a, b) {
  if (!is.null(a)) {
    return(a)
  }
  b
}
Loading
Loading