diff --git a/DESCRIPTION b/DESCRIPTION index 5322984..f64b543 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: Moonlight2R Type: Package Title: Identify oncogenes and tumor suppressor genes from omics data -Version: 1.11.0 +Version: 1.11.1 Date: Authors@R: c(person("Mona", "Nourbakhsh", @@ -90,7 +90,8 @@ SystemRequirements: CScapeSomatic VignetteBuilder: knitr URL: https://github.com/ELELAB/Moonlight2R BugReports: https://github.com/ELELAB/Moonlight2R/issues -RoxygenNote: 7.3.3 LazyData: false Encoding: UTF-8 Config/testthat/edition: 3 +Config/roxygen2/version: 8.0.0 +RoxygenNote: 7.3.3 diff --git a/NEWS.md b/NEWS.md index f0d4d4c..fa73ccf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# Moonlight2R 1.11.1 + +## Summary + +* Fix FEA Moonlight Z-score to consider experimental logFC in the score calculation + # Moonlight2R 1.9.4 ## Summary diff --git a/R/FEA.R b/R/FEA.R index 78a2859..bac3c43 100644 --- a/R/FEA.R +++ b/R/FEA.R @@ -25,7 +25,7 @@ FEA <- function(BPname = NULL, # List of variable names variables_to_check <- c("DiseaseList", "EAGenes") - + # Check and load variables if they do not exist for (variable_name in variables_to_check) { if (! variable_name %in% names(.GlobalEnv)) { @@ -74,7 +74,6 @@ FEA <- function(BPname = NULL, # sorting strategy to avoid different sorting in case of tie ranks rankings <- rankings[order(-rankings, names(rankings))] - pathwayNamesList <- list() for (k in seq_along(lf2)) { @@ -98,14 +97,39 @@ FEA <- function(BPname = NULL, setTxtProgressBar(pb, k) - res <- as.data.frame(matrix(0, nrow = 1, ncol = 2, - dimnames = list(1, c("pathway", - "Moonlight.Z.score")))) + res <- as.data.frame(matrix(0, nrow = 1, ncol = 5, + dimnames = list(1, c("Diseases.or.Functions.Annotation", + "Moonlight.Z.score", + "commonNg", + "FunctionNg", + "Molecules")))) + + GeneList <- data.frame(PROBE_ID = rownames(DiffMatrix), + logFC = DiffMatrix$logFC, + stringsAsFactors = FALSE) + rownames(GeneList) <- GeneList$PROBE_ID + selected_diseases <- as.data.frame(DiseaseList[[which(names(DiseaseList) == lf2[k])]]) + + selected_diseases$ID <- selected_diseases$Genes.in.dataset + + res$commonNg <- length(intersect(GeneList$PROBE_ID, selected_diseases$ID)) + + res$FunctionNg <- nrow(selected_diseases) + + res$Diseases.or.Functions.Annotation <- lf2[k] + + selected_diseases <- selected_diseases[selected_diseases$ID %in% rownames(DiffMatrix), ] - res$pathway <- lf2[k] + selected_diseases$logFC <- DiffMatrix$logFC[match(selected_diseases$ID, rownames(DiffMatrix))] + selected_diseases <- selected_diseases[!is.na(selected_diseases$logFC), ] + + rownames(selected_diseases) <- selected_diseases$ID + + res$Molecules <- paste0(GeneList$PROBE_ID, collapse = ",") + Zscore <- .compute_moonlight_zscore(selected_diseases) res$Moonlight.Z.score <- Zscore @@ -118,13 +142,14 @@ FEA <- function(BPname = NULL, bp_score_collection_merged <- bind_rows(bp_score_collection) - TableDiseasesNew <- fgseaRes %>% left_join(bp_score_collection_merged, by = "pathway") - TableDiseasesNew <- TableDiseasesNew %>% select(-log2err) + TableDiseasesNew <- fgseaRes %>% left_join(bp_score_collection_merged, by = c("pathway" = "Diseases.or.Functions.Annotation")) + + TableDiseasesNew <- TableDiseasesNew %>% select(-log2err, -size) TableDiseasesNew <- TableDiseasesNew %>% rename("Diseases.or.Functions.Annotation" = pathway, "p.value" = pval) %>% - select("Diseases.or.Functions.Annotation", "Moonlight.Z.score","p.value", "padj", "ES", "NES", "size", "leadingEdge") + select("Diseases.or.Functions.Annotation", "Moonlight.Z.score","p.value", "padj", "ES", "NES", "commonNg", "FunctionNg", "Molecules") - + } else if (method == "ora") { @@ -173,37 +198,17 @@ FEA <- function(BPname = NULL, } GeneList <- GeneList[GeneList$PROBE_ID %in% selected_diseases$ID, ] - + selected_diseases <- selected_diseases[selected_diseases$ID %in% GeneList[, "PROBE_ID"], ] - selected_diseases[, "Exp.Log.Ratio"] <- gsub(",", ".", selected_diseases[, "Exp.Log.Ratio"]) - selected_diseases[, "Exp.Log.Ratio"] <- as.numeric(selected_diseases[, "Exp.Log.Ratio"]) - rownames(selected_diseases) <- selected_diseases$ID + selected_diseases$logFC <- GeneList$logFC[match(selected_diseases$ID, GeneList$PROBE_ID)] + + selected_diseases <- selected_diseases[!is.na(selected_diseases$logFC), ] + rownames(selected_diseases) <- selected_diseases$ID + res$Molecules <- paste0(GeneList$PROBE_ID, collapse = ",") - for (idx in seq.int(nrow(selected_diseases))) { - - currTR <- selected_diseases$ID[idx] - - if (length(grep("Increases", selected_diseases[currTR, "Findings"])) == 1) { - - if (sign(GeneList[currTR, "logFC"]) > 0) { - selected_diseases[currTR, "Prediction..based.on.expression.direction."] <- "Increased" - } else if (sign(GeneList[currTR, "logFC"]) < 0) { - selected_diseases[currTR, "Prediction..based.on.expression.direction."] <- "Decreased" - } - } - - if (length(grep("Decreases", selected_diseases[currTR, "Findings"])) == 1) { - if (sign(GeneList[currTR, "logFC"]) < 0) { - selected_diseases[currTR, "Prediction..based.on.expression.direction."] <- "Increased" - } else if (sign(GeneList[currTR, "logFC"]) > 0) { - selected_diseases[currTR, "Prediction..based.on.expression.direction."] <- "Decreased" - } - } - } - Zscore <- .compute_moonlight_zscore(selected_diseases) res$Moonlight.Z.score <- Zscore @@ -237,22 +242,25 @@ FEA <- function(BPname = NULL, #' @noRd .compute_moonlight_zscore <- function(selected_diseases) { + if(!all(c("Findings", "logFC") %in% colnames(selected_diseases))){ + stop("The input DiseaseList does not contain 'Findings' and 'logFC' columns for Moonlight z-score calculations") + } + Correlation <- matrix(0, nrow(selected_diseases), 1) selected_diseases <- cbind(selected_diseases, Correlation) - - if (length(grep("Decreases", selected_diseases$Findings)) != 0) { + if (any(grepl("Increases|Decreases", selected_diseases$Findings, ignore.case = TRUE))){ selected_diseases[grep("Decreases", selected_diseases$Findings), "Findings"] <- -1 selected_diseases[grep("Increases", selected_diseases$Findings), "Findings"] <- 1 selected_diseases[grep("Affects", selected_diseases$Findings), "Findings"] <- 0 selected_diseases[, "Findings"] <- as.numeric(selected_diseases[, "Findings"]) - selected_diseases[, "Exp.Log.Ratio"] <- gsub(",", ".", selected_diseases[, "Exp.Log.Ratio"]) - selected_diseases[, "Exp.Log.Ratio"] <- as.numeric(selected_diseases[, "Exp.Log.Ratio"]) - - PredictionIncreased <- which(sign(selected_diseases$Exp.Log.Ratio) == selected_diseases$Findings) - PredictionDecreased <- which(sign(selected_diseases$Exp.Log.Ratio) != selected_diseases$Findings) + selected_diseases[, "logFC"] <- gsub(",", ".", selected_diseases[, "logFC"]) + selected_diseases[, "logFC"] <- as.numeric(selected_diseases[, "logFC"]) + + PredictionIncreased <- which(sign(selected_diseases$logFC) == selected_diseases$Findings) + PredictionDecreased <- which(sign(selected_diseases$logFC) != selected_diseases$Findings) PredictionAffected <- which(sign(selected_diseases$Findings) == 0) selected_diseases[PredictionIncreased, "Correlation"] <- 1 @@ -263,7 +271,7 @@ FEA <- function(BPname = NULL, } else { Zscore <- 0 } - + return(Zscore) -} \ No newline at end of file +} diff --git a/R/PRA.R b/R/PRA.R index cf678bc..d8ac723 100644 --- a/R/PRA.R +++ b/R/PRA.R @@ -13,7 +13,7 @@ #' data(DiseaseList) #' data(tabGrowBlock) #' data(knownDriverGenes) -#' dataPRA <- PRA(dataURA = dataURA[seq.int(2),], +#' dataPRA <- PRA(dataURA = dataURA, #' BPname = c("apoptosis","proliferation of cells"), #' thres.role = 0) PRA <- function(dataURA, diff --git a/R/Rdata.R b/R/Rdata.R index 5a5df4d..6fd8978 100644 --- a/R/Rdata.R +++ b/R/Rdata.R @@ -126,9 +126,9 @@ #'@usage data(dataFEA_fgsea) #'@name dataFEA_fgsea #'@aliases dataFEA_fgsea -#'@return A dataframe of dimension 101x8 +#'@return A dataframe of dimension 101x9 #' -#'@format A dataframe of dimension 101x8 +#'@format A dataframe of dimension 101x9 #' "dataFEA_fgsea" diff --git a/data/dataFEA.rda b/data/dataFEA.rda index e61950f..6e522b3 100644 Binary files a/data/dataFEA.rda and b/data/dataFEA.rda differ diff --git a/data/dataFEA_fgsea.rda b/data/dataFEA_fgsea.rda index af9a81c..203e3f0 100644 Binary files a/data/dataFEA_fgsea.rda and b/data/dataFEA_fgsea.rda differ diff --git a/data/dataGRN.rda b/data/dataGRN.rda index 66889f4..637c9ed 100644 Binary files a/data/dataGRN.rda and b/data/dataGRN.rda differ diff --git a/data/dataPRA.rda b/data/dataPRA.rda index 4e5ce77..d21cb6e 100644 Binary files a/data/dataPRA.rda and b/data/dataPRA.rda differ diff --git a/data/dataURA.rda b/data/dataURA.rda index c0122f6..d4e99a4 100644 Binary files a/data/dataURA.rda and b/data/dataURA.rda differ diff --git a/man/PRA.Rd b/man/PRA.Rd index 0b2bc26..f9c4169 100644 --- a/man/PRA.Rd +++ b/man/PRA.Rd @@ -24,7 +24,7 @@ data(dataURA) data(DiseaseList) data(tabGrowBlock) data(knownDriverGenes) -dataPRA <- PRA(dataURA = dataURA[seq.int(2),], +dataPRA <- PRA(dataURA = dataURA, BPname = c("apoptosis","proliferation of cells"), thres.role = 0) } diff --git a/man/dataFEA_fgsea.Rd b/man/dataFEA_fgsea.Rd index b034621..efbb6d5 100644 --- a/man/dataFEA_fgsea.Rd +++ b/man/dataFEA_fgsea.Rd @@ -5,13 +5,13 @@ \alias{dataFEA_fgsea} \title{Functional enrichment analysis with fgsea method} \format{ -A dataframe of dimension 101x8 +A dataframe of dimension 101x9 } \usage{ data(dataFEA_fgsea) } \value{ -A dataframe of dimension 101x8 +A dataframe of dimension 101x9 } \description{ The output of the FEA function which does enrichment analysis using fgsea method diff --git a/tests/testthat/test-FEA.R b/tests/testthat/test-FEA.R index 6d3a57f..39184d1 100644 --- a/tests/testthat/test-FEA.R +++ b/tests/testthat/test-FEA.R @@ -6,7 +6,7 @@ data("DiseaseList") data("EAGenes") DEGsmatrix_ora <- DEGsmatrix[1:10, ] dataFEA_test_ora <- FEA(DiffMatrix = DEGsmatrix_ora, method = "ora") -dataFEA_test_fgsea <- FEA(DiffMatrix = DEGsmatrix, method = "fgsea", seed = 123) +dataFEA_test_fgsea <- FEA(DiffMatrix = DEGsmatrix, method= "fgsea", seed = 123) # Load example data of FEA serving as reference points data(dataFEA) data(dataFEA_fgsea) @@ -47,11 +47,11 @@ test_that("FEA output is identical to reference point", { # Test number of columns test_that("Number of columns in FEA output are equal to 8", { - expect_equal(dim(dataFEA_test_fgsea), c(101, 8)) + expect_equal(dim(dataFEA_test_fgsea), c(101, 9)) }) dataFEA_colnames <- c("Diseases.or.Functions.Annotation","Moonlight.Z.score", "p.value", - "padj", "ES", "NES", "size", "leadingEdge") + "padj", "ES", "NES", "commonNg", "FunctionNg", "Molecules") # Test correct column names test_that("Column names in FEA output are correct", { @@ -61,13 +61,15 @@ test_that("Column names in FEA output are correct", { # Test expected class of values in output test_that("Moonlight scores, p-values and FDR values are numeric", { expect_type(dataFEA_test_fgsea$Diseases.or.Functions.Annotation, "character") + expect_type(dataFEA_test_ora$Moonlight.Z.score, "double") expect_type(dataFEA_test_fgsea$p.value, "double") expect_type(dataFEA_test_fgsea$padj, "double") expect_type(dataFEA_test_fgsea$ES, "double") expect_type(dataFEA_test_fgsea$NES, "double") - expect_type(dataFEA_test_fgsea$size, "integer") - expect_type(dataFEA_test_fgsea$leadingEdge, "list") - expect_true(all(sapply(dataFEA_test_fgsea$leadingEdge, is.character))) + expect_type(dataFEA_test_fgsea$commonNg, "integer") + expect_type(dataFEA_test_fgsea$FunctionNg, "integer") + expect_type(dataFEA_test_fgsea$Molecules, "character") + expect_true(all(sapply(dataFEA_test_fgsea$Molecules, is.character))) }) diff --git a/tests/testthat/test-TFinfluence.R b/tests/testthat/test-TFinfluence.R index b647ddd..0466a58 100644 --- a/tests/testthat/test-TFinfluence.R +++ b/tests/testthat/test-TFinfluence.R @@ -9,17 +9,17 @@ data(dataMAVISp) reference_TFresults <- data.frame( - Target = c("CHST1", "GEN1", "TAP1", "TAP1"), - Moonlight_gene_z_score = c(1.06066017177982, 1.30096115353815, 0.8262079533974, 0.8262079533974), - Moonlight_Oncogenic_Mediator = c("TSG", "OCG", "OCG", "OCG"), - logFC_target = c(1.94917640749552, 1.36435629696216, 1.36486453774995, 1.36486453774995), - TF = c(NA, NA, "IRF1", "IRF2"), - InteractionType = c(NA, NA, "Activation", "Activation"), - PMID = c(NA, NA, "18694960", "15778351"), + Target = c("PDE2A", "GCAT", "ARMC9", "FAM155A"), + Moonlight_gene_z_score = c(1.37642904406401, 0.810977174901446, 0.367466020609254, 1.19863281130568), + Moonlight_Oncogenic_Mediator = c("TSG", "TSG", "TSG", "OCG"), + logFC_target = c(-3.36278468754307, 1.0407914607103, 1.03363210942098, 1.97338960700756), + TF = as.character(c(NA, NA, NA, NA)), + InteractionType = as.character(c(NA, NA, NA, NA)), + PMID = as.character(c(NA, NA, NA, NA)), tf_mutation = as.character(c(NA, NA, NA, NA)), stab_class = as.character(c(NA, NA, NA, NA)), in_MAVISp = c(FALSE,FALSE,FALSE,FALSE), - mutation_available = c(FALSE,FALSE,FALSE,FALSE), + mutation_available = c(FALSE, FALSE, FALSE, FALSE), stringsAsFactors = FALSE ) diff --git a/vignettes/URAplot.gif b/vignettes/URAplot.gif index b740f96..cb7cea5 100644 Binary files a/vignettes/URAplot.gif and b/vignettes/URAplot.gif differ