Skip to content

Commit badf325

Browse files
authored
Merge pull request #740 from opencb/TASK-8160
TASK-8160 - Fix the CellBase command 'index'
2 parents 863b237 + 9f1e539 commit badf325

9 files changed

Lines changed: 97 additions & 66 deletions

File tree

cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/IndexCommandExecutor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,11 @@ public void execute() {
5959

6060
Path indexFile = Paths.get(this.appHome).resolve("conf").resolve("mongodb-indexes.json");
6161
logger.info("Using index configuration file: {}", indexFile.toAbsolutePath());
62-
IndexManager indexManager = new IndexManager(indexCommandOptions.database, indexFile, configuration);
62+
IndexManager indexManager = new IndexManager(indexCommandOptions.database, indexFile, dataReleaseManager, configuration);
6363
if (indexCommandOptions.validate) {
6464
indexManager.validateMongoDBIndexes(indexCommandOptions.data);
6565
} else {
66-
indexManager.createMongoDBIndexes(indexCommandOptions.data, indexCommandOptions.dataRelease,
66+
indexManager.createMongoDBIndexes(indexCommandOptions.data, Integer.parseInt(indexCommandOptions.dataRelease),
6767
indexCommandOptions.dropIndexesFirst);
6868
}
6969

cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public void execute() throws CellBaseException {
110110
loadRunner = new LoadRunner(loader, database, numThreads, dataReleaseManager, configuration);
111111
if (createIndexes) {
112112
Path indexFile = Paths.get(this.appHome).resolve("conf").resolve("mongodb-indexes.json");
113-
indexManager = new IndexManager(database, indexFile, configuration);
113+
indexManager = new IndexManager(database, indexFile, dataReleaseManager, configuration);
114114
}
115115

116116
for (String data : dataList) {

cellbase-lib/src/main/java/org/opencb/cellbase/lib/indexer/IndexManager.java

Lines changed: 52 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,15 @@
1919
import com.fasterxml.jackson.databind.DeserializationFeature;
2020
import com.fasterxml.jackson.databind.MapperFeature;
2121
import com.fasterxml.jackson.databind.ObjectMapper;
22+
import org.apache.commons.collections4.CollectionUtils;
2223
import org.apache.commons.lang3.StringUtils;
2324
import org.opencb.cellbase.core.config.CellBaseConfiguration;
25+
import org.opencb.cellbase.core.exception.CellBaseException;
26+
import org.opencb.cellbase.core.models.Release;
27+
import org.opencb.cellbase.lib.EtlCommons;
2428
import org.opencb.cellbase.lib.db.MongoDBManager;
29+
import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor;
30+
import org.opencb.cellbase.lib.managers.DataReleaseManager;
2531
import org.opencb.commons.datastore.core.ObjectMap;
2632
import org.opencb.commons.datastore.mongodb.MongoDBIndexUtils;
2733
import org.opencb.commons.datastore.mongodb.MongoDataStore;
@@ -35,10 +41,12 @@
3541
import java.util.*;
3642

3743
import static org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor.DATA_RELEASE_SEPARATOR;
44+
import static org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor.buildCollectionName;
3845

3946

4047
public class IndexManager {
4148

49+
private DataReleaseManager dataReleaseManager;
4250
private CellBaseConfiguration configuration;
4351
private Logger logger;
4452
private String databaseName;
@@ -48,31 +56,10 @@ public class IndexManager {
4856

4957
private Map<String, List<Map<String, ObjectMap>>> indexes;
5058

51-
private static final Map<String, List<String>> DATA_COLLECTIONS = new HashMap<>();
52-
53-
static {
54-
DATA_COLLECTIONS.put("genome", Arrays.asList("genome_info", "genome_sequence"));
55-
DATA_COLLECTIONS.put("conservation", Collections.singletonList("conservation"));
56-
DATA_COLLECTIONS.put("repeats", Collections.singletonList("repeats"));
57-
DATA_COLLECTIONS.put("gene", Arrays.asList("gene", "refseq"));
58-
DATA_COLLECTIONS.put("protein", Collections.singletonList("protein"));
59-
DATA_COLLECTIONS.put("regulation", Arrays.asList("regulatory_region", "regulatory_pfm"));
60-
DATA_COLLECTIONS.put("variation", Collections.singletonList("variation"));
61-
DATA_COLLECTIONS.put("variation_functional_score", Collections.singletonList("missense_variation_functional_score"));
62-
DATA_COLLECTIONS.put("protein_functional_prediction", Collections.singletonList("protein_functional_prediction"));
63-
DATA_COLLECTIONS.put("revel", Collections.singletonList("revel"));
64-
DATA_COLLECTIONS.put("alphamissense", Collections.singletonList("alphamissense"));
65-
DATA_COLLECTIONS.put("clinical_variants", Collections.singletonList("clinical_variants"));
66-
DATA_COLLECTIONS.put("splice_score", Collections.singletonList("splice_score"));
67-
DATA_COLLECTIONS.put("ontology", Collections.singletonList("ontology"));
68-
DATA_COLLECTIONS.put("pubmed", Collections.singletonList("pubmed"));
69-
DATA_COLLECTIONS.put("pharmacogenomics", Collections.singletonList("pharmacogenomics"));
70-
DATA_COLLECTIONS.put("polygenic_score", Arrays.asList("variant_polygenic_score", "common_polygenic_score"));
71-
}
72-
73-
public IndexManager(String databaseName, Path indexFile, CellBaseConfiguration configuration) {
59+
public IndexManager(String databaseName, Path indexFile, DataReleaseManager dataReleaseManager, CellBaseConfiguration configuration) {
7460
this.databaseName = databaseName;
7561
this.indexFile = indexFile;
62+
this.dataReleaseManager = dataReleaseManager;
7663
this.configuration = configuration;
7764

7865
init();
@@ -82,8 +69,6 @@ private void init() {
8269
logger = LoggerFactory.getLogger(this.getClass());
8370
mongoDBManager = new MongoDBManager(configuration);
8471

85-
// Path indexFile = Paths.get("./cellbase-lib/src/main/resources/mongodb-indexes.json");
86-
8772
MongoDataStore mongoDBDatastore = mongoDBManager.createMongoDBDatastore(databaseName);
8873
mongoDBIndexUtils = new MongoDBIndexUtils(mongoDBDatastore, indexFile);
8974

@@ -99,24 +84,46 @@ private void init() {
9984
* @param dropIndexesFirst if TRUE, deletes the index before creating a new one. FALSE, no index is created if it
10085
* already exists.
10186
* @throws IOException if configuration file can't be read
87+
* @throws CellBaseException if DataRelease manager raises an exception
10288
*/
103-
@Deprecated
104-
public void createMongoDBIndexes(String data, String dataRelease, boolean dropIndexesFirst) throws IOException {
105-
// InputStream indexResourceStream = getClass().getResourceAsStream("mongodb-indexes.json");
89+
public void createMongoDBIndexes(String data, int dataRelease, boolean dropIndexesFirst) throws IOException, CellBaseException {
90+
Release release = dataReleaseManager.get(dataRelease);
91+
92+
List<String> collections = new ArrayList<>();
10693
if (StringUtils.isEmpty(data) || "all".equalsIgnoreCase(data)) {
107-
mongoDBIndexUtils.createAllIndexes(dropIndexesFirst);
108-
// mongoDBIndexUtils.createAllIndexes(mongoDataStore, indexResourceStream, dropIndexesFirst);
109-
logger.info("Loaded all indexes");
94+
logger.info("Indexing all data ({}) for data release {}", StringUtils.join(release.getCollections().keySet(), ", "),
95+
dataRelease);
96+
for (Map.Entry<String, String> entry : release.getCollections().entrySet()) {
97+
// Sanity check
98+
if (!entry.getValue().endsWith(DATA_RELEASE_SEPARATOR + dataRelease)) {
99+
throw new CellBaseException("Something wrong when indexing: Collection " + entry.getValue() + " found when indexing"
100+
+ " data release " + dataRelease);
101+
}
102+
collections.add(entry.getValue());
103+
}
110104
} else {
111105
List<String> dataList = Arrays.asList(data.split(","));
112106
for (String dataName : dataList) {
113-
List<String> collections = new ArrayList<>();
114-
for (String collection : DATA_COLLECTIONS.get(dataName)) {
115-
collections.add(collection + DATA_RELEASE_SEPARATOR + dataRelease);
107+
if (release.getCollections().containsKey(dataName)) {
108+
collections.add(release.getCollections().get(dataName));
109+
} else {
110+
throw new CellBaseException("Error indexing: data '" + dataName + "' missing in data release " + dataRelease
111+
+ " (" + StringUtils.join(release.getCollections().keySet(), ", ") + ")");
116112
}
117-
createMongoDBIndexes(collections, dropIndexesFirst);
118113
}
119114
}
115+
116+
// Remove temporary polygenic score collections
117+
collections.remove(CellBaseDBAdaptor.buildCollectionName(EtlCommons.PGS_COMMON_COLLECTION, dataRelease));
118+
collections.remove(CellBaseDBAdaptor.buildCollectionName(EtlCommons.PGS_VARIANT_COLLECTION, dataRelease));
119+
120+
// Check collection names
121+
if (CollectionUtils.isEmpty(collections)) {
122+
throw new CellBaseException("No collections to index");
123+
}
124+
125+
// Create MongoDB indexes
126+
createMongoDBIndexes(collections, dropIndexesFirst);
120127
}
121128

122129
public void createMongoDBIndexes(List<String> collections, boolean dropIndexesFirst) throws IOException {
@@ -154,13 +161,21 @@ public void validateMongoDBIndexes(String collectionName) throws IOException {
154161
}
155162
}
156163
}
157-
164+
private void createAllIndexes(int dataRelease, boolean dropIndexesFirst) throws IOException {
165+
Map<String, List<Map<String, ObjectMap>>> indexes = getIndexesFromFile();
166+
167+
for (String key : indexes.keySet()) {
168+
String collectionName = buildCollectionName(key, dataRelease);
169+
logger.info("Creating index for collection {}", collectionName);
170+
mongoDBIndexUtils.createIndexes(collectionName, indexes.get(key), dropIndexesFirst);
171+
logger.info("Done.");
172+
}
173+
}
158174
private void checkIndexes() throws IOException {
159175
if (indexes == null) {
160176
indexes = getIndexesFromFile();
161177
}
162178
}
163-
164179
private Map<String, List<Map<String, ObjectMap>>> getIndexesFromFile() throws IOException {
165180
ObjectMapper objectMapper = generateDefaultObjectMapper();
166181
Map<String, List<Map<String, ObjectMap>>> indexes = new HashMap<>();

cellbase-lib/src/main/resources/mongodb-indexes.json

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,5 @@
186186
{"collection": "protein_substitution_prediction", "fields": {"transcriptId": 1}, "options": {"background": true}}
187187
{"collection": "protein_substitution_prediction", "fields": {"aaPosition": 1}, "options": {"background": true}}
188188

189-
{"collection": "common_polygenic_score", "fields": {"id": 1}, "options": {"background": true}}
190-
{"collection": "common_polygenic_score", "fields": {"name": 1}, "options": {"background": true}}
191-
{"collection": "common_polygenic_score", "fields": {"source": 1}, "options": {"background": true}}
192-
{"collection": "variant_polygenic_score", "fields": {"_chunkIds": 1}, "options": {"background": true}}
193-
{"collection": "variant_polygenic_score", "fields": {"chromosome": 1, "position": 1}, "options": {"background": true}}
194-
{"collection": "variant_polygenic_score", "fields": {"polygenicScores.id": 1}, "options": {"background": true}}
195-
196189
{"collection": "snp", "fields": {"id": 1}, "options": {"background": true}}
197190
{"collection": "snp", "fields": {"chromosome": 1, "position": 1, "reference": 1}, "options": {"background": true}}

cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package org.opencb.cellbase.lib;
1818

1919
import org.apache.commons.collections4.CollectionUtils;
20+
import org.junit.Assert;
2021
import org.opencb.biodata.models.variant.Variant;
2122
import org.opencb.cellbase.core.common.GitRepositoryState;
2223
import org.opencb.cellbase.core.config.CellBaseConfiguration;
@@ -66,7 +67,7 @@ public class GenericMongoDBAdaptorTest {
6667
private static final String LOCALHOST = "localhost:27017";
6768
protected static final String SPECIES = "hsapiens";
6869
protected static final String ASSEMBLY = "grch38";
69-
// protected static final String API_VERSION = "v5";
70+
// protected static final String API_VERSION = "v5";
7071
private static final String MONGODB_CELLBASE_LOADER = "org.opencb.cellbase.lib.loader.MongoDBCellBaseLoader";
7172
protected CellBaseConfiguration cellBaseConfiguration;
7273
protected CellBaseManagerFactory cellBaseManagerFactory;
@@ -90,18 +91,19 @@ public GenericMongoDBAdaptorTest() {
9091
GenericMongoDBAdaptorTest.class.getClassLoader().getResourceAsStream("configuration.test.yaml"),
9192
CellBaseConfiguration.ConfigurationFileFormat.YAML);
9293

93-
String[] versionSplit = GitRepositoryState.get().getBuildVersion().split("\\.");
94-
cellBaseConfiguration.setVersion("v" + versionSplit[0] + "." + versionSplit[1]);
95-
cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration);
94+
String[] versionSplit = GitRepositoryState.get().getBuildVersion().split("\\.");
95+
cellBaseConfiguration.setVersion("v" + versionSplit[0] + "." + versionSplit[1]);
96+
cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration);
9697

97-
cellBaseName = DatabaseNameUtils.getDatabaseName(SPECIES, ASSEMBLY, cellBaseConfiguration.getVersion());
98+
cellBaseName = DatabaseNameUtils.getDatabaseName(SPECIES, ASSEMBLY, cellBaseConfiguration.getVersion());
9899

99-
loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, cellBaseName, 2,
100-
cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY), cellBaseConfiguration);
100+
loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, cellBaseName, 2,
101+
cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY), cellBaseConfiguration);
101102

102103
initDB();
103104
} catch (Exception e) {
104105
e.printStackTrace();
106+
Assert.fail();
105107
}
106108
}
107109

cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,18 @@
3131
import org.opencb.cellbase.core.exception.CellBaseException;
3232
import org.opencb.cellbase.core.result.CellBaseDataResult;
3333
import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest;
34+
import org.opencb.cellbase.lib.loader.LoaderException;
3435
import org.opencb.cellbase.lib.managers.ClinicalManager;
3536
import org.opencb.commons.datastore.core.QueryOptions;
3637

38+
import java.io.IOException;
39+
import java.lang.reflect.InvocationTargetException;
40+
import java.net.URISyntaxException;
3741
import java.util.Arrays;
3842
import java.util.Collections;
3943
import java.util.LinkedHashMap;
4044
import java.util.List;
45+
import java.util.concurrent.ExecutionException;
4146

4247
import static org.junit.jupiter.api.Assertions.*;
4348

cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest;
1111
import org.opencb.cellbase.lib.db.MongoDBManager;
1212
import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor;
13+
import org.opencb.cellbase.lib.managers.DataReleaseManager;
1314
import org.opencb.cellbase.lib.managers.GeneManager;
1415
import org.opencb.commons.datastore.core.DataResult;
1516
import org.opencb.commons.datastore.mongodb.MongoDBCollection;
@@ -26,18 +27,40 @@
2627

2728
public class IndexManagerTest extends GenericMongoDBAdaptorTest {
2829

30+
private DataReleaseManager dataReleaseManager;
2931
private IndexManager indexManager;
3032

31-
public IndexManagerTest() throws URISyntaxException {
33+
public IndexManagerTest() throws URISyntaxException, CellBaseException {
3234
super();
3335

3436
Path path = Paths.get(getClass().getResource("/index/mongodb-indexes.json").toURI());
35-
indexManager = new IndexManager(cellBaseName, path, cellBaseConfiguration);
37+
dataReleaseManager = new DataReleaseManager(SPECIES, ASSEMBLY, cellBaseConfiguration);
38+
indexManager = new IndexManager(cellBaseName, path, dataReleaseManager, cellBaseConfiguration);
3639
}
3740

3841
@Test
39-
public void testIndexes() throws IOException, CellBaseException, QueryException, IllegalAccessException {
40-
String collectionName = "gene" + CellBaseDBAdaptor.DATA_RELEASE_SEPARATOR + dataRelease.getRelease();
42+
public void testIndexesUsingDataName() throws IOException, CellBaseException, QueryException, IllegalAccessException {
43+
indexManager.createMongoDBIndexes("gene", dataRelease.getRelease(), true);
44+
45+
MongoDBManager mongoDBManager = new MongoDBManager(cellBaseConfiguration);
46+
MongoDataStore mongoDataStore = mongoDBManager.createMongoDBDatastore(SPECIES, ASSEMBLY);
47+
MongoDBCollection mongoDBCollection = mongoDataStore.getCollection(CellBaseDBAdaptor.buildCollectionName("gene", dataRelease.getRelease()));
48+
DataResult<Document> index = mongoDBCollection.getIndex();
49+
assertNotNull(index);
50+
51+
GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY);
52+
GeneQuery query = new GeneQuery();
53+
query.setNames(Collections.singletonList("BRCA1"));
54+
query.setDataRelease(dataRelease.getRelease());
55+
CellBaseDataResult<Gene> result = geneManager.search(query);
56+
assertEquals(1, result.getNumResults());
57+
assertEquals("BRCA1", result.getResults().get(0).getName());
58+
assertEquals("ENSG00000012048", result.getResults().get(0).getId());
59+
}
60+
61+
@Test
62+
public void testIndexesUsingCollectionName() throws IOException, CellBaseException, QueryException, IllegalAccessException {
63+
String collectionName = CellBaseDBAdaptor.buildCollectionName("gene", dataRelease.getRelease());
4164

4265
indexManager.createMongoDBIndexes(Collections.singletonList(collectionName), true);
4366

cellbase-lib/src/test/resources/configuration.test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
version: v5
2+
version: "${CELLBASE.VERSION}"
33
apiVersion: "${project.version}"
44
wiki: https://github.com/opencb/cellbase/wiki
55
maintenanceFlagFile: "/tmp/maintenance"

cellbase-lib/src/test/resources/index/mongodb-indexes.json

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,5 @@
186186
{"collection": "protein_substitution_prediction", "fields": {"transcriptId": 1}, "options": {"background": true}}
187187
{"collection": "protein_substitution_prediction", "fields": {"aaPosition": 1}, "options": {"background": true}}
188188

189-
{"collection": "common_polygenic_score", "fields": {"id": 1}, "options": {"background": true}}
190-
{"collection": "common_polygenic_score", "fields": {"name": 1}, "options": {"background": true}}
191-
{"collection": "common_polygenic_score", "fields": {"source": 1}, "options": {"background": true}}
192-
{"collection": "variant_polygenic_score", "fields": {"_chunkIds": 1}, "options": {"background": true}}
193-
{"collection": "variant_polygenic_score", "fields": {"chromosome": 1, "position": 1}, "options": {"background": true}}
194-
{"collection": "variant_polygenic_score", "fields": {"polygenicScores.id": 1}, "options": {"background": true}}
195-
196189
{"collection": "snp", "fields": {"id": 1}, "options": {"background": true}}
197190
{"collection": "snp", "fields": {"chromosome": 1, "position": 1, "reference": 1}, "options": {"background": true}}

0 commit comments

Comments
 (0)