1919import com .fasterxml .jackson .databind .DeserializationFeature ;
2020import com .fasterxml .jackson .databind .MapperFeature ;
2121import com .fasterxml .jackson .databind .ObjectMapper ;
22+ import org .apache .commons .collections4 .CollectionUtils ;
2223import org .apache .commons .lang3 .StringUtils ;
2324import org .opencb .cellbase .core .config .CellBaseConfiguration ;
25+ import org .opencb .cellbase .core .exception .CellBaseException ;
26+ import org .opencb .cellbase .core .models .Release ;
27+ import org .opencb .cellbase .lib .EtlCommons ;
2428import org .opencb .cellbase .lib .db .MongoDBManager ;
29+ import org .opencb .cellbase .lib .impl .core .CellBaseDBAdaptor ;
30+ import org .opencb .cellbase .lib .managers .DataReleaseManager ;
2531import org .opencb .commons .datastore .core .ObjectMap ;
2632import org .opencb .commons .datastore .mongodb .MongoDBIndexUtils ;
2733import org .opencb .commons .datastore .mongodb .MongoDataStore ;
3541import java .util .*;
3642
3743import static org .opencb .cellbase .lib .impl .core .CellBaseDBAdaptor .DATA_RELEASE_SEPARATOR ;
44+ import static org .opencb .cellbase .lib .impl .core .CellBaseDBAdaptor .buildCollectionName ;
3845
3946
4047public class IndexManager {
4148
49+ private DataReleaseManager dataReleaseManager ;
4250 private CellBaseConfiguration configuration ;
4351 private Logger logger ;
4452 private String databaseName ;
@@ -48,31 +56,10 @@ public class IndexManager {
4856
4957 private Map <String , List <Map <String , ObjectMap >>> indexes ;
5058
51- private static final Map <String , List <String >> DATA_COLLECTIONS = new HashMap <>();
52-
53- static {
54- DATA_COLLECTIONS .put ("genome" , Arrays .asList ("genome_info" , "genome_sequence" ));
55- DATA_COLLECTIONS .put ("conservation" , Collections .singletonList ("conservation" ));
56- DATA_COLLECTIONS .put ("repeats" , Collections .singletonList ("repeats" ));
57- DATA_COLLECTIONS .put ("gene" , Arrays .asList ("gene" , "refseq" ));
58- DATA_COLLECTIONS .put ("protein" , Collections .singletonList ("protein" ));
59- DATA_COLLECTIONS .put ("regulation" , Arrays .asList ("regulatory_region" , "regulatory_pfm" ));
60- DATA_COLLECTIONS .put ("variation" , Collections .singletonList ("variation" ));
61- DATA_COLLECTIONS .put ("variation_functional_score" , Collections .singletonList ("missense_variation_functional_score" ));
62- DATA_COLLECTIONS .put ("protein_functional_prediction" , Collections .singletonList ("protein_functional_prediction" ));
63- DATA_COLLECTIONS .put ("revel" , Collections .singletonList ("revel" ));
64- DATA_COLLECTIONS .put ("alphamissense" , Collections .singletonList ("alphamissense" ));
65- DATA_COLLECTIONS .put ("clinical_variants" , Collections .singletonList ("clinical_variants" ));
66- DATA_COLLECTIONS .put ("splice_score" , Collections .singletonList ("splice_score" ));
67- DATA_COLLECTIONS .put ("ontology" , Collections .singletonList ("ontology" ));
68- DATA_COLLECTIONS .put ("pubmed" , Collections .singletonList ("pubmed" ));
69- DATA_COLLECTIONS .put ("pharmacogenomics" , Collections .singletonList ("pharmacogenomics" ));
70- DATA_COLLECTIONS .put ("polygenic_score" , Arrays .asList ("variant_polygenic_score" , "common_polygenic_score" ));
71- }
72-
73- public IndexManager (String databaseName , Path indexFile , CellBaseConfiguration configuration ) {
59+ public IndexManager (String databaseName , Path indexFile , DataReleaseManager dataReleaseManager , CellBaseConfiguration configuration ) {
7460 this .databaseName = databaseName ;
7561 this .indexFile = indexFile ;
62+ this .dataReleaseManager = dataReleaseManager ;
7663 this .configuration = configuration ;
7764
7865 init ();
@@ -82,8 +69,6 @@ private void init() {
8269 logger = LoggerFactory .getLogger (this .getClass ());
8370 mongoDBManager = new MongoDBManager (configuration );
8471
85- // Path indexFile = Paths.get("./cellbase-lib/src/main/resources/mongodb-indexes.json");
86-
8772 MongoDataStore mongoDBDatastore = mongoDBManager .createMongoDBDatastore (databaseName );
8873 mongoDBIndexUtils = new MongoDBIndexUtils (mongoDBDatastore , indexFile );
8974
@@ -99,24 +84,46 @@ private void init() {
9984 * @param dropIndexesFirst if TRUE, deletes the index before creating a new one. FALSE, no index is created if it
10085 * already exists.
10186 * @throws IOException if configuration file can't be read
87+ * @throws CellBaseException if DataRelease manager raises an exception
10288 */
103- @ Deprecated
104- public void createMongoDBIndexes (String data , String dataRelease , boolean dropIndexesFirst ) throws IOException {
105- // InputStream indexResourceStream = getClass().getResourceAsStream("mongodb-indexes.json");
89+ public void createMongoDBIndexes (String data , int dataRelease , boolean dropIndexesFirst ) throws IOException , CellBaseException {
90+ Release release = dataReleaseManager .get (dataRelease );
91+
92+ List <String > collections = new ArrayList <>();
10693 if (StringUtils .isEmpty (data ) || "all" .equalsIgnoreCase (data )) {
107- mongoDBIndexUtils .createAllIndexes (dropIndexesFirst );
108- // mongoDBIndexUtils.createAllIndexes(mongoDataStore, indexResourceStream, dropIndexesFirst);
109- logger .info ("Loaded all indexes" );
94+ logger .info ("Indexing all data ({}) for data release {}" , StringUtils .join (release .getCollections ().keySet (), ", " ),
95+ dataRelease );
96+ for (Map .Entry <String , String > entry : release .getCollections ().entrySet ()) {
97+ // Sanity check
98+ if (!entry .getValue ().endsWith (DATA_RELEASE_SEPARATOR + dataRelease )) {
99+ throw new CellBaseException ("Something wrong when indexing: Collection " + entry .getValue () + " found when indexing"
100+ + " data release " + dataRelease );
101+ }
102+ collections .add (entry .getValue ());
103+ }
110104 } else {
111105 List <String > dataList = Arrays .asList (data .split ("," ));
112106 for (String dataName : dataList ) {
113- List <String > collections = new ArrayList <>();
114- for (String collection : DATA_COLLECTIONS .get (dataName )) {
115- collections .add (collection + DATA_RELEASE_SEPARATOR + dataRelease );
107+ if (release .getCollections ().containsKey (dataName )) {
108+ collections .add (release .getCollections ().get (dataName ));
109+ } else {
110+ throw new CellBaseException ("Error indexing: data '" + dataName + "' missing in data release " + dataRelease
111+ + " (" + StringUtils .join (release .getCollections ().keySet (), ", " ) + ")" );
116112 }
117- createMongoDBIndexes (collections , dropIndexesFirst );
118113 }
119114 }
115+
116+ // Remove temporary polygenic score collections
117+ collections .remove (CellBaseDBAdaptor .buildCollectionName (EtlCommons .PGS_COMMON_COLLECTION , dataRelease ));
118+ collections .remove (CellBaseDBAdaptor .buildCollectionName (EtlCommons .PGS_VARIANT_COLLECTION , dataRelease ));
119+
120+ // Check collection names
121+ if (CollectionUtils .isEmpty (collections )) {
122+ throw new CellBaseException ("No collections to index" );
123+ }
124+
125+ // Create MongoDB indexes
126+ createMongoDBIndexes (collections , dropIndexesFirst );
120127 }
121128
122129 public void createMongoDBIndexes (List <String > collections , boolean dropIndexesFirst ) throws IOException {
@@ -154,13 +161,21 @@ public void validateMongoDBIndexes(String collectionName) throws IOException {
154161 }
155162 }
156163 }
157-
164+ private void createAllIndexes (int dataRelease , boolean dropIndexesFirst ) throws IOException {
165+ Map <String , List <Map <String , ObjectMap >>> indexes = getIndexesFromFile ();
166+
167+ for (String key : indexes .keySet ()) {
168+ String collectionName = buildCollectionName (key , dataRelease );
169+ logger .info ("Creating index for collection {}" , collectionName );
170+ mongoDBIndexUtils .createIndexes (collectionName , indexes .get (key ), dropIndexesFirst );
171+ logger .info ("Done." );
172+ }
173+ }
158174 private void checkIndexes () throws IOException {
159175 if (indexes == null ) {
160176 indexes = getIndexesFromFile ();
161177 }
162178 }
163-
164179 private Map <String , List <Map <String , ObjectMap >>> getIndexesFromFile () throws IOException {
165180 ObjectMapper objectMapper = generateDefaultObjectMapper ();
166181 Map <String , List <Map <String , ObjectMap >>> indexes = new HashMap <>();
0 commit comments