Skip to content

Commit aa524f9

Browse files
authored
Merge pull request #14 from ubbdst/chars
Generate sort label
2 parents c7711be + 7d7bb1a commit aa524f9

File tree

18 files changed

+491
-326
lines changed

18 files changed

+491
-326
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@ release
55
*.iml
66
/target/
77
/src/test/java/org/elasticsearch/plugin/
8-
/src/main/java/org/elasticsearch/river/eea_rdf/settings/EEASettings.class
8+
/org/elasticsearch/river/ubb/settings/EEASettings.class
99
pom.xml.versionsBackup
1010
pom.xml.releaseBackup
1111
.classpath
1212
.project
1313
.DS_Store
14+
.settings
15+
/bin
1416

1517

pom.xml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
<?xml version="1.0" encoding="UTF-8"?>
22
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
33
<modelVersion>4.0.0</modelVersion>
4-
<groupId>org.elasticsearch.plugin.river.eea_rdf</groupId>
5-
<artifactId>eea-rdf-river-plugin</artifactId>
4+
<groupId>org.elasticsearch.plugin.river.ubb</groupId>
5+
<artifactId>ubb-rdf-river-plugin</artifactId>
66
<version>1.7.6</version>
77

88
<packaging>jar</packaging>
99

10-
<name>eea-rdf-river-plugin</name>
10+
<name>ubb-rdf-river-plugin</name>
1111
<description>Elasticsearch RDF River Plugin</description>
1212

1313
<inceptionYear>2013</inceptionYear>
@@ -128,8 +128,6 @@
128128
</plugin>
129129

130130
</plugins>
131-
132-
133131
</build>
134132

135133

src/main/java/org/elasticsearch/plugin/river/eea_rdf/RDFRiverPlugin.java renamed to src/main/java/org/elasticsearch/plugin/river/ubb/RDFRiverPlugin.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
package org.elasticsearch.plugin.river.eea_rdf.eea_rdf_river_plugin;
1+
package org.elasticsearch.plugin.river.ubb;
22

33
import org.elasticsearch.common.inject.Inject;
44
import org.elasticsearch.plugins.AbstractPlugin;
55
import org.elasticsearch.river.RiversModule;
6-
import org.elasticsearch.river.eea_rdf.RDFRiverModule;
7-
import org.elasticsearch.river.eea_rdf.settings.EEASettings;
6+
import org.elasticsearch.river.ubb.RDFRiverModule;
7+
import org.elasticsearch.river.ubb.settings.Defaults;
88

99
/**
1010
* @author iulia, EEA
@@ -17,7 +17,7 @@ public RDFRiverPlugin() {
1717

1818
@Override
1919
public String name() {
20-
return EEASettings.RIVER_PLUGIN_NAME;
20+
return Defaults.RIVER_PLUGIN_NAME;
2121
}
2222

2323
@Override
@@ -26,7 +26,7 @@ public String description() {
2626
}
2727

2828
public void onModule(RiversModule module) {
29-
module.registerRiver(EEASettings.RIVER_SETTINGS_KEY, RDFRiverModule.class);
29+
module.registerRiver(Defaults.EEA_SETTINGS_KEY, RDFRiverModule.class);
3030
}
3131
}
3232

src/main/java/org/elasticsearch/river/eea_rdf/RDFRiver.java renamed to src/main/java/org/elasticsearch/river/ubb/RDFRiver.java

Lines changed: 51 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package org.elasticsearch.river.eea_rdf;
1+
package org.elasticsearch.river.ubb;
22

33
import org.elasticsearch.client.Client;
44
import org.elasticsearch.common.inject.Inject;
@@ -7,17 +7,18 @@
77
import org.elasticsearch.common.util.concurrent.EsExecutors;
88
import org.elasticsearch.common.xcontent.support.XContentMapValues;
99
import org.elasticsearch.river.*;
10-
import org.elasticsearch.river.eea_rdf.settings.EEASettings;
11-
import org.elasticsearch.river.eea_rdf.support.ContextFactory;
12-
import org.elasticsearch.river.eea_rdf.support.Harvester;
13-
import org.elasticsearch.river.eea_rdf.support.JsonFileLoader;
10+
import org.elasticsearch.river.ubb.settings.Defaults;
11+
import org.elasticsearch.river.ubb.support.ContextFactory;
12+
import org.elasticsearch.river.ubb.support.Harvester;
13+
import org.elasticsearch.river.ubb.support.JsonFileLoader;
1414

1515
import java.util.Arrays;
1616
import java.util.List;
1717
import java.util.Map;
1818

1919
/**
20-
* @author EEA
20+
* @author European Environment Agency (EEA)
21+
* @author Hemed Al Ruwehy
2122
* <p>
2223
* Modified by Hemed Ali Al Ruwehy, The University of Bergen Library
2324
* @since 09-03-2015
@@ -35,7 +36,7 @@ public RDFRiver(RiverName riverName,
3536
super(riverName, settings);
3637
harvester = new Harvester();
3738
harvester.client(client).riverName(riverName.name());
38-
addHarvesterSettings(settings);
39+
buildHarvester(settings);
3940
}
4041

4142
/**
@@ -46,6 +47,19 @@ private static Map<String, Object> extractSettings(RiverSettings settings, Strin
4647
return (Map<String, Object>) settings.settings().get(key);
4748
}
4849

50+
/**
51+
* Returns the river settings map stored under {@code Defaults.EEA_SETTINGS_KEY} (cast unchecked to {@code Map<String, Object>}); throws {@link IllegalArgumentException} when the river settings do not contain that key.
52+
**/
53+
@SuppressWarnings("unchecked")
54+
private static Map<String, Object> extractSettings(RiverSettings settings) {
55+
if (settings.settings().containsKey(Defaults.EEA_SETTINGS_KEY)) {
56+
return (Map<String, Object>) settings.settings().get(Defaults.EEA_SETTINGS_KEY);
57+
}
58+
throw new IllegalArgumentException(String.
59+
format("No key in the river settings. Expected \"%s\"", Defaults.EEA_SETTINGS_KEY));
60+
}
61+
62+
4963
@SuppressWarnings("unchecked")
5064
private static Map<String, String> getStrStrMapFromSettings(Map<String, Object> settings, String key) {
5165
return (Map<String, String>) settings.get(key);
@@ -63,7 +77,7 @@ private static Map<String, String> loadProperties(Map<String, Object> settings,
6377

6478
private static String loadContext(Map<String, Object> settings, String key) {
6579
Object values = settings.get(key);
66-
logger.info("Reading context from: " + values);
80+
logger.info("Reading from context: " + values);
6781
return new JsonFileLoader().resolveToString(values.toString());
6882
}
6983

@@ -78,13 +92,11 @@ private static List<String> getStrListFromSettings(Map<String, Object> settings,
7892
return (List<String>) settings.get(key);
7993
}
8094

81-
private void addHarvesterSettings(RiverSettings settings) {
82-
if (!settings.settings().containsKey(EEASettings.RIVER_SETTINGS_KEY)) {
83-
throw new IllegalArgumentException(
84-
String.format("There is no \"%s\" key in the river settings.", EEASettings.RIVER_SETTINGS_KEY));
85-
}
86-
87-
Map<String, Object> rdfSettings = extractSettings(settings, EEASettings.RIVER_SETTINGS_KEY);
95+
/**
96+
* Builds harvester with the provided settings
97+
*/
98+
private void buildHarvester(RiverSettings settings) {
99+
Map<String, Object> rdfSettings = extractSettings(settings);
88100
harvester.rdfIndexType(XContentMapValues.nodeStringValue(
89101
rdfSettings.get("indexType"), "full"))
90102
.rdfStartTime(XContentMapValues.nodeStringValue(
@@ -99,55 +111,57 @@ private void addHarvesterSettings(RiverSettings settings) {
99111
rdfSettings.get("queryPath"), ""))
100112
.rdfNumberOfBulkActions(XContentMapValues.nodeLongValue(
101113
rdfSettings.get("bulkActions"),
102-
EEASettings.DEFAULT_NUMBER_OF_BULK_ACTIONS))
114+
Defaults.DEFAULT_NUMBER_OF_BULK_ACTIONS))
103115
.rdfUpdateDocuments(XContentMapValues.nodeBooleanValue(
104116
rdfSettings.get("updateDocuments"),
105-
EEASettings.DEFAULT_UPDATE_DOCUMENTS))
117+
Defaults.DEFAULT_UPDATE_DOCUMENTS))
106118
.rdfQueryType(XContentMapValues.nodeStringValue(
107119
rdfSettings.get("queryType"),
108-
EEASettings.DEFAULT_QUERYTYPE))
120+
Defaults.DEFAULT_QUERYTYPE))
109121
.rdfListType(XContentMapValues.nodeStringValue(
110122
rdfSettings.get("listtype"),
111-
EEASettings.DEFAULT_LIST_TYPE))
123+
Defaults.DEFAULT_LIST_TYPE))
112124
.rdfAddLanguage(XContentMapValues.nodeBooleanValue(
113125
rdfSettings.get("addLanguage"),
114-
EEASettings.DEFAULT_ADD_LANGUAGE))
126+
Defaults.DEFAULT_ADD_LANGUAGE))
115127
.rdfLanguage(XContentMapValues.nodeStringValue(
116128
rdfSettings.get("language"),
117-
EEASettings.DEFAULT_LANGUAGE))
129+
Defaults.DEFAULT_LANGUAGE))
118130
.rdfAddUriForResource(XContentMapValues.nodeBooleanValue(
119131
rdfSettings.get("includeResourceURI"),
120-
EEASettings.DEFAULT_ADD_URI))
132+
Defaults.DEFAULT_ADD_URI))
121133
.removeIllegalCharsForSuggestion(XContentMapValues.nodeBooleanValue(
122134
rdfSettings.get("removeIllegalCharsForSuggestion"),
123135
true))
124136
.deleteRiverAfterCreation(XContentMapValues.nodeBooleanValue(
125137
rdfSettings.get("deleteRiverAfterCreation"), false))
138+
.generateSortLabel(XContentMapValues.nodeBooleanValue(
139+
rdfSettings.get("generateSortLabel"), false))
126140
.maxSuggestInputLength(XContentMapValues.nodeIntegerValue(
127141
rdfSettings.get("maxSuggestInputLength"),
128-
EEASettings.DEFAULT_MAX_SUGGEST_INPUT_LENGTH))
142+
Defaults.DEFAULT_MAX_SUGGEST_INPUT_LENGTH))
129143
/*.rdfURIDescription(XContentMapValues.nodeStringValue(
130144
rdfSettings.get("uriDescription"),
131145
EEASettings.DEFAULT_URI_DESCRIPTION))
132146
*/
133147
.rdfSyncConditions(XContentMapValues.nodeStringValue(
134148
rdfSettings.get("syncConditions"),
135-
EEASettings.DEFAULT_SYNC_COND))
149+
Defaults.DEFAULT_SYNC_COND))
136150
/*.rdfContextProp(XContentMapValues.nodeStringValue(
137151
rdfSettings.get("context"), ""))
138152
*/
139153
.rdfSyncTimeProp(XContentMapValues.nodeStringValue(
140154
rdfSettings.get("syncTimeProp"),
141-
EEASettings.DEFAULT_SYNC_TIME_PROP))
155+
Defaults.DEFAULT_SYNC_TIME_PROP))
142156
.rdfSyncOldData(XContentMapValues.nodeBooleanValue(
143157
rdfSettings.get("syncOldData"),
144-
EEASettings.DEFAULT_SYNC_OLD_DATA));
158+
Defaults.DEFAULT_SYNC_OLD_DATA));
145159

146160
if (rdfSettings.containsKey("uriDescription")) {
147161
harvester.rdfURIDescription(getStrListFromSettings(rdfSettings, "uriDescription"));
148162
} else {
149163
//Convert the default array to List
150-
List<String> defaultUriList = Arrays.asList(EEASettings.DEFAULT_URI_DESCRIPTION);
164+
List<String> defaultUriList = Arrays.asList(Defaults.DEFAULT_URI_DESCRIPTION);
151165
harvester.rdfURIDescription(defaultUriList);
152166
}
153167
if (rdfSettings.containsKey("proplist")) {
@@ -156,7 +170,7 @@ private void addHarvesterSettings(RiverSettings settings) {
156170
if (rdfSettings.containsKey("query")) {
157171
harvester.rdfQuery(getStrListFromSettings(rdfSettings, "query"));
158172
} else {
159-
harvester.rdfQuery(EEASettings.DEFAULT_QUERIES);
173+
harvester.rdfQuery(Defaults.DEFAULT_QUERIES);
160174
}
161175
/*if (rdfSettings.containsKey("normProp")) {
162176
harvester.rdfNormalizationProp(getStrStrMapFromSettings(rdfSettings, "normProp"));
@@ -181,29 +195,28 @@ private void addHarvesterSettings(RiverSettings settings) {
181195
}
182196
if (settings.settings().containsKey("index")) {
183197
Map<String, Object> indexSettings = extractSettings(settings, "index");
184-
harvester.index(XContentMapValues.nodeStringValue(
185-
indexSettings.get("index"),
186-
EEASettings.DEFAULT_INDEX_NAME))
187-
.type(XContentMapValues.nodeStringValue(
188-
indexSettings.get("type"),
189-
EEASettings.DEFAULT_TYPE_NAME));
198+
harvester.index(XContentMapValues.nodeStringValue(indexSettings.get("index"),
199+
Defaults.DEFAULT_INDEX_NAME))
200+
.type(XContentMapValues.nodeStringValue(indexSettings.get("type"),
201+
Defaults.DEFAULT_TYPE_NAME));
190202
} else {
191-
harvester.index(EEASettings.DEFAULT_INDEX_NAME)
192-
.type(EEASettings.DEFAULT_TYPE_NAME);
203+
harvester.index(Defaults.DEFAULT_INDEX_NAME).type(Defaults.DEFAULT_TYPE_NAME);
193204
}
194205
}
195206

196207
@Override
197208
public void start() {
209+
harvester.log("Starting river [" + riverName.name() + "]");
210+
harvester.timeStarted(System.currentTimeMillis());
198211
harvesterThread = EsExecutors.daemonThreadFactory(
199-
settings.globalSettings(), "eea_rdf_river(" + riverName().name() + ")")
212+
settings.globalSettings(), "ubbRiver[" + riverName().name() + "]")
200213
.newThread(harvester);
201214
harvesterThread.start();
202215
}
203216

204217
@Override
205218
public void close() {
206-
harvester.log("Closing UBB RDF river [" + riverName.name() + "]");
219+
harvester.log("Closing river [" + riverName.name() + "]");
207220
harvester.setClose(true);
208221
if (harvesterThread != null && !harvesterThread.isInterrupted()) {
209222
harvesterThread.interrupt();

src/main/java/org/elasticsearch/river/eea_rdf/RDFRiverModule.java renamed to src/main/java/org/elasticsearch/river/ubb/RDFRiverModule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package org.elasticsearch.river.eea_rdf;
1+
package org.elasticsearch.river.ubb;
22

33
import org.elasticsearch.common.inject.AbstractModule;
44
import org.elasticsearch.river.River;
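src/main/java/org/elasticsearch/river/eea_rdf/settings/EEASettings.java renamed to src/main/java/org/elasticsearch/river/ubb/settings/Defaults.java

Original file line numberDiff line numberDiff line change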
@@ -1,26 +1,23 @@
1-
package org.elasticsearch.river.eea_rdf.settings;
1+
package org.elasticsearch.river.ubb.settings;
22

33
import java.util.ArrayList;
44
import java.util.List;
5-
import java.util.concurrent.TimeUnit;
6-
import java.util.regex.Pattern;
75

86
/**
9-
* @author EEA
7+
* @author European Environment Agency (EEA)
8+
* @author Hemed Al Ruwehy
109
* Modified by Hemed Ali, 09-03-2015
1110
*/
12-
public abstract class EEASettings {
11+
public final class Defaults {
1312

14-
public final static String RIVER_SETTINGS_KEY = "eeaRDF";
15-
public final static String UBB_SETTINGS_KEY = "ubbRDF";
13+
public final static String EEA_SETTINGS_KEY = "eeaRDF";
1614
public final static String DEFAULT_INDEX_NAME = "rdfdata";
1715
public final static String DEFAULT_TYPE_NAME = "resource";
1816
public final static String SUGGESTION_FIELD = "suggest";
1917
public final static String RIVER_PLUGIN_NAME = "ubb-rdf-river";
2018
public final static String SUGGESTION_INPUT_FIELD = "input";
2119
public final static String SUGGESTION_OUTPUT_FIELD = "output";
2220
public final static String SUGGESTION_PAYLOAD_FIELD = "payload";
23-
2421
public final static int DEFAULT_NUMBER_OF_BULK_ACTIONS = 100;
2522
public final static int DEFAULT_MAX_SUGGEST_INPUT_LENGTH = 50;
2623
public final static int DEFAULT_NUMBER_OF_RETRY = 5;
@@ -46,46 +43,22 @@ public abstract class EEASettings {
4643
public final static String DEFAULT_LANGUAGE = "no";
4744
public final static Boolean DEFAULT_ADD_URI = true;
4845
public final static Boolean DEFAULT_UPDATE_DOCUMENTS = false;
49-
public final static String[] DEFAULT_URI_DESCRIPTION = {"http://www.w3.org/2000/01/rdf-schema#label"};
46+
public final static String[] DEFAULT_URI_DESCRIPTION = {
47+
"http://www.w3.org/2000/01/rdf-schema#label"
48+
};
5049
public final static String DEFAULT_SYNC_COND = "";
51-
public final static String DEFAULT_SYNC_TIME_PROP =
52-
"http://cr.eionet.europa.eu/ontologies/contreg.rdf#lastRefreshed";
50+
public final static String DEFAULT_SYNC_TIME_PROP = "http://cr.eionet.europa.eu/ontologies/contreg.rdf#lastRefreshed";
5351
public final static Boolean DEFAULT_SYNC_OLD_DATA = false;
52+
public final static long DEFAULT_QUERY_LIMIT = 1000;
53+
public static final String SORT_LABEL_NAME = "labelSort";
5454

55-
public static String parseForJson(String text) {
56-
return text.trim().replaceAll("[\n\r]", " ")
57-
.replace('\"', '\'')
58-
.replace("\t", " ")
59-
.replace("\\'", "\'")
60-
.replaceAll("\\\\x[a-fA-F0-9][a-fA-F0-9]", "_")
61-
.replace("\\", "\\\\");
62-
}
63-
64-
public static String removeIllegalXMLChar(String text) {
65-
Pattern invalidXMLChars = Pattern.compile("[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1F]");
66-
invalidXMLChars.matcher(text).replaceAll("");
67-
return text;
68-
}
69-
70-
/**
71-
* Gets time representation as string
72-
*/
73-
public static String getTimeFormatAsString(long timeInMilliSeconds) {
74-
75-
//Time in seconds
76-
double timeInSeconds = timeInMilliSeconds/1000.0;
77-
78-
//In minutes
79-
if(timeInSeconds >= 60 && timeInSeconds < 60*60){
80-
return timeInSeconds/60 + " minutes";
81-
}
55+
//In order of priority
56+
public static final String[] SORT_LABELS = {
57+
"http://purl.org/dc/terms/title",
58+
"http://xmlns.com/foaf/0.1/name",
59+
"http://www.w3.org/2004/02/skos/core#prefLabel",
60+
"http://www.w3.org/2000/01/rdf-schema#label"
61+
};
8262

83-
//In hours
84-
if(timeInSeconds >= 60*60 && timeInSeconds < 24*3600) {
85-
return timeInSeconds/3600 + " hours";
86-
}
8763

88-
//default unit
89-
return timeInSeconds + " seconds";
90-
}
9164
}

0 commit comments

Comments
 (0)