Skip to content

Commit 7830de1

Browse files
author
Peter Kraker
committed
Cleanup
Added functionality to create new revisions based on the timestamp of the last revision
1 parent 9b5ad83 commit 7830de1

4 files changed

Lines changed: 84 additions & 148 deletions

File tree

server/classes/headstart/persistence/SQLitePersistence.php

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -53,20 +53,21 @@ public function createVisualization($vis_id, $vis_title, $data) {
5353

5454
}
5555

56-
public function getLastVersion($vis_id) {
57-
return $this->getRevision($vis_id, null);
56+
public function getLastVersion($vis_id, $details=false) {
57+
return $this->getRevision($vis_id, null, $details);
5858
}
5959

60-
public function getRevision($vis_id, $rev_id) {
60+
public function getRevision($vis_id, $rev_id, $details=false) {
6161

6262
$id = ($rev_id == null)?("revisions.rev_id"):("?");
6363
$array = ($rev_id == null)?(array(addslashes($vis_id))):(array(addslashes($vis_id), $rev_id));
64+
$return_fields = ($details==true)?("revisions.*"):("revisions.rev_data");
6465

65-
$result = $this->prepareExecuteAndReturnFirstResult("SELECT revisions.rev_data FROM revisions, visualizations
66+
$result = $this->prepareExecuteAndReturnResult("SELECT $return_fields FROM revisions, visualizations
6667
WHERE visualizations.vis_id = ?
6768
AND visualizations.vis_id = revisions.rev_vis
6869
AND visualizations.vis_latest =" . $id
69-
, $array);
70+
, $array, !$details);
7071

7172
return $result;
7273
}
@@ -76,7 +77,7 @@ public function writeRevision($vis_id, $data, $rev_id=null) {
7677
$rev = $rev_id;
7778

7879
if($rev == null) {
79-
$ver = $this->prepareExecuteAndReturnFirstResult("SELECT vis_latest FROM visualizations WHERE vis_id=?", array($vis_id));
80+
$ver = $this->prepareExecuteAndReturnResult("SELECT vis_latest FROM visualizations WHERE vis_id=?", array($vis_id), true);
8081
$rev = $ver + 1;
8182
}
8283

@@ -102,11 +103,17 @@ private function prepareAndExecute($stmt, $array) {
102103
return array("status" => $result, "query" => $query);
103104
}
104105

105-
private function prepareExecuteAndReturnFirstResult($stmt, $array) {
106+
private function prepareExecuteAndReturnResult($stmt, $array, $first=false) {
106107
$result = $this->prepareAndExecute($stmt, $array);
107108
$fetch_result = $result["query"]->fetch();
108109

109-
return $fetch_result[0];
110+
if($fetch_result == false) {
111+
return false;
112+
} else if($first == true) {
113+
return $fetch_result[0];
114+
} else {
115+
return $fetch_result;
116+
}
110117

111118
}
112119

server/preprocessing/other-scripts/rplos.R

Lines changed: 34 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,8 @@ library(proxy)
1313
library(SnowballC)
1414
library(rplos)
1515
library(jsonlite)
16-
#library(lsa)
17-
#library(stylo)
16+
17+
debug = FALSE
1818

1919
# Get data from PLOS API
2020
start.time <- Sys.time()
@@ -64,41 +64,30 @@ corpus <- tm_map(corpus, stemDocument)
6464

6565
tdm <- TermDocumentMatrix(corpus)
6666

67-
#tdm <- weightTfIdf(tdm, normalize = TRUE)
68-
6967
tdm <- removeSparseTerms(tdm, 0.3)
7068

7169
tdm_matrix = t(as.matrix(tdm))
72-
#diag(tdm_matrix) <- NA
70+
7371
distance_matrix_2 <- as.matrix(proxy::dist(tdm_matrix, method = "cosine"))
7472
distance_matrix = as.dist(distance_matrix_2)
7573

76-
#td.mat <- as.matrix(TermDocumentMatrix(corpus))
77-
#td.mat.lsa <- lw_bintf(td.mat) * gw_idf(td.mat)
78-
#lsaSpace <- lsa(td.mat.lsa)
79-
#tdm_matrix = t(as.textmatrix(lsaSpace))
80-
#distance_matrix_2 <- as.matrix(proxy::dist(tdm_matrix, method = "cosine"))
81-
#distance_matrix = as.dist(distance_matrix_2)
82-
83-
#distance_matrix <- apply(distance_matrix, 2, mean, na.rm=TRUE)
84-
#write.csv(as.matrix(distance_matrix), "matrix.csv")
85-
86-
8774
# Perform clustering, use elbow to determine a good number of clusters
8875
css_cluster <- css.hclust(distance_matrix, hclust.FUN.MoreArgs=list(method="ward.D"))
8976
cut_off = elbow.batch(css_cluster)
90-
#cut_off = elbow.batch(css_cluster,inc.thres=c(0.01,0.05,0.1),
91-
# ev.thres=c(0.95,0.9,0.8,0.75,0.67,0.5,0.33,0.2,0.1),precision=3)
77+
9278
num_clusters = cut_off$k
9379
meta_cluster = attr(css_cluster,"meta")
9480
cluster = meta_cluster$hclust.obj
9581
labels = labels(distance_matrix)
82+
groups <- cutree(cluster, k=num_clusters)
9683

97-
# Plot result of clustering to PDF file
98-
# pdf("clustering.pdf", width=19, height=12)
99-
# plot(cluster, labels=metadata$title, cex=0.6)
100-
# rect.hclust(cluster, k=num_clusters, border="red")
101-
# dev.off()
84+
if(debug == TRUE) {
85+
# Plot result of clustering to PDF file
86+
pdf("clustering.pdf", width=19, height=12)
87+
plot(cluster, labels=metadata$title, cex=0.6)
88+
rect.hclust(cluster, k=num_clusters, border="red")
89+
dev.off()
90+
}
10291

10392
num_clusters
10493

@@ -108,70 +97,35 @@ nm.nmin = nmds.min(nm)
10897
x = nm.nmin$X1
10998
y = nm.nmin$X2
11099

111-
# Plot results from multidimensional scaling, highlight clusters with symbols
112-
# pdf("mds.pdf")
113-
groups <- cutree(cluster, k=num_clusters)
114-
#plot(nm.nmin, pch=groups)
115-
# dev.off()
100+
if(debug == TRUE) {
101+
# Plot results from multidimensional scaling, highlight clusters with symbols
102+
pdf("mds.pdf")
103+
plot(nm.nmin, pch=groups)
104+
dev.off()
105+
}
116106

117107
# Prepare the output
118108
result = cbind(x,y,groups,labels)
119109
output = merge(metadata, result, by.x="id", by.y="labels", all=TRUE)
120110
names(output)[names(output)=="groups"] <- "area_uri"
121111
output["area"] = paste("Cluster ", output$area_uri, sep="")
122112

123-
#BigramTokenizer <-
124-
# function(x)
125-
# unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE)
126-
127-
#dtm <- DocumentTermMatrix(corpus_unstemmed, control = list(tokenize = BigramTokenizer))
128-
129-
# m_naming <- list(content = "titleabstract", id = "id")
130-
#
131-
# myReader_naming <- readTabular(mapping = m_naming)
132-
#
133-
# (corpus_naming <- Corpus(DataframeSource(cooc), readerControl = list(reader = myReader_naming)))
134-
#
135-
# corpus_naming <- tm_map(corpus_naming, removePunctuation)
136-
#
137-
# corpus_naming <- tm_map(corpus_naming, stripWhitespace)
138-
#
139-
# corpus_naming <- tm_map(corpus_naming, content_transformer(tolower))
140-
#
141-
# corpus_naming <- tm_map(corpus_naming, removeWords, stopwords("english"))
142-
143-
#corpus_naming <- tm_map(corpus_naming, stemDocument)
144-
145-
#subjects = cooc$subject
146-
#subjects = strsplit(subjects, "; ")
147-
#output$subjects_cleaned = sub(".*[/]", "", subjects)
148-
149-
#dtm = DocumentTermMatrix(corpus_unstemmed)
150-
#dtm = weightTfIdf(dtm)
151-
152-
#for (i in 1:num_clusters) {
153-
#inGroup <- which(output$area_uri==i)
154-
#within <- table(inGroup$subjects_cleaned)
155-
#most_freq_term = sort(colSums(as.matrix(within)), decreasing=TRUE)[1:4]
156-
#output$area[output$area_uri==i] = paste(names(most_freq_term), collapse=", ")
157-
#}
158-
159-
#output$area
160-
161113
output_json = toJSON(output)
162114
print(output_json)
163115

164-
# Write output to file
165-
# file_handle = file("output_file.csv", open="w")
166-
# write.csv(output, file=file_handle, row.names=FALSE)
167-
# close(file_handle)
168-
#
169-
# # Write some stats to a file
170-
# file_handle = file("stats.txt", open="w")
171-
# writeLines(c(paste("Number of Clusters:", num_clusters, sep=" ")
172-
# , paste("Description:", attributes(cut_off)$description)
173-
# , paste("Stress:", min(nm$stress), sep=" ")
174-
# , paste("R2:", max(nm$r2), sep=" ")
175-
# ), file_handle)
176-
#
177-
# close(file_handle)
116+
if(debug == TRUE) {
117+
# Write output to file
118+
file_handle = file("output_file.csv", open="w")
119+
write.csv(output, file=file_handle, row.names=FALSE)
120+
close(file_handle)
121+
122+
# # Write some stats to a file
123+
file_handle = file("stats.txt", open="w")
124+
writeLines(c(paste("Number of Clusters:", num_clusters, sep=" ")
125+
, paste("Description:", attributes(cut_off)$description)
126+
, paste("Stress:", min(nm$stress), sep=" ")
127+
, paste("R2:", max(nm$r2), sep=" ")
128+
), file_handle)
129+
130+
close(file_handle)
131+
}

server/services/helper.php

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,20 @@
11
<?php
22

33
function getUserID() {
4-
if(isset($_SESSION['userInfo'])) {
4+
if (isset($_SESSION['userInfo'])) {
55
return $_SESSION['userInfo']['userID'];
66
} else {
77
return false;
88
}
99
}
10-
10+
11+
function redirect($url) {
12+
if (headers_sent()) {
13+
die('<script type="text/javascript">window.location=\'' . $url . '\';</script>');
14+
} else {
15+
header('Location: ' . $url);
16+
die();
17+
}
18+
}
19+
1120
?>

server/services/searchPLOS.php

Lines changed: 24 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -5,57 +5,15 @@
55
<?php
66
require dirname(__FILE__) . '/../classes/headstart/preprocessing/calculation/RCalculation.php';
77
require dirname(__FILE__) . '/../classes/headstart/persistence/SQLitePersistence.php';
8+
require dirname(__FILE__) . '/../classes/headstart/preprocessing/naming/KeywordNaming.php';
89
require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php';
910
require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php';
1011

1112
require 'helper.php';
1213

1314
use headstart\library;
1415

15-
16-
function attachMostUsedKeywords(&$array, $num_keywords) {
17-
18-
$working_array = array();
19-
20-
foreach($array as $entry) {
21-
$uri = $entry["area_uri"];
22-
$keywords = split("; ", $entry["subject"]);
23-
foreach($keywords as &$keyword) {
24-
$keyword = substr($keyword, strrpos($keyword, "/") + 1);
25-
}
26-
27-
if(isset($working_array[$uri])) {
28-
$working_array[$uri] = array_merge($working_array[$uri], $keywords);
29-
} else {
30-
$working_array[$uri] = $keywords;
31-
}
32-
}
33-
34-
$result_array = array();
35-
foreach($working_array as $key => $current_array) {
36-
$counted_sorted_array = array_count_values($current_array);
37-
arsort($counted_sorted_array);
38-
$important_terms = array_keys(array_slice($counted_sorted_array, 0, $num_keywords));
39-
$final_string = implode(", ", $important_terms);
40-
$result_array[$key] = $final_string;
41-
}
42-
43-
foreach($array as $key => $entry) {
44-
$array[$key]["area"] = $result_array[$entry["area_uri"]];
45-
}
46-
47-
}
48-
49-
function redirect($url){
50-
if (headers_sent()){
51-
die('<script type="text/javascript">window.location=\''.$url.'\';</script>');
52-
}else{
53-
header('Location: ' . $url);
54-
die();
55-
}
56-
}
57-
58-
$INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/";
16+
$INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/";
5917

6018
$ini_array = library\Toolkit::loadIni($INI_DIR);
6119

@@ -69,9 +27,17 @@ function redirect($url){
6927

7028
$settings = $ini_array["general"];
7129

72-
if ($persistence->getLastVersion($query) != null) {
73-
redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
74-
return;
30+
$last_version = $persistence->getLastVersion($query, true);
31+
32+
if ($last_version != false) {
33+
$now = new DateTime();
34+
$last_version_timestamp = new DateTime($last_version["rev_timestamp"]);
35+
$diff = $last_version_timestamp->diff($now);
36+
37+
if ($diff->d == 0) {
38+
redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
39+
return;
40+
}
7541
}
7642

7743
$WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"];
@@ -85,26 +51,26 @@ function redirect($url){
8551

8652
$output_json = mb_convert_encoding($output_json, "UTF-8");
8753

88-
if(!library\Toolkit::isJSON($output_json)) {
89-
echo "Sorry! Something went wrong - most likely we haven't found any documents matching your search term. Please <a href=\"http://" . $settings["host"] . $settings["vis_path"] ."\">go back and try again.</a>";
54+
if (!library\Toolkit::isJSON($output_json)) {
55+
echo "Sorry! Something went wrong - most likely we haven't found any documents matching your search term. Please <a href=\"http://" . $settings["host"] . $settings["vis_path"] . "\">go back and try again.</a>";
9056
// echo $output_json;
9157
return;
9258
}
93-
59+
9460
$result = json_decode($output_json, true);
95-
96-
attachMostUsedKeywords($result, 3);
97-
61+
62+
$naming = new \headstart\preprocessing\naming\KeywordNaming($ini_array);
63+
$naming->performNaming($result, 3);
64+
9865
$input_json = json_encode($result);
9966

100-
if ($persistence->getLastVersion($unique_id) == null) {
101-
$persistence->createVisualization($unique_id, "PLOS Search: " .$query, $input_json);
67+
if ($persistence->getLastVersion($query) == false) {
68+
$persistence->createVisualization($unique_id, "PLOS Search: " . $query, $input_json);
10269
} else {
10370
$persistence->writeRevision($unique_id, $input_json);
10471
}
105-
106-
redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
107-
72+
73+
redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
10874
?>
10975
</body>
11076
</html>

0 commit comments

Comments
 (0)