-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
114 lines (93 loc) · 2.98 KB
/
app.py
File metadata and controls
114 lines (93 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
""" Flask app to provide backend for the Search Engine Web Client """
from flask import Flask, request, jsonify, make_response, render_template
from flask_cors import CORS
import os
import pickle
import glob
from lsh_search_engine.settings import NUM_ROWS, NUM_BANDS
from helper import (
perform_lsh,
LRUCache,
process_query,
find_similar_docs,
get_data_for_docId,
)
# Flask application serving both the JSON API and the static web client.
# static_url_path="" mounts the client's static assets at the site root,
# so relative URLs in the templates resolve without a "/static" prefix.
app = Flask(
    __name__,
    static_url_path="",
    static_folder="search_client/static",
    template_folder="search_client/templates",
)
# Allow cross-origin requests so a separately hosted client can call the API.
CORS(app)
app.config["DEBUG"] = True # Change to False in Production
# LSH hash buckets for all indexed documents; stays None until the
# __main__ block below builds or unpickles the index before serving.
docs_buckets = None
# In-memory LRU cache (capacity 100) mapping query string -> search results.
cache = LRUCache(100)
@app.route("/", methods=["GET"])
def home():
    """Serve the landing page (index.html) of the search web client."""
    page = render_template("index.html")
    return page
@app.route("/api/search-results", methods=["GET"])
def api_search():
    """
    API Route for querying the backend.
    * Params - query="<query_string>"
    * Return Format - [(speciesType, sequence)]
    Processes the query -> Looks in cache -> If results not found, Looks in Index -> Returns results
    """
    # Validate Request Parameters.
    # The original used ``assert type(query) == str``: asserts are stripped
    # under ``python -O``, and ``request.args["query"]`` would raise before
    # the assert ever ran if the param were missing. Check explicitly instead
    # and return a clean 400 for a missing/non-string query.
    query = request.args.get("query")
    if not isinstance(query, str):
        response = make_response("Invalid Request Parameters", 400)
        return response
    # Search Query Results in cache (LRUCache.get returns -1 on a miss).
    cache_search = cache.get(query)
    if cache_search != -1:
        results = cache_search
    else:
        print("Processing Non Cache\n")
        # Cache miss: hash the query into LSH buckets and probe the index.
        query_buckets = process_query(query)
        results = find_similar_docs(query_buckets, docs_buckets)
        cache.put(query, results)
    # Resolve each matching docId to its (species name, DNA sequence) pair.
    results_with_data = []
    for docId in results:
        specie_name, dna_seq = get_data_for_docId(docId)
        results_with_data.append((specie_name, dna_seq))
    # Convert the list of results to JSON format.
    return jsonify(results_with_data)
def calculate_similarity():
    """Return the LSH matching threshold (1/bands)^(1/rows) as a percentage,
    rendered as a string truncated to at most 4 characters (e.g. '47.2')."""
    threshold_pct = 100 * (1 / NUM_BANDS) ** (1 / NUM_ROWS)
    # Truncate (not round) the textual form to 4 characters for display.
    return "{}".format(threshold_pct)[:4]
if __name__ == "__main__":
    # Optionally rebuild the LSH index from scratch by deleting cached pickles.
    print("Do you want to regenerate LSH Hash Buckets? ( y/n ) :", end=" ")
    response = input()
    if response.lower() == "y":
        print("Deleting pickle files.")
        for f in glob.glob("*.pkl"):
            os.remove(f)
    print(
        "[ALGO] Matching all documents with similarity >=",
        calculate_similarity(),
        "%",
    )
    try:
        # Raise Error if data set doesn't exist.
        if not os.path.isdir("./dataset"):
            raise Exception("Dataset not found")
        if not os.path.isfile("hash.pkl"):
            # No cached index: build it and persist it for future runs.
            docs_buckets = perform_lsh()
            # ``with`` guarantees the file is closed even if pickling fails
            # (the original left both file handles unclosed).
            with open("hash.pkl", "wb") as pkl_file:
                pickle.dump(docs_buckets, pkl_file)
        else:
            # NOTE: unpickling is only safe because hash.pkl is produced
            # locally by this app — never load untrusted pickle data.
            with open("hash.pkl", "rb") as pkl_file:
                docs_buckets = pickle.load(pkl_file)
    except Exception as e:
        print(e)
        print("Aborting! Please Try Again.")
        # Nonzero exit code signals the failed startup to the caller
        # (``exit()`` is a site-injected helper and exits 0 here).
        raise SystemExit(1)
    # Start the Server process
    app.run(use_reloader=False)