-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathenrichment_module.py
More file actions
335 lines (270 loc) · 11.6 KB
/
enrichment_module.py
File metadata and controls
335 lines (270 loc) · 11.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
"""
Functions for Enrichment Module
"""
import GlobalData as GD
from PIL import Image
import json
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.utils as pu
import scipy.stats as st
from plotly.subplots import make_subplots
import math
# Selectable significance levels; main() picks one using the index stored in
# GD.pdata["enrichment-cutoff"].
ALPHA_VALUES = [0.05, 0.01, 0.005, 0.001]
# Upper bound on the number of hits drawn by _plot().
MAX_AMOUNT_RESULTS = 20
# Colours written into the temporary highlight textures by
# _gen_highlight_textures(); presumably (R, G, B, A) tuples - confirm against
# the texture format.
# Element is in the query and annotated with the feature.
COLOR_FEATURE_QUERY = (255, 166, 0, 150)
# Element is annotated with the feature but is not in the query.
COLOR_FEATURE_BACKGROUND = (99, 110, 250, 100)
# Element is in the query but is not annotated with the feature.
COLOR_NOTFEATURE_QUERY = (3, 218, 198, 100)
# Everything else.
COLOR_BACKGROUND = (55, 55, 55, 30)
def validate():
    """Check the enrichment settings in ``GD.pdata`` and fill in defaults.

    A missing significance level ("enrichment-cutoff") and a missing or empty
    feature selection ("enrichment-features") are replaced by index 0, which
    main() uses to index ALPHA_VALUES and GD.annotation_types. A missing
    query ("enrichment_query") is initialised to an empty list.

    Returns:
    bool: False when no query nodes are selected, True otherwise.
    """
    # check for set alpha
    if "enrichment-cutoff" not in GD.pdata:
        GD.pdata["enrichment-cutoff"] = 0
        print("ENRICHMENT: WARNING: Significance level not selected. 0.05 significance niveau assumed.")
        # Deliberately no early return: the query still has to be validated.

    # check for query
    if not GD.pdata.setdefault("enrichment_query", []):
        print("ENRICHMENT: ERROR: Query not selected.")
        return False

    # check for target
    if "enrichment-features" not in GD.pdata or GD.pdata["enrichment-features"] == []:
        # Actually apply the default; an empty list left in place would make
        # the later int(...) conversion in main() fail.
        GD.pdata["enrichment-features"] = 0
        print("ENRICHMENT: WARNING: Features not selected. Default features assumed.")

    return True
def query_from_clipboard():
    """Replace the enrichment query with the clipboard nodes and persist it.

    The query stored under GD.pdata["enrichment_query"] is always reset.
    When GD.pdata has no "cbnode" entry the query stays empty and a notice is
    printed. GD.savePD() is called in both cases.
    """
    selection = []
    GD.pdata["enrichment_query"] = selection

    if "cbnode" in GD.pdata.keys():
        # Same list object as the one stored in pdata, so this fills the query.
        selection.extend(GD.pdata["cbnode"])
    else:
        print("ENRICHMENT: Clipboard empty")

    GD.savePD()
def query_clear():
    """Reset the enrichment query to an empty list and persist via GD.savePD()."""
    empty_query = []
    GD.pdata["enrichment_query"] = empty_query
    GD.savePD()
def _plot(data, highlight_bar=None):
# preprocess
sorted_data = dict(sorted(data.items(), key=lambda item: item[1]))
data_size = len(sorted_data.items())
display_note = None
if len(sorted_data.items()) > MAX_AMOUNT_RESULTS:
display_note = f"Warning: {MAX_AMOUNT_RESULTS} of {data_size} hits shown."
data_size = MAX_AMOUNT_RESULTS
if data_size == 0:
display_note = "Warning: No significant feature hits."
return
names = list(sorted_data.keys())[:data_size]
values = list(sorted_data.values())[:data_size]
categories = [{"name": names[i], "value": values[i]} for i in range(data_size)]
colors = ["#636efa" if i != highlight_bar else "orange" for i in range(data_size)]
# Create subplots for each category
subplots = make_subplots(
rows=data_size,
cols=1,
subplot_titles=[f'{categories[i]["name"]} :: {categories[i]["value"]:.2e}' for i in range(len(categories))],
shared_xaxes=True,
print_grid=False,
vertical_spacing=(0.45 / len(categories)),
)
# Add bars for each category
for k, category in enumerate(categories):
subplots.add_trace(
go.Bar(
x=[-math.log(category["value"])],
y=[1],
orientation='h',
hoverinfo='text',
text=f'{category["name"]} :: {category["value"]:.2e}',
marker=dict(color=colors[k]),
customdata=[[k, category["name"]]] # retrievable for ui and responsive feedback
),
row=k + 1, col=1
)
# Update the layout
subplots.update_layout(
font_color="rgb(200,200,200)",
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="rgba(0,0,0,0)",
showlegend=False,
title=None,
yaxis=dict(categoryorder="total ascending", fixedrange=True),
# https://plotly.com/python-api-reference/generated/plotly.graph_objects.layout.html#plotly.graph_objects.layout.XAxis
xaxis=dict(
fixedrange=True,
type="log",
zeroline=True,
showticklabels=False
),
bargap=0.1,
height=45 * len(categories),
margin=dict(l=20, r=20, t=40, b=20),
dragmode=False
)
for annotation in subplots["layout"]['annotations']:
annotation['x'] = 0
annotation['xanchor'] = 'left'
annotation['align'] = 'left'
annotation['font'] = dict(
size=12,
)
# Hide the axes
for axis in subplots['layout']:
if axis.startswith('yaxis'):
subplots['layout'][axis]['visible'] = False
# Update the margins and size
subplots['layout']['margin'] = {
'l': 5,
'r': 0,
't': 20,
'b': 1,
}
for i in range(data_size):
subplots.update_xaxes(type='log', row=i+1, col=1, showticklabels=False)
height_calc = max([45 * len(categories), 350])
subplots['layout']['height'] = height_calc
subplots['layout']['width'] = 400
plotly_json = json.dumps(subplots, cls=pu.PlotlyJSONEncoder)
return plotly_json, display_note
def _fisher_test(sig_level, sampleset, d_sample_attributes, d_attributes_sample, background):
"""
Implementation by Felix Mueller
Perform a hypergeometric or Fisher's exact test for each feature in a set of samples,
using the corresponding attributes in d_sample_attributes and d_attributes_sample.
Parameters:
sig_level (float): the significance level of the test.
sampleset (set): the set of samples to be tested.
d_sample_attributes (dict): a dictionary with samples as keys and lists of attributes as values.
d_attributes_sample (dict): a dictionary with attributes as keys and sets of samples as values.
background (int): the total number of samples in the population.
Returns:
d_term_p (dict): a dictionary with attributes as keys and adjusted p-values as values.
"""
# Make sure that all samples in the sampleset are present in the d_sample_attributes dictionary
sample_overlap = set(sampleset) & set(d_sample_attributes.keys())
# Extract all attributes associated with the samples in the sampleset
l_terms = []
for gene in sample_overlap:
l_terms.extend(d_sample_attributes[gene])
# Find the unique set of attributes and the number of tests to be performed
set_terms = set(l_terms)
number_of_tests = len(set_terms)
# Perform the test for each attribute and calculate adjusted p-values
d_term_p = {}
for term in set_terms:
attributeset = set(d_attributes_sample[term])
ab = len(sample_overlap.intersection(attributeset))
amb = len(sample_overlap.difference(attributeset))
bma = len(attributeset.difference(sample_overlap))
backg = background - ab - amb - bma
oddsratio, pval = st.fisher_exact([[ab, amb], [bma, backg]], alternative='greater')
adjusted_pval = pval * number_of_tests
# Adjust the p-value based on the number of tests performed (Bonferroni)
if adjusted_pval <= sig_level:
d_term_p[term] = adjusted_pval
return d_term_p
def _gen_highlight_textures(query_ids, feature_type, feature):
    """Write temporary node and link textures that highlight one feature.

    Each node is coloured by whether its id is in the query and whether it is
    annotated with the feature (GD.annotations[feature_type][feature]). A
    link gets a highlight colour only when both of its endpoints fall into
    the same highlighted class; otherwise it gets COLOR_BACKGROUND. The
    colours are written into copies of the currently selected node and link
    textures and saved as temp_enrichment.png in the active project.

    Parameters:
    query_ids (iterable): ids of the query nodes.
    feature_type: key into GD.annotations.
    feature: key into GD.annotations[feature_type].

    Returns:
    dict: {"path_nodes", "path_links", "textures_created": True}.
    """
    project_dir = "static/projects/" + GD.data["actPro"]
    path_nodes = project_dir + "/layoutsRGB/temp_enrichment.png"
    path_links = project_dir + "/linksRGB/temp_enrichment.png"
    nodes = GD.nodes["nodes"]
    links = GD.links["links"]
    query_id_set = set(query_ids)
    annotation_set = set(GD.annotations[feature_type][feature])

    # Colour lookup keyed by (has feature, is in query)
    palette = {
        (True, True): COLOR_FEATURE_QUERY,
        (True, False): COLOR_FEATURE_BACKGROUND,
        (False, True): COLOR_NOTFEATURE_QUERY,
        (False, False): COLOR_BACKGROUND,
    }

    def membership(element_id):
        return element_id in annotation_set, element_id in query_id_set

    # generate node texture
    node_colors = [palette[membership(node["id"])] for node in nodes]
    active_nodes_path = (
        project_dir + "/layoutsRGB/"
        + GD.pfile["layoutsRGB"][int(GD.pdata["layoutsRGBDD"])] + ".png"
    )
    # Context managers close the images even when putdata/save raises.
    with Image.open(active_nodes_path, "r") as texture_nodes_active, \
            texture_nodes_active.copy() as texture_nodes:
        texture_nodes.putdata(node_colors)
        texture_nodes.save(path_nodes, "PNG")

    # generate link texture
    link_colors = []
    for link in links:
        start_state = membership(int(link["s"]))
        end_state = membership(int(link["e"]))
        # Highlight only links whose two endpoints are in the same class.
        if start_state == end_state:
            link_colors.append(palette[start_state])
        else:
            link_colors.append(COLOR_BACKGROUND)
    active_links_path = (
        project_dir + "/linksRGB/"
        + GD.pfile["linksRGB"][int(GD.pdata["linksRGBDD"])] + ".png"
    )
    with Image.open(active_links_path, "r") as texture_links_active, \
            texture_links_active.copy() as texture_links:
        texture_links.putdata(link_colors)
        texture_links.save(path_links, "PNG")

    return {"path_nodes": path_nodes, "path_links": path_links, "textures_created": True}
def main(highlight=None):
    """Run the enrichment analysis, or re-render earlier results with a highlight.

    Parameters:
    highlight (sequence | None): None runs a fresh Fisher test on the query
        stored in GD.pdata. Otherwise a 5-item sequence
        (bar index, feature, feature type, results, query ids) whose results
        are re-plotted and whose feature is painted into highlight textures.

    Returns:
    tuple: (plot_json, payload, texture_obj, display_note).
        payload is [feature type, results, query ids]. texture_obj is None
        without a highlight, otherwise the dict from
        _gen_highlight_textures() or {"textures_created": False} on failure.
        When there are no hits the tuple is
        (None, None, None, "Warning: No significant feature hits.").
    """
    if highlight is None:
        # fresh run: gather inputs from the project state
        query_set = [int(node["id"]) for node in GD.pdata["enrichment_query"]]
        alpha = ALPHA_VALUES[int(GD.pdata["enrichment-cutoff"])]
        features_type = GD.annotation_types[int(GD.pdata["enrichment-features"])]
        dict_features_to_samples = GD.annotations[features_type]

        # NOTE(review): the node id is used as an index into GD.nodes["nodes"];
        # this assumes ids equal list positions - confirm.
        node_records = GD.nodes["nodes"]
        if GD.pfile.get("annotationTypes", False):
            # case of type annotations
            dict_samples_to_features = {
                node: node_records[node]["attrlist"].get(features_type, [])
                for node in query_set
            }
        else:
            # case of list annotations, split off name from attrlist
            dict_samples_to_features = {
                node: node_records[node]["attrlist"][1:] for node in query_set
            }
        background_count = int(GD.pfile["nodecount"])

        # run tests
        test_result = _fisher_test(
            sig_level=alpha,
            sampleset=query_set,
            d_sample_attributes=dict_samples_to_features,
            d_attributes_sample=dict_features_to_samples,
            background=background_count
        )
        highlight_bar, highlight_feature = None, None
        highlight_feature_type = features_type
        highlight_results = test_result
        highlight_query_ids = query_set
    else:
        highlight_bar, highlight_feature, highlight_feature_type, highlight_results, highlight_query_ids = highlight

    # Guard both paths: with nothing to draw, _plot has no figure to return
    # and unpacking its result would fail.
    if not highlight_results:
        return None, None, None, "Warning: No significant feature hits."

    # build plot and insert responsive payload
    plot_json, display_note = _plot(
        data=highlight_results,
        highlight_bar=highlight_bar
    )

    # color highlighted annotation
    texture_obj = None
    if highlight is not None:
        try:
            texture_obj = _gen_highlight_textures(
                query_ids=highlight_query_ids,
                feature_type=highlight_feature_type,
                feature=highlight_feature
            )
        except Exception as err:
            # Best effort: the plot is still returned, but report the cause and
            # let KeyboardInterrupt/SystemExit propagate.
            print(f"ENRICHMENT: ERROR: Highlight textures could not be created: {err}")
            texture_obj = {"textures_created": False}

    # return results
    payload = [highlight_feature_type, highlight_results, highlight_query_ids]
    return plot_json, payload, texture_obj, display_note
# Running this file directly does nothing; the functions above are meant to be
# called after importing the module.
if __name__ == "__main__":
    pass