Skip to content

Commit b34952c

Browse files
authored
Merge pull request #217 from OpenUpSA/isoalte-rbac-fix
Refactor bulk_send_to_elastic function and enhance SQL query to inclu…
2 parents 42bca7a + d532eed commit b34952c

File tree

3 files changed

+42
-66
lines changed

3 files changed

+42
-66
lines changed

helpers.py

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -85,45 +85,7 @@ def load_json_schema(filename: str) -> Dict[str, Any]:
8585
)
8686

8787

88-
# Bulk ES helper
89-
def bulk_send_to_elastic(documents):
90-
"""Send a batch of documents to Elasticsearch using the _bulk API."""
91-
if not documents:
92-
return True
9388

94-
bulk_lines = []
95-
for doc in documents:
96-
# Flatten isolate_data if present (same logic as send_to_elastic2)
97-
if 'isolate_data' in doc and doc['isolate_data']:
98-
isolate_data = doc['isolate_data']
99-
if isinstance(isolate_data, str):
100-
try:
101-
isolate_data = json.loads(isolate_data)
102-
except Exception:
103-
isolate_data = {}
104-
if isinstance(isolate_data, dict):
105-
for key, value in isolate_data.items():
106-
if key not in doc:
107-
doc[key] = value
108-
del doc['isolate_data']
109-
doc_id = doc.get("id")
110-
action = {"index": {"_id": doc_id}} if doc_id else {"index": {}}
111-
bulk_lines.append(json.dumps(action))
112-
# Use json_serial for datetime/date serialization
113-
bulk_lines.append(json.dumps(doc, default=json_serial))
114-
bulk_data = "\n".join(bulk_lines) + "\n"
115-
116-
url = f"{settings.ELASTICSEARCH_URL}/{settings.ELASTICSEARCH_INDEX}/_bulk"
117-
headers = {"Content-Type": "application/x-ndjson"}
118-
try:
119-
response = requests.post(url, data=bulk_data, headers=headers, timeout=30)
120-
if response.status_code not in (200, 201):
121-
print(f"Bulk indexing failed: {response.text}")
122-
return False
123-
return True
124-
except Exception as e:
125-
print(f"Exception during bulk ES indexing: {e}")
126-
return False
12789

12890
sg_api_key = SENDGRID_API_KEY
12991
sg_from_email = SENDGRID_FROM_EMAIL
@@ -1462,4 +1424,44 @@ def delete_from_elastic(submission_id):
14621424
return False
14631425
except Exception as e:
14641426
print(f"Error deleting documents from Elasticsearch: {e}")
1427+
return False
1428+
1429+
# Bulk ES helper
1430+
def bulk_send_to_elastic(documents):
1431+
"""Send a batch of documents to Elasticsearch using the _bulk API."""
1432+
if not documents:
1433+
return True
1434+
1435+
bulk_lines = []
1436+
for doc in documents:
1437+
# Flatten isolate_data if present (same logic as send_to_elastic2)
1438+
if 'isolate_data' in doc and doc['isolate_data']:
1439+
isolate_data = doc['isolate_data']
1440+
if isinstance(isolate_data, str):
1441+
try:
1442+
isolate_data = json.loads(isolate_data)
1443+
except Exception:
1444+
isolate_data = {}
1445+
if isinstance(isolate_data, dict):
1446+
for key, value in isolate_data.items():
1447+
if key not in doc:
1448+
doc[key] = value
1449+
del doc['isolate_data']
1450+
doc_id = doc.get("id")
1451+
action = {"index": {"_id": doc_id}} if doc_id else {"index": {}}
1452+
bulk_lines.append(json.dumps(action))
1453+
# Use json_serial for datetime/date serialization
1454+
bulk_lines.append(json.dumps(doc, default=json_serial))
1455+
bulk_data = "\n".join(bulk_lines) + "\n"
1456+
1457+
url = f"{settings.ELASTICSEARCH_URL}/{settings.ELASTICSEARCH_INDEX}/_bulk"
1458+
headers = {"Content-Type": "application/x-ndjson"}
1459+
try:
1460+
response = requests.post(url, data=bulk_data, headers=headers, timeout=30)
1461+
if response.status_code not in (200, 201):
1462+
print(f"Bulk indexing failed: {response.text}")
1463+
return False
1464+
return True
1465+
except Exception as e:
1466+
print(f"Exception during bulk ES indexing: {e}")
14651467
return False

test/test_search.py

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -600,26 +600,10 @@ def test_search_access_control_private_project_all_roles(
600600
@pytest.mark.search
601601
@pytest.mark.rbac
602602
@pytest.mark.e2e
603-
@pytest.mark.xfail(
604-
reason="BUG: publish2 endpoint doesn't fetch visibility field (app.py:2688-2693). "
605-
"Should add 'p.privacy as visibility' to the SELECT query. "
606-
"Currently visibility field is missing from ES documents, so access control fails."
607-
)
608603
def test_search_access_control_private_project_external_user(
609604
client, external_user_token, private_project_with_submission
610605
):
611-
"""Test that external users cannot search private project data
612-
613-
KNOWN ISSUE: This test currently fails because the publish2 endpoint doesn't
614-
include the visibility field when indexing documents to Elasticsearch.
615-
The SELECT query needs to be updated to include: p.privacy as visibility, p.name as project_name
616-
"""
617-
# Debug: Print project details
618-
print(f"\\nPrivate project ID: {private_project_with_submission['project']['id']}")
619-
print(
620-
f"Private project privacy: {private_project_with_submission['project']['privacy']}"
621-
)
622-
606+
"""Test that external users cannot search private project data"""
623607
search_query = {
624608
"query": {
625609
"match": {"project_id": private_project_with_submission["project"]["id"]}
@@ -638,16 +622,6 @@ def test_search_access_control_private_project_external_user(
638622
assert response.status_code == 200
639623
result = response.get_json()
640624

641-
# Debug
642-
print(f"Got {result['hits']['total']['value']} results")
643-
if result["hits"]["hits"]:
644-
print(
645-
f"First result visibility: {result['hits']['hits'][0]['_source'].get('visibility', 'NOT SET')}"
646-
)
647-
print(
648-
f"First result project_id: {result['hits']['hits'][0]['_source'].get('project_id', 'NOT SET')}"
649-
)
650-
651625
# External users should not see private project data
652626
# The access filter should prevent this
653627
assert result["hits"]["total"]["value"] == 0, (

worker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ async def process_sequence_validation(job):
9999

100100
# Get updated isolate data for bulk ES update
101101
cursor.execute("""
102-
SELECT i.*, s.project_id, p.pathogen_id
102+
SELECT i.*, s.project_id, p.pathogen_id, p.privacy as visibility
103103
FROM isolates i
104104
LEFT JOIN submissions s ON i.submission_id = s.id
105105
LEFT JOIN projects p ON s.project_id = p.id

0 commit comments

Comments
 (0)