Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions core/home/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from unittest.mock import patch

from django.test import TestCase

from collection.models import Collection
from core.users.models import User
from journal.models import Journal, SciELOJournal

from core.home.views import _get_scielo_journals_data


class TestGetScieloJournalsData(TestCase):
def setUp(self):
self.user = User.objects.create(username="testuser", password="testpass")
self.collection = Collection.objects.create(
creator=self.user,
acron3="per",
domain="http://www.scielo.org.pe",
)
self.journal = Journal.objects.create(
creator=self.user,
title="Test Journal Peru",
)
self.scielo_journal = SciELOJournal.objects.create(
issn_scielo="2709-3689",
collection=self.collection,
journal=self.journal,
journal_acron="tjperu",
)

def test_scielo_url_does_not_have_double_http_prefix(self):
"""URL must not contain 'http://http://' when domain already has http://"""
data = _get_scielo_journals_data()
self.assertTrue(len(data) > 0)
for item in data:
self.assertNotIn("http://http://", item["scielo_url"])
self.assertNotIn("http://https://", item["scielo_url"])

def test_scielo_url_is_well_formed(self):
"""URL must be a valid scielo.php URL with the correct domain"""
data = _get_scielo_journals_data()
self.assertEqual(len(data), 1)
expected_url = (
"http://www.scielo.org.pe/scielo.php?script=sci_serial"
"&pid=2709-3689&lng=en"
)
self.assertEqual(data[0]["scielo_url"], expected_url)

def test_scielo_url_strips_trailing_slash_from_domain(self):
"""Trailing slash in domain must not produce double slash in URL"""
self.collection.domain = "http://www.scielo.org.pe/"
self.collection.save()
data = _get_scielo_journals_data()
self.assertEqual(len(data), 1)
self.assertNotIn("//scielo.php", data[0]["scielo_url"])

def test_scielo_url_with_https_domain(self):
"""URL must be correct when domain uses https://"""
self.collection.domain = "https://www.scielo.br"
self.collection.save()
data = _get_scielo_journals_data()
self.assertEqual(len(data), 1)
self.assertTrue(data[0]["scielo_url"].startswith("https://www.scielo.br/"))
self.assertNotIn("https://https://", data[0]["scielo_url"])
2 changes: 1 addition & 1 deletion core/home/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _get_scielo_journals_data():
"",
)
scielo_url = (
f"http://{domain}/scielo.php?script=sci_serial&pid={issn_scielo}&lng=en"
f"{domain.rstrip('/')}/scielo.php?script=sci_serial&pid={issn_scielo}&lng=en"
)
Comment on lines 77 to 79
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collection__domain is not consistently stored with a URL scheme in this repo (tests/fixtures use values like www.scielo.br). After this change, if domain has no scheme the generated URL becomes www.scielo.br/scielo.php?... (relative/invalid in many contexts). Also, if domain is None, calling .rstrip('/') will raise. Suggest normalizing safely: domain = (domain or "").rstrip("/") and prepend a scheme only when missing (e.g. if domain and "://" not in domain: domain = "http://" + domain).

Copilot uses AI. Check for mistakes.
formatted_data.append(
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
{% elif journal.status in "DS" %}
<span class="material-icons align-middle fs-5" style="color: #c63800;">fiber_manual_record</span>
{% endif %}
<a target="_blank" href="http://{{ journal.collection__domain }}/scielo.php?script=sci_serial&pid={{journal.issn_scielo}}&lng=en">{{ journal.journal__title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{journal.collection__main_name}}</span>
<a target="_blank" href="{{ journal.collection__domain }}/scielo.php?script=sci_serial&pid={{journal.issn_scielo}}&lng=en">{{ journal.journal__title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{journal.collection__main_name}}</span>
</th>
Comment on lines 28 to 30
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assumes journal.collection__domain already includes a scheme. In this codebase it can be stored as host-only (e.g. www.scielo.br in tests/fixtures), in which case this href becomes a relative URL and breaks. Consider adding a conditional to prefix http:// only when the domain lacks ://, or (preferably) pass a pre-normalized scielo_url from the view/context.

Copilot uses AI. Check for mistakes.
</tr>
{% endfor %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
{% elif scielojournal.status in "DS" %}
<span class="material-icons align-middle fs-5" style="color: #c63800;">fiber_manual_record</span>
{% endif %}
<a target="_blank" href="http://{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{ scielojournal.collection.main_name }}</span>
<a target="_blank" href="{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{ scielojournal.collection.main_name }}</span>
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue here: scielojournal.collection.domain may be stored without http(s):// (host-only), so this href can become relative/invalid. Add logic to prepend a scheme only when missing (e.g. check for ://), or build and provide a normalized URL from Python to keep templates simple and consistent.

Suggested change
<a target="_blank" href="{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{ scielojournal.collection.main_name }}</span>
{% if "://" in scielojournal.collection.domain %}
<a target="_blank" href="{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a>
{% else %}
<a target="_blank" href="https://{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a>
{% endif %}
<span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{ scielojournal.collection.main_name }}</span>

Copilot uses AI. Check for mistakes.
{% endfor %}
</ul>
</td>
Expand Down
4 changes: 2 additions & 2 deletions journal/sources/classic_website.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
def get_issn(collection):
try:
collections = requests.get(
f"http://{collection}/scielo.php?script=sci_alphabetic&lng=es&nrm=iso&debug=xml",
f"{collection.rstrip('/')}/scielo.php?script=sci_alphabetic&lng=es&nrm=iso&debug=xml",
timeout=10,
Comment on lines 21 to 23
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collection can be stored without a URL scheme (e.g. tests/fixtures use www.scielo.br). With the new f-string, requests.get() will receive a URL like www.scielo.br/scielo.php?... (missing scheme) and fail. Consider normalizing before building the URL: strip trailing / and prepend a scheme only when one is missing (e.g., if "://" not in collection: collection = "http://" + collection). Also guard against None values (e.g., collection = (collection or "")).

Copilot uses AI. Check for mistakes.
)
data = xmltodict.parse(collections.text)
Expand Down Expand Up @@ -52,7 +52,7 @@ def get_issn(collection):
def get_journal_xml(collection, issn):
try:
official_journal = requests.get(
f"http://{collection}/scielo.php?script=sci_serial&pid={issn}&lng=es&nrm=iso&debug=xml",
f"{collection.rstrip('/')}/scielo.php?script=sci_serial&pid={issn}&lng=es&nrm=iso&debug=xml",
timeout=10,
Comment on lines 54 to 56
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as above: if collection lacks a scheme (common in this codebase, e.g. Collection(domain="www.scielo.br") in tests), this will generate a URL without http(s):// and requests.get() will fail. Normalize collection once (strip trailing /, add scheme if missing) before constructing the request URL, and handle None safely.

Copilot uses AI. Check for mistakes.
)
return xmltodict.parse(official_journal.text)
Expand Down
100 changes: 99 additions & 1 deletion journal/tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock, Mock, patch

from django.test import TestCase
from django_test_migrations.migrator import Migrator
Expand Down Expand Up @@ -387,3 +387,101 @@ def test_backward_compatibility_without_raw_fields(self):
self.assertIsNone(publisher_history.raw_country_name)


from journal.sources.classic_website import get_issn, get_journal_xml


class TestClassicWebsiteGetIssn(TestCase):
@patch("journal.sources.classic_website.requests.get")
@patch("journal.sources.classic_website.xmltodict.parse")
def test_get_issn_url_does_not_have_double_http_prefix(self, mock_parse, mock_get):
"""get_issn must not prepend http:// when domain already contains it"""
mock_response = Mock()
mock_response.text = ""
mock_get.return_value = mock_response
mock_parse.return_value = {"SERIALLIST": {"LIST": {"SERIAL": []}}}

list(get_issn("http://www.scielo.org.pe"))

called_url = mock_get.call_args[0][0]
self.assertNotIn("http://http://", called_url)
self.assertTrue(called_url.startswith("http://www.scielo.org.pe/"))

@patch("journal.sources.classic_website.requests.get")
@patch("journal.sources.classic_website.xmltodict.parse")
def test_get_issn_url_strips_trailing_slash(self, mock_parse, mock_get):
"""get_issn must not produce double slash when domain has trailing slash"""
mock_response = Mock()
mock_response.text = ""
mock_get.return_value = mock_response
mock_parse.return_value = {"SERIALLIST": {"LIST": {"SERIAL": []}}}

list(get_issn("http://www.scielo.org.pe/"))

called_url = mock_get.call_args[0][0]
self.assertNotIn("//scielo.php", called_url)

@patch("journal.sources.classic_website.requests.get")
@patch("journal.sources.classic_website.xmltodict.parse")
def test_get_issn_url_with_https_domain(self, mock_parse, mock_get):
"""get_issn must not produce double https:// when domain uses https://"""
mock_response = Mock()
mock_response.text = ""
mock_get.return_value = mock_response
mock_parse.return_value = {"SERIALLIST": {"LIST": {"SERIAL": []}}}

list(get_issn("https://www.scielo.br"))

called_url = mock_get.call_args[0][0]
self.assertNotIn("http://https://", called_url)
self.assertTrue(called_url.startswith("https://www.scielo.br/"))


class TestClassicWebsiteGetJournalXml(TestCase):
@patch("journal.sources.classic_website.requests.get")
@patch("journal.sources.classic_website.xmltodict.parse")
def test_get_journal_xml_url_does_not_have_double_http_prefix(
self, mock_parse, mock_get
):
"""get_journal_xml must not prepend http:// when domain already contains it"""
mock_response = Mock()
mock_response.text = ""
mock_get.return_value = mock_response
mock_parse.return_value = {}

get_journal_xml("http://www.scielo.org.pe", "2709-3689")

called_url = mock_get.call_args[0][0]
self.assertNotIn("http://http://", called_url)
self.assertTrue(called_url.startswith("http://www.scielo.org.pe/"))

@patch("journal.sources.classic_website.requests.get")
@patch("journal.sources.classic_website.xmltodict.parse")
def test_get_journal_xml_url_strips_trailing_slash(self, mock_parse, mock_get):
"""get_journal_xml must not produce double slash when domain has trailing slash"""
mock_response = Mock()
mock_response.text = ""
mock_get.return_value = mock_response
mock_parse.return_value = {}

get_journal_xml("http://www.scielo.org.pe/", "2709-3689")

called_url = mock_get.call_args[0][0]
self.assertNotIn("//scielo.php", called_url)

@patch("journal.sources.classic_website.requests.get")
@patch("journal.sources.classic_website.xmltodict.parse")
def test_get_journal_xml_url_with_https_domain(self, mock_parse, mock_get):
"""get_journal_xml must not produce double https:// when domain uses https://"""
mock_response = Mock()
mock_response.text = ""
mock_get.return_value = mock_response
mock_parse.return_value = {}

get_journal_xml("https://www.scielo.br", "0034-8910")

called_url = mock_get.call_args[0][0]
self.assertNotIn("http://https://", called_url)
self.assertTrue(called_url.startswith("https://www.scielo.br/"))