# Patch summary (review notes; free text ahead of the first file header).
#
# Purpose: stop building SciELO classic-site URLs with a hard-coded "http://"
# prefix. The three changed f-strings now use the stored domain/collection
# value as the URL base and strip trailing "/" characters before appending
# "/scielo.php?...".
#
# Files touched:
#   * core/home/tests.py (new file): TestGetScieloJournalsData exercises
#     _get_scielo_journals_data() with http://, https:// and trailing-slash
#     domains.
#   * core/home/views.py: scielo_url f-string in _get_scielo_journals_data().
#   * journal/sources/classic_website.py: request URLs in get_issn() and
#     get_journal_xml().
#   * journal/tests.py: adds Mock to the unittest.mock import and two test
#     classes that patch requests.get / xmltodict.parse and inspect the
#     first positional argument passed to requests.get.
#   * two journals_table*.html templates: the removed and added lines read
#     identically in this copy of the patch -- presumably markup or attribute
#     text was lost in extraction; confirm against the real diff.
#
# NOTE(review): after this change a domain stored WITHOUT a scheme (for
#   example "www.scielo.br") yields a URL with no scheme at all. None of the
#   added tests cover that input -- confirm every stored Collection.domain and
#   every caller of get_issn()/get_journal_xml() supplies "http://" or
#   "https://", or normalise the value before formatting.
# NOTE(review): core/home/tests.py imports `patch` but no added line uses it.
# NOTE(review): the import of get_issn/get_journal_xml is added in the middle
#   of journal/tests.py (hunk at line 387) rather than with the imports at
#   the top of the file.
# NOTE(review): assertTrue(len(data) > 0) could be assertGreater(len(data), 0)
#   for a more useful failure message.
# NOTE(review): the test user is created with User.objects.create(...,
#   password="testpass"); if User follows Django's auth model this stores the
#   raw string rather than a hash -- harmless here since the tests never
#   authenticate, but confirm.
diff --git a/core/home/tests.py b/core/home/tests.py new file mode 100644 index 000000000..19b77133c --- /dev/null +++ b/core/home/tests.py @@ -0,0 +1,64 @@ +from unittest.mock import patch + +from django.test import TestCase + +from collection.models import Collection +from core.users.models import User +from journal.models import Journal, SciELOJournal + +from core.home.views import _get_scielo_journals_data + + +class TestGetScieloJournalsData(TestCase): + def setUp(self): + self.user = User.objects.create(username="testuser", password="testpass") + self.collection = Collection.objects.create( + creator=self.user, + acron3="per", + domain="http://www.scielo.org.pe", + ) + self.journal = Journal.objects.create( + creator=self.user, + title="Test Journal Peru", + ) + self.scielo_journal = SciELOJournal.objects.create( + issn_scielo="2709-3689", + collection=self.collection, + journal=self.journal, + journal_acron="tjperu", + ) + + def test_scielo_url_does_not_have_double_http_prefix(self): + """URL must not contain 'http://http://' when domain already has http://""" + data = _get_scielo_journals_data() + self.assertTrue(len(data) > 0) + for item in data: + self.assertNotIn("http://http://", item["scielo_url"]) + self.assertNotIn("http://https://", item["scielo_url"]) + + def test_scielo_url_is_well_formed(self): + """URL must be a valid scielo.php URL with the correct domain""" + data = _get_scielo_journals_data() + self.assertEqual(len(data), 1) + expected_url = ( + "http://www.scielo.org.pe/scielo.php?script=sci_serial" + "&pid=2709-3689&lng=en" + ) + self.assertEqual(data[0]["scielo_url"], expected_url) + + def test_scielo_url_strips_trailing_slash_from_domain(self): + """Trailing slash in domain must not produce double slash in URL""" + self.collection.domain = "http://www.scielo.org.pe/" + self.collection.save() + data = _get_scielo_journals_data() + self.assertEqual(len(data), 1) + self.assertNotIn("//scielo.php", data[0]["scielo_url"]) + + def 
test_scielo_url_with_https_domain(self): + """URL must be correct when domain uses https://""" + self.collection.domain = "https://www.scielo.br" + self.collection.save() + data = _get_scielo_journals_data() + self.assertEqual(len(data), 1) + self.assertTrue(data[0]["scielo_url"].startswith("https://www.scielo.br/")) + self.assertNotIn("https://https://", data[0]["scielo_url"]) diff --git a/core/home/views.py b/core/home/views.py index 41bc7b490..e05bf415f 100644 --- a/core/home/views.py +++ b/core/home/views.py @@ -75,7 +75,7 @@ def _get_scielo_journals_data(): "", ) scielo_url = ( - f"http://{domain}/scielo.php?script=sci_serial&pid={issn_scielo}&lng=en" + f"{domain.rstrip('/')}/scielo.php?script=sci_serial&pid={issn_scielo}&lng=en" ) formatted_data.append( { diff --git a/core/templates/home/include/list_journal_page/journals_table.html b/core/templates/home/include/list_journal_page/journals_table.html index a8121d524..34f8597e3 100644 --- a/core/templates/home/include/list_journal_page/journals_table.html +++ b/core/templates/home/include/list_journal_page/journals_table.html @@ -26,7 +26,7 @@ {% elif journal.status in "DS" %} fiber_manual_record {% endif %} - {{ journal.journal__title }}{{journal.collection__main_name}} + {{ journal.journal__title }}{{journal.collection__main_name}} {% endfor %} diff --git a/core/templates/home/include/list_journal_page/journals_table_by_publisher.html b/core/templates/home/include/list_journal_page/journals_table_by_publisher.html index 2a74f3f56..ed53ed780 100644 --- a/core/templates/home/include/list_journal_page/journals_table_by_publisher.html +++ b/core/templates/home/include/list_journal_page/journals_table_by_publisher.html @@ -25,7 +25,7 @@ {% elif scielojournal.status in "DS" %} fiber_manual_record {% endif %} - {{ scielojournal.journal.title }}{{ scielojournal.collection.main_name }} + {{ scielojournal.journal.title }}{{ scielojournal.collection.main_name }} {% endfor %} diff --git 
a/journal/sources/classic_website.py b/journal/sources/classic_website.py index 3312bbc50..0961c09f1 100644 --- a/journal/sources/classic_website.py +++ b/journal/sources/classic_website.py @@ -19,7 +19,7 @@ def get_issn(collection): try: collections = requests.get( - f"http://{collection}/scielo.php?script=sci_alphabetic&lng=es&nrm=iso&debug=xml", + f"{collection.rstrip('/')}/scielo.php?script=sci_alphabetic&lng=es&nrm=iso&debug=xml", timeout=10, ) data = xmltodict.parse(collections.text) @@ -52,7 +52,7 @@ def get_issn(collection): def get_journal_xml(collection, issn): try: official_journal = requests.get( - f"http://{collection}/scielo.php?script=sci_serial&pid={issn}&lng=es&nrm=iso&debug=xml", + f"{collection.rstrip('/')}/scielo.php?script=sci_serial&pid={issn}&lng=es&nrm=iso&debug=xml", timeout=10, ) return xmltodict.parse(official_journal.text) diff --git a/journal/tests.py b/journal/tests.py index b67959962..fb3bbaa23 100755 --- a/journal/tests.py +++ b/journal/tests.py @@ -1,5 +1,5 @@ import json -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, Mock, patch from django.test import TestCase from django_test_migrations.migrator import Migrator @@ -387,3 +387,101 @@ def test_backward_compatibility_without_raw_fields(self): self.assertIsNone(publisher_history.raw_country_name) + + +from journal.sources.classic_website import get_issn, get_journal_xml + + +class TestClassicWebsiteGetIssn(TestCase): + @patch("journal.sources.classic_website.requests.get") + @patch("journal.sources.classic_website.xmltodict.parse") + def test_get_issn_url_does_not_have_double_http_prefix(self, mock_parse, mock_get): + """get_issn must not prepend http:// when domain already contains it""" + mock_response = Mock() + mock_response.text = "" + mock_get.return_value = mock_response + mock_parse.return_value = {"SERIALLIST": {"LIST": {"SERIAL": []}}} + + list(get_issn("http://www.scielo.org.pe")) + + called_url = mock_get.call_args[0][0] + 
self.assertNotIn("http://http://", called_url) + self.assertTrue(called_url.startswith("http://www.scielo.org.pe/")) + + @patch("journal.sources.classic_website.requests.get") + @patch("journal.sources.classic_website.xmltodict.parse") + def test_get_issn_url_strips_trailing_slash(self, mock_parse, mock_get): + """get_issn must not produce double slash when domain has trailing slash""" + mock_response = Mock() + mock_response.text = "" + mock_get.return_value = mock_response + mock_parse.return_value = {"SERIALLIST": {"LIST": {"SERIAL": []}}} + + list(get_issn("http://www.scielo.org.pe/")) + + called_url = mock_get.call_args[0][0] + self.assertNotIn("//scielo.php", called_url) + + @patch("journal.sources.classic_website.requests.get") + @patch("journal.sources.classic_website.xmltodict.parse") + def test_get_issn_url_with_https_domain(self, mock_parse, mock_get): + """get_issn must not produce double https:// when domain uses https://""" + mock_response = Mock() + mock_response.text = "" + mock_get.return_value = mock_response + mock_parse.return_value = {"SERIALLIST": {"LIST": {"SERIAL": []}}} + + list(get_issn("https://www.scielo.br")) + + called_url = mock_get.call_args[0][0] + self.assertNotIn("http://https://", called_url) + self.assertTrue(called_url.startswith("https://www.scielo.br/")) + + +class TestClassicWebsiteGetJournalXml(TestCase): + @patch("journal.sources.classic_website.requests.get") + @patch("journal.sources.classic_website.xmltodict.parse") + def test_get_journal_xml_url_does_not_have_double_http_prefix( + self, mock_parse, mock_get + ): + """get_journal_xml must not prepend http:// when domain already contains it""" + mock_response = Mock() + mock_response.text = "" + mock_get.return_value = mock_response + mock_parse.return_value = {} + + get_journal_xml("http://www.scielo.org.pe", "2709-3689") + + called_url = mock_get.call_args[0][0] + self.assertNotIn("http://http://", called_url) + 
self.assertTrue(called_url.startswith("http://www.scielo.org.pe/")) + + @patch("journal.sources.classic_website.requests.get") + @patch("journal.sources.classic_website.xmltodict.parse") + def test_get_journal_xml_url_strips_trailing_slash(self, mock_parse, mock_get): + """get_journal_xml must not produce double slash when domain has trailing slash""" + mock_response = Mock() + mock_response.text = "" + mock_get.return_value = mock_response + mock_parse.return_value = {} + + get_journal_xml("http://www.scielo.org.pe/", "2709-3689") + + called_url = mock_get.call_args[0][0] + self.assertNotIn("//scielo.php", called_url) + + @patch("journal.sources.classic_website.requests.get") + @patch("journal.sources.classic_website.xmltodict.parse") + def test_get_journal_xml_url_with_https_domain(self, mock_parse, mock_get): + """get_journal_xml must not produce double https:// when domain uses https://""" + mock_response = Mock() + mock_response.text = "" + mock_get.return_value = mock_response + mock_parse.return_value = {} + + get_journal_xml("https://www.scielo.br", "0034-8910") + + called_url = mock_get.call_args[0][0] + self.assertNotIn("http://https://", called_url) + self.assertTrue(called_url.startswith("https://www.scielo.br/")) + + +