-
Notifications
You must be signed in to change notification settings - Fork 10
Fix malformed journal links with double http:// prefix #1357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| from unittest.mock import patch | ||
|
|
||
| from django.test import TestCase | ||
|
|
||
| from collection.models import Collection | ||
| from core.users.models import User | ||
| from journal.models import Journal, SciELOJournal | ||
|
|
||
| from core.home.views import _get_scielo_journals_data | ||
|
|
||
|
|
||
| class TestGetScieloJournalsData(TestCase): | ||
| def setUp(self): | ||
| self.user = User.objects.create(username="testuser", password="testpass") | ||
| self.collection = Collection.objects.create( | ||
| creator=self.user, | ||
| acron3="per", | ||
| domain="http://www.scielo.org.pe", | ||
| ) | ||
| self.journal = Journal.objects.create( | ||
| creator=self.user, | ||
| title="Test Journal Peru", | ||
| ) | ||
| self.scielo_journal = SciELOJournal.objects.create( | ||
| issn_scielo="2709-3689", | ||
| collection=self.collection, | ||
| journal=self.journal, | ||
| journal_acron="tjperu", | ||
| ) | ||
|
|
||
| def test_scielo_url_does_not_have_double_http_prefix(self): | ||
| """URL must not contain 'http://http://' when domain already has http://""" | ||
| data = _get_scielo_journals_data() | ||
| self.assertTrue(len(data) > 0) | ||
| for item in data: | ||
| self.assertNotIn("http://http://", item["scielo_url"]) | ||
| self.assertNotIn("http://https://", item["scielo_url"]) | ||
|
|
||
| def test_scielo_url_is_well_formed(self): | ||
| """URL must be a valid scielo.php URL with the correct domain""" | ||
| data = _get_scielo_journals_data() | ||
| self.assertEqual(len(data), 1) | ||
| expected_url = ( | ||
| "http://www.scielo.org.pe/scielo.php?script=sci_serial" | ||
| "&pid=2709-3689&lng=en" | ||
| ) | ||
| self.assertEqual(data[0]["scielo_url"], expected_url) | ||
|
|
||
| def test_scielo_url_strips_trailing_slash_from_domain(self): | ||
| """Trailing slash in domain must not produce double slash in URL""" | ||
| self.collection.domain = "http://www.scielo.org.pe/" | ||
| self.collection.save() | ||
| data = _get_scielo_journals_data() | ||
| self.assertEqual(len(data), 1) | ||
| self.assertNotIn("//scielo.php", data[0]["scielo_url"]) | ||
|
|
||
| def test_scielo_url_with_https_domain(self): | ||
| """URL must be correct when domain uses https://""" | ||
| self.collection.domain = "https://www.scielo.br" | ||
| self.collection.save() | ||
| data = _get_scielo_journals_data() | ||
| self.assertEqual(len(data), 1) | ||
| self.assertTrue(data[0]["scielo_url"].startswith("https://www.scielo.br/")) | ||
| self.assertNotIn("https://https://", data[0]["scielo_url"]) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,7 @@ | |
| {% elif journal.status in "DS" %} | ||
| <span class="material-icons align-middle fs-5" style="color: #c63800;">fiber_manual_record</span> | ||
| {% endif %} | ||
| <a target="_blank" href="http://{{ journal.collection__domain }}/scielo.php?script=sci_serial&pid={{journal.issn_scielo}}&lng=en">{{ journal.journal__title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{journal.collection__main_name}}</span> | ||
| <a target="_blank" href="{{ journal.collection__domain }}/scielo.php?script=sci_serial&pid={{journal.issn_scielo}}&lng=en">{{ journal.journal__title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{journal.collection__main_name}}</span> | ||
| </th> | ||
|
Comment on lines
28
to
30
|
||
| </tr> | ||
| {% endfor %} | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -25,7 +25,7 @@ | |||||||||||||||
| {% elif scielojournal.status in "DS" %} | ||||||||||||||||
| <span class="material-icons align-middle fs-5" style="color: #c63800;">fiber_manual_record</span> | ||||||||||||||||
| {% endif %} | ||||||||||||||||
| <a target="_blank" href="http://{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{ scielojournal.collection.main_name }}</span> | ||||||||||||||||
| <a target="_blank" href="{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{ scielojournal.collection.main_name }}</span> | ||||||||||||||||
|
||||||||||||||||
| <a target="_blank" href="{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a><span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{ scielojournal.collection.main_name }}</span> | |
| {% if "://" in scielojournal.collection.domain %} | |
| <a target="_blank" href="{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a> | |
| {% else %} | |
| <a target="_blank" href="https://{{ scielojournal.collection.domain }}/scielo.php?script=sci_serial&pid={{ scielojournal.issn_scielo }}&lng=en">{{ scielojournal.journal.title }}</a> | |
| {% endif %} | |
| <span style="color: #888; font-size: 0.9em; opacity: 0.7; padding: 4px 8px;">{{ scielojournal.collection.main_name }}</span> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,7 +19,7 @@ | |
| def get_issn(collection): | ||
| try: | ||
| collections = requests.get( | ||
| f"http://{collection}/scielo.php?script=sci_alphabetic&lng=es&nrm=iso&debug=xml", | ||
| f"{collection.rstrip('/')}/scielo.php?script=sci_alphabetic&lng=es&nrm=iso&debug=xml", | ||
| timeout=10, | ||
|
Comment on lines
21
to
23
|
||
| ) | ||
| data = xmltodict.parse(collections.text) | ||
|
|
@@ -52,7 +52,7 @@ def get_issn(collection): | |
| def get_journal_xml(collection, issn): | ||
| try: | ||
| official_journal = requests.get( | ||
| f"http://{collection}/scielo.php?script=sci_serial&pid={issn}&lng=es&nrm=iso&debug=xml", | ||
| f"{collection.rstrip('/')}/scielo.php?script=sci_serial&pid={issn}&lng=es&nrm=iso&debug=xml", | ||
| timeout=10, | ||
|
Comment on lines
54
to
56
|
||
| ) | ||
| return xmltodict.parse(official_journal.text) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
collection__domainis not consistently stored with a URL scheme in this repo (tests/fixtures use values likewww.scielo.br). After this change, ifdomainhas no scheme the generated URL becomeswww.scielo.br/scielo.php?...(relative/invalid in many contexts). Also, ifdomainisNone, calling.rstrip('/')will raise. Suggest normalizing safely:domain = (domain or "").rstrip("/")and prepend a scheme only when missing (e.g.if domain and "://" not in domain: domain = "http://" + domain).