diff --git a/api/crossref/views.py b/api/crossref/views.py
index 9baeb37d562..bf92d358ad7 100644
--- a/api/crossref/views.py
+++ b/api/crossref/views.py
@@ -7,6 +7,7 @@
from api.crossref.permissions import RequestComesFromMailgun
from osf.models import Preprint, NotificationTypeEnum
+from osf.models.base import Guid
from website import settings
from website.preprints.tasks import mint_doi_on_crossref_fail
@@ -48,6 +49,23 @@ def post(self, request):
if record.get('status').lower() == 'success' and doi:
msg = record.find('msg').text
created = bool(msg == 'Successfully added')
+ # Unversioned DOIs (no _vN suffix, e.g. 10.31233/osf.io/tnaqp) are routing
+ # aliases that always resolve to the latest version via OSF's GUID routing.
+ # Store them as 'doi_unversioned' on the v1 preprint so we can track which
+ # preprint series have had their unversioned DOI registered.
+ _, version = Guid.split_guid(guid) if guid else (None, None)
+ if not version:
+ logger.info(f'Unversioned DOI confirmed by CrossRef: {doi}')
+ if created and guid:
+ v1_preprint = Preprint.objects.filter(
+ versioned_guids__guid___id=guid,
+ versioned_guids__version=1,
+ ).first()
+ if v1_preprint:
+ v1_preprint.set_identifier_value(category='doi_unversioned', value=doi)
+ dois_processed += 1
+ continue
+
legacy_doi = preprint.get_identifier(category='legacy_doi')
if created or legacy_doi:
# Sets preprint_doi_created and saves the preprint
@@ -67,9 +85,14 @@ def post(self, request):
if 'Relation target DOI does not exist' in record.find('msg').text:
logger.warning('Related publication DOI does not exist, sending metadata again without it...')
mint_doi_on_crossref_fail.apply_async(kwargs={'preprint_id': preprint._id})
- # This error occurs when a single preprint is being updated several times in a row with the same metadata [#PLAT-944]
- elif 'less or equal to previously submitted version' in record.find('msg').text and record_count == 2:
- break
+ # This error occurs when a single preprint is being updated several times in a row
+ # with the same metadata [#PLAT-944]. Previously this broke out of the loop when
+ # record_count == 2 (single DOI submitted twice). Now batches legitimately contain
+ # 2 records (versioned + unversioned DOI), so we continue instead of break to allow
+ # the remaining record to be processed.
+ elif 'less or equal to previously submitted version' in record.find('msg').text:
+ dois_processed += 1
+ continue
else:
unexpected_errors = True
logger.info(f'Creation success email received from CrossRef for preprints: {guids}')
diff --git a/api/users/views.py b/api/users/views.py
index b920798be86..218fef32cd2 100644
--- a/api/users/views.py
+++ b/api/users/views.py
@@ -1477,7 +1477,8 @@ def post(self, request, *args, **kwargs):
user.date_last_logged_in = timezone.now()
user.external_identity[provider][provider_id] = 'VERIFIED'
- user.social[provider.lower()] = provider_id
+ if provider.lower() in OSFUser.SOCIAL_FIELDS:
+ user.social[provider.lower()] = provider_id
del user.email_verifications[token]
user.verification_key = generate_verification_key()
user.save()
diff --git a/api_tests/crossref/views/test_crossref_email_response.py b/api_tests/crossref/views/test_crossref_email_response.py
index 196c0debd2a..fb03a2c404a 100644
--- a/api_tests/crossref/views/test_crossref_email_response.py
+++ b/api_tests/crossref/views/test_crossref_email_response.py
@@ -220,3 +220,91 @@ def test_confirmation_marks_legacy_doi_as_deleted(self, app, url, preprint):
app.post(url, context_data)
assert preprint.identifiers.get(category='legacy_doi').deleted
+
+ def test_unversioned_doi_confirmation_skips_identifier_update(self, app, url, preprint):
+ versioned_doi = settings.DOI_FORMAT.format(
+ prefix=preprint.provider.doi_prefix, guid=preprint._id
+ )
+ preprint.set_identifier_value(category='doi', value=versioned_doi)
+
+ base_guid = preprint.get_guid()._id # no _vN suffix
+ unversioned_doi = settings.DOI_FORMAT.format(
+ prefix=preprint.provider.doi_prefix, guid=base_guid
+ )
+ dual_confirmation_xml = """
+        <?xml version="1.0" encoding="UTF-8"?>
+        <doi_batch_diagnostic status="completed" sp="a-cs1">
+           <submission_id>1390675999</submission_id>
+           <batch_id>{batch_id}</batch_id>
+           <record_diagnostic status="Success">
+              <doi>{versioned_doi}</doi>
+              <msg>Successfully updated</msg>
+           </record_diagnostic>
+           <record_diagnostic status="Success">
+              <doi>{unversioned_doi}</doi>
+              <msg>Successfully added</msg>
+           </record_diagnostic>
+           <batch_data>
+              <record_count>2</record_count>
+              <success_count>2</success_count>
+              <warning_count>0</warning_count>
+              <failure_count>0</failure_count>
+           </batch_data>
+        </doi_batch_diagnostic>
+ """.format(
+ batch_id=preprint._id,
+ versioned_doi=versioned_doi,
+ unversioned_doi=unversioned_doi,
+ )
+
+ context_data = self.make_mailgun_payload(crossref_response=dual_confirmation_xml)
+ with capture_notifications(expect_none=True):
+ app.post(url, context_data)
+
+ preprint.reload()
+ assert preprint.get_identifier_value('doi') == versioned_doi
+ assert preprint.get_identifier_value('doi_unversioned') == unversioned_doi
+
+ def test_unversioned_doi_confirmation_update_does_not_store_doi_unversioned(self, app, url, preprint):
+ versioned_doi = settings.DOI_FORMAT.format(
+ prefix=preprint.provider.doi_prefix, guid=preprint._id
+ )
+ preprint.set_identifier_value(category='doi', value=versioned_doi)
+
+ base_guid = preprint.get_guid()._id
+ unversioned_doi = settings.DOI_FORMAT.format(
+ prefix=preprint.provider.doi_prefix, guid=base_guid
+ )
+ update_confirmation_xml = """
+        <?xml version="1.0" encoding="UTF-8"?>
+        <doi_batch_diagnostic status="completed" sp="a-cs1">
+           <submission_id>1390676000</submission_id>
+           <batch_id>{batch_id}</batch_id>
+           <record_diagnostic status="Success">
+              <doi>{versioned_doi}</doi>
+              <msg>Successfully updated</msg>
+           </record_diagnostic>
+           <record_diagnostic status="Success">
+              <doi>{unversioned_doi}</doi>
+              <msg>Successfully updated</msg>
+           </record_diagnostic>
+           <batch_data>
+              <record_count>2</record_count>
+              <success_count>2</success_count>
+              <warning_count>0</warning_count>
+              <failure_count>0</failure_count>
+           </batch_data>
+        </doi_batch_diagnostic>
+ """.format(
+ batch_id=preprint._id,
+ versioned_doi=versioned_doi,
+ unversioned_doi=unversioned_doi,
+ )
+
+ context_data = self.make_mailgun_payload(crossref_response=update_confirmation_xml)
+ with capture_notifications(expect_none=True):
+ app.post(url, context_data)
+
+ preprint.reload()
+ assert preprint.get_identifier_value('doi') == versioned_doi
+ assert preprint.get_identifier_value('doi_unversioned') is None
diff --git a/osf/management/commands/resync_preprint_dois_v1.py b/osf/management/commands/resync_preprint_dois_v1.py
new file mode 100644
index 00000000000..31c04cf13f7
--- /dev/null
+++ b/osf/management/commands/resync_preprint_dois_v1.py
@@ -0,0 +1,263 @@
+import logging
+import time
+
+from django.contrib.contenttypes.models import ContentType
+from django.core.management.base import BaseCommand
+from django.db.models import Q
+
+from framework.celery_tasks import app
+from osf.models import Preprint, Identifier
+from osf.models.base import VersionedGuidMixin
+from osf.management.commands.sync_doi_metadata import async_request_identifier_update
+
+logger = logging.getLogger(__name__)
+
+# 5-minute pause between rate-limit windows to avoid flooding the Crossref API
+# with too many deposit requests in a short period.
+RATE_LIMIT_SLEEP = 60 * 5
+
+
+def get_preprints_needing_v1_doi(provider_id=None):
+ content_type = ContentType.objects.get_for_model(Preprint)
+
+ already_versioned_ids = Identifier.objects.filter(
+ content_type=content_type,
+ category='doi',
+ deleted__isnull=True,
+ value__contains=VersionedGuidMixin.GUID_VERSION_DELIMITER,
+ ).values_list('object_id', flat=True)
+
+ public_query = Q(is_published=True, is_public=True, deleted__isnull=True)
+ withdrawn_query = Q(date_withdrawn__isnull=False, ever_public=True)
+
+ qs = Preprint.objects.filter(
+ versioned_guids__version=1,
+ ).filter(
+ public_query | withdrawn_query
+ ).exclude(
+ id__in=already_versioned_ids
+ ).exclude(
+ tags__name='qatest',
+ tags__system=True,
+ ).select_related('provider').distinct()
+
+ if provider_id:
+ qs = qs.filter(provider___id=provider_id)
+
+ return qs
+
+
+def resync_preprint_dois_v1(dry_run=True, batch_size=500, rate_limit=100, provider_id=None):
+ preprints_to_update = get_preprints_needing_v1_doi(provider_id=provider_id)
+
+ total = preprints_to_update.count()
+ logger.info(
+ f'{"[DRY RUN] " if dry_run else ""}'
+ f'{total} preprints need v1 DOI resync'
+ + (f' (provider={provider_id})' if provider_id else '')
+ )
+
+ if batch_size:
+ preprints_iterable = preprints_to_update[:batch_size]
+ else:
+ preprints_iterable = preprints_to_update.iterator()
+
+ queued = 0
+ skipped = 0
+ errored = 0
+ for record_number, preprint in enumerate(preprints_iterable, 1):
+ if not preprint.provider.doi_prefix:
+ logger.warning(
+ f'Skipping preprint {preprint._id}: '
+ f'provider {preprint.provider._id} has no DOI prefix'
+ )
+ skipped += 1
+ continue
+
+ if dry_run:
+ logger.info(f'[DRY RUN] Would resync DOI for preprint {preprint._id}')
+ queued += 1
+ continue
+
+ if rate_limit and not record_number % rate_limit:
+ logger.info(f'Rate limit reached at {record_number} preprints, sleeping {RATE_LIMIT_SLEEP}s')
+ time.sleep(RATE_LIMIT_SLEEP)
+
+ try:
+ async_request_identifier_update.apply_async(kwargs={'preprint_id': preprint._id})
+ logger.info(f'Queued DOI resync for preprint {preprint._id}')
+ queued += 1
+ except Exception:
+ logger.exception(f'Failed to queue DOI resync for preprint {preprint._id}')
+ errored += 1
+
+ logger.info(
+ f'{"[DRY RUN] " if dry_run else ""}'
+ f'Done: {queued} preprints queued, {skipped} skipped (no DOI prefix), {errored} errored'
+ )
+ if not dry_run and batch_size:
+ logger.info(
+ f'Estimated remaining after this batch: ~{max(0, total - queued - skipped - errored)}. '
+ f'Re-run this command until 0 preprints remain.'
+ )
+
+
+def get_preprints_needing_unversioned_doi(provider_id=None):
+ content_type = ContentType.objects.get_for_model(Preprint)
+
+ already_has_unversioned = Identifier.objects.filter(
+ content_type=content_type,
+ category='doi_unversioned',
+ deleted__isnull=True,
+ ).values_list('object_id', flat=True)
+
+ has_versioned_doi = Identifier.objects.filter(
+ content_type=content_type,
+ category='doi',
+ deleted__isnull=True,
+ value__contains=VersionedGuidMixin.GUID_VERSION_DELIMITER,
+ ).values_list('object_id', flat=True)
+
+ public_query = Q(is_published=True, is_public=True, deleted__isnull=True)
+ withdrawn_query = Q(date_withdrawn__isnull=False, ever_public=True)
+
+ qs = Preprint.objects.filter(
+ versioned_guids__version=1,
+ id__in=has_versioned_doi,
+ ).filter(
+ public_query | withdrawn_query
+ ).exclude(
+ id__in=already_has_unversioned
+ ).exclude(
+ tags__name='qatest',
+ tags__system=True,
+ ).select_related('provider').distinct()
+
+ if provider_id:
+ qs = qs.filter(provider___id=provider_id)
+
+ return qs
+
+
+def register_missing_unversioned_dois(dry_run=True, batch_size=500, rate_limit=100, provider_id=None):
+ preprints_to_update = get_preprints_needing_unversioned_doi(provider_id=provider_id)
+
+ total = preprints_to_update.count()
+ logger.info(
+ f'{"[DRY RUN] " if dry_run else ""}'
+ f'{total} preprints need unversioned DOI registration'
+ + (f' (provider={provider_id})' if provider_id else '')
+ )
+
+ if batch_size:
+ preprints_iterable = preprints_to_update[:batch_size]
+ else:
+ preprints_iterable = preprints_to_update.iterator()
+
+ queued = 0
+ skipped = 0
+ errored = 0
+ for record_number, preprint in enumerate(preprints_iterable, 1):
+ if not preprint.provider.doi_prefix:
+ logger.warning(
+ f'Skipping preprint {preprint._id}: '
+ f'provider {preprint.provider._id} has no DOI prefix'
+ )
+ skipped += 1
+ continue
+
+ if dry_run:
+ logger.info(f'[DRY RUN] Would register unversioned DOI for preprint {preprint._id}')
+ queued += 1
+ continue
+
+ if rate_limit and not record_number % rate_limit:
+ logger.info(f'Rate limit reached at {record_number} preprints, sleeping {RATE_LIMIT_SLEEP}s')
+ time.sleep(RATE_LIMIT_SLEEP)
+
+ try:
+ async_request_identifier_update.apply_async(kwargs={'preprint_id': preprint._id})
+ logger.info(f'Queued unversioned DOI registration for preprint {preprint._id}')
+ queued += 1
+ except Exception:
+ logger.exception(f'Failed to queue unversioned DOI registration for preprint {preprint._id}')
+ errored += 1
+
+ logger.info(
+ f'{"[DRY RUN] " if dry_run else ""}'
+ f'Unversioned DOI pass done: {queued} queued, {skipped} skipped, {errored} errored'
+ )
+ if not dry_run and batch_size:
+ logger.info(
+ f'Estimated unversioned remaining after this batch: ~{max(0, total - queued - skipped - errored)}. '
+ f'Re-run until 0 preprints remain.'
+ )
+
+
+@app.task(name='osf.management.commands.resync_preprint_dois_v1', max_retries=0)
+def resync_preprint_dois_v1_task(batch_size=500, rate_limit=100, dry_run=False, provider_id=None):
+ resync_preprint_dois_v1(
+ dry_run=dry_run,
+ batch_size=batch_size,
+ rate_limit=rate_limit,
+ provider_id=provider_id,
+ )
+ register_missing_unversioned_dois(
+ dry_run=dry_run,
+ batch_size=batch_size,
+ rate_limit=rate_limit,
+ provider_id=provider_id,
+ )
+
+
+class Command(BaseCommand):
+ help = (
+ 'Resync DOIs for version-1 preprints that are missing the versioned DOI suffix (_v1). '
+ 'Processes preprints in batches and queues Crossref deposit tasks. '
+ 'IMPORTANT: This command must be run repeatedly until it reports 0 preprints remaining, '
+ 'as each run only processes a single batch. '
+ 'Check remaining count with --dry_run before and after each run.'
+ )
+
+ def add_arguments(self, parser):
+ super().add_arguments(parser)
+ parser.add_argument(
+ '--dry_run',
+ action='store_true',
+ dest='dry_run',
+ help='Log what would be done without submitting to Crossref.',
+ )
+ parser.add_argument(
+ '--batch_size',
+ '-b',
+ type=int,
+ default=500,
+ help=(
+ 'Maximum number of preprints to process per run (default: 500). '
+ 'The command processes the first N eligible preprints and exits; '
+ 're-run the command to continue with the next batch.'
+ ),
+ )
+ parser.add_argument(
+ '--rate_limit',
+ '-r',
+ type=int,
+ default=100,
+ help='Sleep between Crossref submissions every N preprints.',
+ )
+ parser.add_argument(
+ '--provider',
+ '-p',
+ type=str,
+ default=None,
+ dest='provider_id',
+ help='Restrict to a single provider _id (e.g. socarxiv).',
+ )
+
+ def handle(self, *args, **options):
+ resync_preprint_dois_v1(
+ dry_run=options['dry_run'],
+ batch_size=options['batch_size'],
+ rate_limit=options['rate_limit'],
+ provider_id=options['provider_id'],
+ )
diff --git a/osf_tests/test_user.py b/osf_tests/test_user.py
index 2e5d0631cd4..d564429b992 100644
--- a/osf_tests/test_user.py
+++ b/osf_tests/test_user.py
@@ -1344,7 +1344,7 @@ def test_add_system_tag(self, user):
assert len(user.system_tags) == 1
- tag = Tag.all_tags.get(name=tag_name, system=True)
+ tag = Tag.all_tags.get(name=tag_name.lower(), system=True)
assert tag in user.all_tags.all()
def test_add_system_tag_instance(self, user):
diff --git a/tests/identifiers/test_crossref.py b/tests/identifiers/test_crossref.py
index e05284f303e..4d9c0b43806 100644
--- a/tests/identifiers/test_crossref.py
+++ b/tests/identifiers/test_crossref.py
@@ -95,6 +95,18 @@ def test_crossref_build_doi(self, crossref_client, preprint):
assert crossref_client.build_doi(preprint) == settings.DOI_FORMAT.format(prefix=doi_prefix, guid=preprint._id)
+ def test_crossref_build_unversioned_doi(self, crossref_client, preprint):
+ doi_prefix = preprint.provider.doi_prefix
+ base_guid = preprint.get_guid()._id
+
+ expected = settings.DOI_FORMAT.format(prefix=doi_prefix, guid=base_guid)
+ assert crossref_client.build_unversioned_doi(preprint) == expected
+ assert '_v' not in expected
+
+ def test_crossref_build_unversioned_doi_matches_base_guid_not_versioned(self, crossref_client, preprint, preprint_version):
+ assert crossref_client.build_doi(preprint_version) != crossref_client.build_unversioned_doi(preprint_version)
+ assert crossref_client.build_unversioned_doi(preprint) == crossref_client.build_unversioned_doi(preprint_version)
+
def test_crossref_build_doi_versioned(self, crossref_client, preprint_version):
doi_prefix = preprint_version.provider.doi_prefix
@@ -338,6 +350,57 @@ def test_metadata_for_non_included_relation(self, crossref_client, preprint):
root_without_relation = lxml.etree.fromstring(xml_without_relation)
assert root_without_relation.find('.//{%s}intra_work_relation' % crossref.CROSSREF_RELATIONS) is None
+ def test_metadata_includes_unversioned_doi_entry(self, crossref_client, preprint):
+ crossref_xml = crossref_client.build_metadata(preprint, include_unversioned_doi=True)
+ root = lxml.etree.fromstring(crossref_xml)
+
+ posted_contents = root.findall('.//{%s}posted_content' % crossref.CROSSREF_NAMESPACE)
+ assert len(posted_contents) == 2
+
+ versioned_dois = posted_contents[0].findall('.//{%s}doi' % crossref.CROSSREF_NAMESPACE)
+ assert len(versioned_dois) == 1
+ assert versioned_dois[0].text == crossref_client.build_doi(preprint)
+
+ unversioned_dois = posted_contents[1].findall('.//{%s}doi' % crossref.CROSSREF_NAMESPACE)
+ assert len(unversioned_dois) == 1
+ assert unversioned_dois[0].text == crossref_client.build_unversioned_doi(preprint)
+ assert '_v' not in unversioned_dois[0].text
+
+ unversioned_resource = posted_contents[1].find('.//{%s}resource' % crossref.CROSSREF_NAMESPACE)
+ base_guid = preprint.get_guid()._id
+ assert unversioned_resource.text == settings.DOMAIN + base_guid
+
+ def test_metadata_unversioned_doi_uses_latest_version_metadata(self, crossref_client, preprint, preprint_version):
+ crossref_xml = crossref_client.build_metadata(preprint, include_unversioned_doi=True)
+ root = lxml.etree.fromstring(crossref_xml)
+
+ posted_contents = root.findall('.//{%s}posted_content' % crossref.CROSSREF_NAMESPACE)
+ assert len(posted_contents) == 2
+
+ unversioned_resource = posted_contents[1].find('.//{%s}resource' % crossref.CROSSREF_NAMESPACE)
+ base_guid = preprint.get_guid()._id
+ assert unversioned_resource.text == settings.DOMAIN + base_guid
+
+ def test_metadata_unversioned_doi_has_no_version_relations(self, crossref_client, preprint, preprint_version):
+ crossref_xml = crossref_client.build_metadata(preprint_version, include_unversioned_doi=True)
+ root = lxml.etree.fromstring(crossref_xml)
+
+ posted_contents = root.findall('.//{%s}posted_content' % crossref.CROSSREF_NAMESPACE)
+ unversioned_content = posted_contents[1]
+
+ relations = unversioned_content.findall('.//{%s}intra_work_relation' % crossref.CROSSREF_RELATIONS)
+ is_version_of_relations = [
+ r for r in relations if r.get('relationship-type') == 'isVersionOf'
+ ]
+ assert len(is_version_of_relations) == 0
+
+ def test_metadata_bulk_does_not_include_unversioned_doi(self, crossref_client, preprint, preprint_version):
+ crossref_xml = crossref_client.build_metadata([preprint, preprint_version])
+ root = lxml.etree.fromstring(crossref_xml)
+
+ posted_contents = root.findall('.//{%s}posted_content' % crossref.CROSSREF_NAMESPACE)
+ assert len(posted_contents) == 2 # one per preprint, no extras
+
def test_metadata_for_affiliated_institutions(self, crossref_client, preprint):
institution = InstitutionFactory()
institution.ror_uri = 'http://ror.org/WHATisITgoodFOR/'
diff --git a/tests/identifiers/test_resync_preprint_dois_v1.py b/tests/identifiers/test_resync_preprint_dois_v1.py
new file mode 100644
index 00000000000..05c5996fb73
--- /dev/null
+++ b/tests/identifiers/test_resync_preprint_dois_v1.py
@@ -0,0 +1,187 @@
+import pytest
+from unittest import mock
+from django.utils import timezone
+
+from osf.models import Preprint
+from osf_tests.factories import PreprintFactory, PreprintProviderFactory
+from osf.management.commands.resync_preprint_dois_v1 import (
+ get_preprints_needing_v1_doi,
+ resync_preprint_dois_v1,
+)
+from website import settings
+
+pytestmark = pytest.mark.django_db
+
+
+@pytest.fixture()
+def provider():
+ p = PreprintProviderFactory()
+ p.doi_prefix = '10.31219'
+ p.save()
+ return p
+
+
+@pytest.fixture()
+def preprint(provider):
+ pp = PreprintFactory(provider=provider, is_published=True)
+ old_doi = settings.DOI_FORMAT.format(prefix=provider.doi_prefix, guid=pp.get_guid()._id)
+ pp.set_identifier_values(doi=old_doi, save=True)
+ return pp
+
+
+@pytest.fixture()
+def preprint_with_v1_doi(provider):
+ pp = PreprintFactory(provider=provider, is_published=True)
+ v1_doi = settings.DOI_FORMAT.format(prefix=provider.doi_prefix, guid=pp._id)
+ pp.set_identifier_values(doi=v1_doi, save=True)
+ return pp
+
+
+class TestGetPreprintsNeedingV1Doi:
+
+ def test_includes_public_preprint_without_versioned_doi(self, preprint):
+ qs = get_preprints_needing_v1_doi()
+ assert preprint in qs
+
+ def test_excludes_preprint_with_versioned_doi(self, preprint_with_v1_doi):
+ qs = get_preprints_needing_v1_doi()
+ assert preprint_with_v1_doi not in qs
+
+ def test_excludes_preprint_with_no_doi_if_private(self, provider):
+ private_preprint = PreprintFactory(provider=provider, is_published=False)
+ private_preprint.is_public = False
+ private_preprint.save()
+ qs = get_preprints_needing_v1_doi()
+ assert private_preprint not in qs
+
+ def test_includes_withdrawn_preprint_with_ever_public(self, provider):
+ pp = PreprintFactory(provider=provider, is_published=True)
+ old_doi = settings.DOI_FORMAT.format(prefix=provider.doi_prefix, guid=pp.get_guid()._id)
+ pp.set_identifier_values(doi=old_doi, save=True)
+ pp.date_withdrawn = timezone.now()
+ pp.ever_public = True
+ pp.save()
+ qs = get_preprints_needing_v1_doi()
+ assert pp in qs
+
+ def test_excludes_withdrawn_preprint_never_public(self, provider):
+ pp = PreprintFactory(provider=provider, is_published=False)
+ Preprint.objects.filter(pk=pp.pk).update(date_withdrawn=timezone.now())
+ qs = get_preprints_needing_v1_doi()
+ assert pp not in qs
+
+ def test_excludes_version_2_preprint(self, preprint):
+ from tests.utils import capture_notifications
+ with capture_notifications():
+ v2 = PreprintFactory.create_version(preprint, is_published=True, set_doi=False)
+ old_doi = settings.DOI_FORMAT.format(prefix=preprint.provider.doi_prefix, guid=v2.get_guid()._id)
+ v2.set_identifier_values(doi=old_doi, save=True)
+ qs = get_preprints_needing_v1_doi()
+ assert v2 not in qs
+
+ def test_excludes_qatest_tagged_preprint(self, preprint):
+ preprint.add_system_tag('qatest')
+ qs = get_preprints_needing_v1_doi()
+ assert preprint not in qs
+
+ def test_excludes_deleted_preprint(self, preprint):
+ preprint.deleted = timezone.now()
+ preprint.save()
+ qs = get_preprints_needing_v1_doi()
+ assert preprint not in qs
+
+ def test_provider_filter_limits_results(self, preprint, provider):
+ other_provider = PreprintProviderFactory()
+ other_provider.doi_prefix = '10.12345'
+ other_provider.save()
+ other_preprint = PreprintFactory(provider=other_provider, is_published=True)
+ old_doi = settings.DOI_FORMAT.format(prefix=other_provider.doi_prefix, guid=other_preprint.get_guid()._id)
+ other_preprint.set_identifier_values(doi=old_doi, save=True)
+
+ qs = get_preprints_needing_v1_doi(provider_id=provider._id)
+ assert preprint in qs
+ assert other_preprint not in qs
+
+ def test_preprint_with_no_doi_identifier_is_included(self, provider):
+ pp = PreprintFactory(provider=provider, is_published=True, set_doi=False)
+ qs = get_preprints_needing_v1_doi()
+ assert pp in qs
+
+
+class TestResyncPreprintDoisV1:
+
+ @mock.patch('osf.management.commands.resync_preprint_dois_v1.async_request_identifier_update')
+ def test_dry_run_does_not_queue_tasks(self, mock_task, preprint):
+ resync_preprint_dois_v1(dry_run=True)
+ mock_task.apply_async.assert_not_called()
+
+ @mock.patch('osf.management.commands.resync_preprint_dois_v1.async_request_identifier_update')
+ def test_live_run_queues_task_for_each_preprint(self, mock_task, preprint):
+ resync_preprint_dois_v1(dry_run=False, rate_limit=0)
+ mock_task.apply_async.assert_called_once_with(kwargs={'preprint_id': preprint._id})
+
+ @mock.patch('osf.management.commands.resync_preprint_dois_v1.async_request_identifier_update')
+ def test_batch_size_limits_processed_count(self, mock_task, provider):
+ preprints = []
+ for _ in range(5):
+ pp = PreprintFactory(provider=provider, is_published=True)
+ old_doi = settings.DOI_FORMAT.format(prefix=provider.doi_prefix, guid=pp.get_guid()._id)
+ pp.set_identifier_values(doi=old_doi, save=True)
+ preprints.append(pp)
+
+ resync_preprint_dois_v1(dry_run=False, batch_size=2, rate_limit=0)
+ assert mock_task.apply_async.call_count == 2
+
+ @mock.patch('osf.management.commands.resync_preprint_dois_v1.async_request_identifier_update')
+ def test_skips_provider_without_doi_prefix(self, mock_task, provider):
+ no_prefix_provider = PreprintProviderFactory()
+ no_prefix_provider.doi_prefix = ''
+ no_prefix_provider.save()
+ pp = PreprintFactory(provider=no_prefix_provider, is_published=True)
+ old_doi = '10.000/old-doi'
+ pp.set_identifier_values(doi=old_doi, save=True)
+
+ resync_preprint_dois_v1(dry_run=False, rate_limit=0)
+ queued_ids = [
+ call.kwargs['kwargs']['preprint_id']
+ for call in mock_task.apply_async.call_args_list
+ ]
+ assert pp._id not in queued_ids
+
+ @mock.patch('osf.management.commands.resync_preprint_dois_v1.async_request_identifier_update')
+ def test_provider_filter_is_applied(self, mock_task, preprint, provider):
+ other_provider = PreprintProviderFactory()
+ other_provider.doi_prefix = '10.99999'
+ other_provider.save()
+ other_pp = PreprintFactory(provider=other_provider, is_published=True)
+ old_doi = settings.DOI_FORMAT.format(prefix=other_provider.doi_prefix, guid=other_pp.get_guid()._id)
+ other_pp.set_identifier_values(doi=old_doi, save=True)
+
+ resync_preprint_dois_v1(dry_run=False, rate_limit=0, provider_id=provider._id)
+
+ queued_ids = [
+ call.kwargs['kwargs']['preprint_id']
+ for call in mock_task.apply_async.call_args_list
+ ]
+ assert preprint._id in queued_ids
+ assert other_pp._id not in queued_ids
+
+ @mock.patch('osf.management.commands.resync_preprint_dois_v1.async_request_identifier_update')
+ def test_already_versioned_doi_is_not_queued(self, mock_task, preprint_with_v1_doi):
+ resync_preprint_dois_v1(dry_run=False, rate_limit=0)
+ queued_ids = [
+ call.kwargs['kwargs']['preprint_id']
+ for call in mock_task.apply_async.call_args_list
+ ]
+ assert preprint_with_v1_doi._id not in queued_ids
+
+ @mock.patch('osf.management.commands.resync_preprint_dois_v1.time.sleep')
+ @mock.patch('osf.management.commands.resync_preprint_dois_v1.async_request_identifier_update')
+ def test_rate_limit_triggers_sleep(self, mock_task, mock_sleep, provider):
+ for _ in range(3):
+ pp = PreprintFactory(provider=provider, is_published=True)
+ old_doi = settings.DOI_FORMAT.format(prefix=provider.doi_prefix, guid=pp.get_guid()._id)
+ pp.set_identifier_values(doi=old_doi, save=True)
+
+ resync_preprint_dois_v1(dry_run=False, rate_limit=2)
+ mock_sleep.assert_called_once()
diff --git a/website/identifiers/clients/crossref.py b/website/identifiers/clients/crossref.py
index 8f496ce363b..c1d0aa41ece 100644
--- a/website/identifiers/clients/crossref.py
+++ b/website/identifiers/clients/crossref.py
@@ -41,13 +41,18 @@ def build_doi(self, preprint):
prefix = preprint.provider.doi_prefix
return settings.DOI_FORMAT.format(prefix=prefix, guid=preprint._id)
- def build_metadata(self, preprint, include_relation=True):
+ def build_unversioned_doi(self, preprint):
+ prefix = preprint.provider.doi_prefix
+ return settings.DOI_FORMAT.format(prefix=prefix, guid=preprint.get_guid()._id)
+
+ def build_metadata(self, preprint, include_relation=True, include_unversioned_doi=False):
"""Return the crossref metadata XML document for a given preprint as a string for DOI minting purposes
:param preprint: the preprint, or list of preprints to build metadata for
"""
if isinstance(preprint, (list, QuerySet)):
preprints = preprint
+ include_unversioned_doi = False # not supported for bulk batches
else:
preprints = [preprint]
@@ -74,6 +79,9 @@ def build_metadata(self, preprint, include_relation=True):
for preprint in preprints:
body.append(self.build_posted_content(preprint, element, include_relation))
+ if include_unversioned_doi:
+ body.append(self.build_unversioned_posted_content(preprints[0], element))
+
root = element.doi_batch(
head,
body,
@@ -82,10 +90,12 @@ def build_metadata(self, preprint, include_relation=True):
root.attrib['{%s}schemaLocation' % XSI] = CROSSREF_SCHEMA_LOCATION
return lxml.etree.tostring(root)
- def build_posted_content(self, preprint, element, include_relation):
+ def build_posted_content(self, preprint, element, include_relation, doi_override=None, resource_override=None):
"""Build the element for a single preprint
preprint - preprint to build posted_content for
element - namespace element to use when building parts of the XML structure
+ doi_override - if provided, use this DOI value instead of the preprint's own DOI
+        resource_override - if provided, use this URL as the <resource> instead of the default
"""
from osf.models import SpamStatus
@@ -138,7 +148,7 @@ def build_posted_content(self, preprint, element, include_relation):
preprint_versions = preprint.get_preprint_versions(
versioned_guids__version__lt=preprint.version,
include_rejected=False,
- )
+ ) if include_relation else []
if preprint_versions:
for previous_version in preprint_versions:
@@ -157,15 +167,27 @@ def build_posted_content(self, preprint, element, include_relation):
posted_content.append(relations_program)
minted_doi = preprint.get_identifier_value('doi')
- doi = minted_doi or self.build_doi(preprint)
+ doi = doi_override or minted_doi or self.build_doi(preprint)
+ resource_url = resource_override if resource_override is not None else settings.DOMAIN + preprint._id
doi_data = [
element.doi(doi),
- element.resource(settings.DOMAIN + preprint._id)
+ element.resource(resource_url)
]
posted_content.append(element.doi_data(*doi_data))
return posted_content
+ def build_unversioned_posted_content(self, preprint, element):
+ latest = preprint.get_guid().referent
+ base_guid = latest.get_guid()._id
+ return self.build_posted_content(
+ latest,
+ element,
+ include_relation=False,
+ doi_override=self.build_unversioned_doi(latest),
+ resource_override=settings.DOMAIN + base_guid,
+ )
+
def _process_crossref_name(self, contributor):
# Adapted from logic used in `api/citations/utils.py`
# If the user has a family and given name, use those
@@ -249,7 +271,7 @@ def _build_url(self, **query):
def create_identifier(self, preprint, category, include_relation=True):
if category == 'doi':
- metadata = self.build_metadata(preprint, include_relation)
+ metadata = self.build_metadata(preprint, include_relation, include_unversioned_doi=True)
doi = self.build_doi(preprint)
username, password = self.get_credentials()
logger.info(f'Sending metadata for DOI {doi}:\n{metadata}')
diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index fbe9b939ae1..d2d62cfcb60 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -711,6 +711,11 @@ class CeleryConfig:
'schedule': crontab(minute=0, hour=5), # Daily 12 a.m
'kwargs': {'dry_run': False},
},
+ 'resync_preprint_dois_v1': {
+ 'task': 'osf.management.commands.resync_preprint_dois_v1',
+            'schedule': crontab(minute=0, hour=5),  # Daily 12 a.m. EST (05:00 UTC)
+ 'kwargs': {'dry_run': False},
+ },
}