diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index 23a22891..94d9d470 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,1193 +1,1194 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from abc import ABCMeta, abstractmethod import datetime import hashlib import json from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union import uuid import attr from django.core.files.uploadedfile import UploadedFile from django.http import FileResponse, HttpResponse from django.shortcuts import render from django.urls import reverse from django.utils import timezone from rest_framework import status from rest_framework.authentication import BaseAuthentication, BasicAuthentication from rest_framework.permissions import BasePermission, IsAuthenticated from rest_framework.request import Request from rest_framework.views import APIView from swh.deposit.api.checks import check_metadata from swh.deposit.api.converters import convert_status_detail from swh.deposit.models import Deposit from swh.deposit.utils import compute_metadata_context from swh.model import hashutil from swh.model.identifiers import SWHID, ValidationError from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, RawExtrinsicMetadata, ) from swh.scheduler.utils import create_oneshot_task_dict from ..config import ( ARCHIVE_KEY, ARCHIVE_TYPE, CONT_FILE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, EDIT_IRI, EM_IRI, METADATA_KEY, METADATA_TYPE, RAW_METADATA_KEY, SE_IRI, STATE_IRI, APIConfig, ) from ..errors import ( BAD_REQUEST, CHECKSUM_MISMATCH, ERROR_CONTENT, FORBIDDEN, MAX_UPLOAD_SIZE_EXCEEDED, MEDIATION_NOT_ALLOWED, METHOD_NOT_ALLOWED, NOT_FOUND, PARSING_ERROR, DepositError, ParserError, ) from ..models import DepositClient, DepositCollection, DepositRequest -from ..parsers import parse_swh_reference, parse_xml +from ..parsers import parse_xml +from ..utils import parse_swh_reference ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] @attr.s class ParsedRequestHeaders: content_type = attr.ib(type=str) content_length = attr.ib(type=Optional[int]) in_progress = attr.ib(type=bool) content_disposition = attr.ib(type=Optional[str]) content_md5sum = attr.ib(type=Optional[bytes]) packaging = attr.ib(type=Optional[str]) slug = attr.ib(type=Optional[str]) on_behalf_of = attr.ib(type=Optional[str]) metadata_relevant = attr.ib(type=Optional[str]) swhid = attr.ib(type=Optional[str]) @attr.s class Receipt: """Data computed while handling the request body that will be served in the Deposit Receipt.""" deposit_id = attr.ib(type=int) deposit_date = attr.ib(type=datetime.datetime) status = attr.ib(type=str) archive = attr.ib(type=Optional[str]) def _compute_md5(filehandler: UploadedFile) -> bytes: h = hashlib.md5() for chunk in filehandler: h.update(chunk) # type: ignore return h.digest() def get_deposit_by_id( deposit_id: int, collection_name: Optional[str] = None ) -> Deposit: """Gets an existing Deposit object if it exists, or raises `DepositError`. 
If `collection` is not None, also checks the deposit belongs to the collection.""" try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: raise DepositError(NOT_FOUND, f"Deposit {deposit_id} does not exist") if collection_name and deposit.collection.name != collection_name: get_collection_by_name(collection_name) # raises if does not exist raise DepositError( NOT_FOUND, f"Deposit {deposit_id} does not belong to collection {collection_name}", ) return deposit def get_collection_by_name(collection_name: str): """Gets an existing Deposit object if it exists, or raises `DepositError`.""" try: collection = DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: raise DepositError(NOT_FOUND, f"Unknown collection name {collection_name}") assert collection is not None return collection def guess_deposit_origin_url(deposit: Deposit): """Guesses an origin url for the given deposit.""" external_id = deposit.external_id if not external_id: # The client provided neither an origin_url nor a slug. That's inconvenient, # but SWORD requires we support it. So let's generate a random slug. external_id = str(uuid.uuid4()) return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id) def check_client_origin(client: DepositClient, origin_url: str): provider_url = client.provider_url.rstrip("/") + "/" if not origin_url.startswith(provider_url): raise DepositError( FORBIDDEN, f"Cannot create origin {origin_url}, it must start with " f"{provider_url}", ) class AuthenticatedAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ authentication_classes: Sequence[Type[BaseAuthentication]] = (BasicAuthentication,) permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,) class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ _client: Optional[DepositClient] = None def _read_headers(self, request: Request) -> ParsedRequestHeaders: """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: request: Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = request._request.META content_length = meta.get("CONTENT_LENGTH") if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get("HTTP_IN_PROGRESS", False) if isinstance(in_progress, str): in_progress = in_progress.lower() == "true" content_md5sum = meta.get("HTTP_CONTENT_MD5") if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) return ParsedRequestHeaders( content_type=request.content_type, content_length=content_length, in_progress=in_progress, content_disposition=meta.get("HTTP_CONTENT_DISPOSITION"), content_md5sum=content_md5sum, packaging=meta.get("HTTP_PACKAGING"), slug=meta.get("HTTP_SLUG"), on_behalf_of=meta.get("HTTP_ON_BEHALF_OF"), metadata_relevant=meta.get("HTTP_METADATA_RELEVANT"), swhid=meta.get("HTTP_X_CHECK_SWHID"), ) def _deposit_put(self, deposit: Deposit, in_progress: bool = False) -> None: """Save/Update a deposit in db. 
Args: deposit: deposit being updated/created in_progress: deposit status """ if in_progress is False: self._complete_deposit(deposit) else: deposit.status = DEPOSIT_STATUS_PARTIAL deposit.save() def _complete_deposit(self, deposit: Deposit) -> None: """Marks the deposit as 'deposited', then schedule a check task if configured to do so.""" deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() if not deposit.origin_url: deposit.origin_url = guess_deposit_origin_url(deposit) if self.config["checks"]: scheduler = self.scheduler if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: task = create_oneshot_task_dict( "check-deposit", collection=deposit.collection.name, deposit_id=deposit.id, retries_left=3, ) check_task_id = scheduler.create_tasks([task])[0]["id"] deposit.check_task_id = check_task_id deposit.save() def _deposit_request_put( self, deposit: Deposit, deposit_request_data: Dict[str, Any], replace_metadata: bool = False, replace_archives: bool = False, ) -> DepositRequest: """Save a deposit request with metadata attached to a deposit. Args: deposit: The deposit concerned by the request deposit_request_data: The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata: Flag defining if we add or update existing metadata to the deposit replace_archives: Flag defining if we add or update archives to existing deposit Returns: the DepositRequest object stored in the backend """ if replace_metadata: DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file ) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data[RAW_METADATA_KEY] deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata.decode("utf-8"), ) deposit_request.save() assert deposit_request is not None return deposit_request def _delete_archives(self, collection_name: str, deposit: Deposit) -> Dict: """Delete archive references from the deposit id. """ DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name: str, deposit: Deposit) -> Dict: """Delete deposit reference. Args: collection_name: Client's collection deposit: The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ if deposit.collection.name != collection_name: summary = "Cannot delete a deposit from another collection" description = "Deposit %s does not belong to the collection %s" % ( deposit.id, collection_name, ) raise DepositError( BAD_REQUEST, summary=summary, verbose_description=description ) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_file_length( self, filehandler: UploadedFile, content_length: Optional[int] = None, ) -> None: """Check the filehandler passed as argument has exactly the expected content_length Args: filehandler: The file to check content_length: the expected length if provided. 
        Raises:
            DepositError if the actual length does not match

        """
        max_upload_size = self.config["max_upload_size"]
        if content_length:
            length = filehandler.size
            if length != content_length:
                raise DepositError(status.HTTP_412_PRECONDITION_FAILED, "Wrong length")

        if filehandler.size > max_upload_size:
            raise DepositError(
                MAX_UPLOAD_SIZE_EXCEEDED,
                f"Upload size limit exceeded (max {max_upload_size} bytes). "
                "Please consider sending the archive in multiple steps.",
            )

    def _check_file_md5sum(
        self, filehandler: UploadedFile, md5sum: Optional[bytes],
    ) -> None:
        """Check the filehandler passed as argument has the expected md5sum

        Args:
            filehandler: The file to check
            md5sum: md5 hash expected from the file's content

        Raises:
            DepositError if the md5sum does not match

        """
        if md5sum:
            _md5sum = _compute_md5(filehandler)
            if _md5sum != md5sum:
                raise DepositError(
                    CHECKSUM_MISMATCH,
                    "Wrong md5 hash",
                    f"The checksum sent {hashutil.hash_to_hex(md5sum)} and the actual "
                    f"checksum {hashutil.hash_to_hex(_md5sum)} do not match.",
                )

    def _binary_upload(
        self,
        request: Request,
        headers: ParsedRequestHeaders,
        collection_name: str,
        deposit: Deposit,
        replace_metadata: bool = False,
        replace_archives: bool = False,
    ) -> Receipt:
        """Binary upload routine.

        Anything other than such a request gets a 415 response.

        Args:
            request: the request holding information to parse and inject in db
            headers: parsed request headers
            collection_name: the associated client
            deposit: deposit to be updated
            replace_metadata: 'Update or add' request to existing deposit. If False
                (default), this adds a new metadata request to existing ones.
                Otherwise, this will replace existing metadata.
            replace_archives: 'Update or add' request to existing deposit. If False
                (default), this adds a new archive request to existing ones.
                Otherwise, this will replace existing archives.
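        An illustrative request (a sketch: the header values are made up; the
        header names and constraints mirror ``_read_headers`` and the checks
        performed below)::

            POST /1/<collection>/ HTTP/1.1
            Content-Type: application/zip
            Content-Length: 1042
            Content-MD5: <hex-encoded md5 of the body>
            Content-Disposition: attachment; filename=project.zip
            Packaging: http://purl.org/net/sword/package/SimpleZip
            In-Progress: true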
        Raises:
            - 400 (bad request) if the request is not providing an external
              identifier
            - 413 (request entity too large) if the length of the archive
              exceeds the max size configured
            - 412 (precondition failed) if the provided length or md5 hash do
              not match the archive's actual length or hash
            - 415 (unsupported media type) if a wrong media type is provided

        """
        content_length = headers.content_length
        if not content_length:
            raise DepositError(
                BAD_REQUEST,
                "CONTENT_LENGTH header is mandatory",
                "For archive deposit, the CONTENT_LENGTH header must be sent.",
            )

        content_disposition = headers.content_disposition
        if not content_disposition:
            raise DepositError(
                BAD_REQUEST,
                "CONTENT_DISPOSITION header is mandatory",
                "For archive deposit, the CONTENT_DISPOSITION header must be sent.",
            )

        packaging = headers.packaging
        if packaging and packaging not in ACCEPT_PACKAGINGS:
            raise DepositError(
                BAD_REQUEST,
                f"Only packaging {ACCEPT_PACKAGINGS} is supported",
                f"The packaging provided {packaging} is not supported",
            )

        filehandler = request.FILES["file"]
        assert isinstance(filehandler, UploadedFile), filehandler

        self._check_file_length(filehandler, content_length)
        self._check_file_md5sum(filehandler, headers.content_md5sum)

        # actual storage of data
        archive_metadata = filehandler
        self._deposit_put(
            deposit=deposit, in_progress=headers.in_progress,
        )
        self._deposit_request_put(
            deposit,
            {ARCHIVE_KEY: archive_metadata},
            replace_metadata=replace_metadata,
            replace_archives=replace_archives,
        )

        return Receipt(
            deposit_id=deposit.id,
            deposit_date=deposit.reception_date,
            status=deposit.status,
            archive=filehandler.name,
        )

    def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]:
        """Given a metadata stream, reads the metadata and returns both the
        parsed and the raw metadata.

        """
        raw_metadata = metadata_stream.read()
        metadata = parse_xml(raw_metadata)
        return raw_metadata, metadata

    def _multipart_upload(
        self,
        request: Request,
        headers: ParsedRequestHeaders,
        collection_name: str,
        deposit: Deposit,
        replace_metadata: bool = False,
        replace_archives: bool = False,
    ) -> Receipt:
        """Multipart upload supported with exactly:

        - 1 archive (zip)
        - 1 atom entry

        Anything other than such a request gets a 415 response.

        Args:
            request: the request holding information to parse and inject in db
            headers: parsed request headers
            collection_name: the associated client
            deposit: deposit to be updated
            replace_metadata: 'Update or add' request to existing deposit. If False
                (default), this adds a new metadata request to existing ones.
                Otherwise, this will replace existing metadata.
            replace_archives: 'Update or add' request to existing deposit. If False
                (default), this adds a new archive request to existing ones.
                Otherwise, this will replace existing archives.
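        An illustrative multipart body (a sketch: the boundary and part names
        are made up; only the per-part content types drive the dispatching
        below)::

            Content-Type: multipart/form-data; boundary=BOUNDARY

            --BOUNDARY
            Content-Disposition: form-data; name="file"; filename="project.zip"
            Content-Type: application/zip

            <archive bytes>
            --BOUNDARY
            Content-Disposition: form-data; name="atom"; filename="entry.xml"
            Content-Type: application/atom+xml

            <atom entry carrying the deposit metadata>
            --BOUNDARY--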
Raises: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ content_types_present = set() data: Dict[str, Optional[Any]] = { "application/zip": None, # expected either zip "application/x-tar": None, # or x-tar "application/atom+xml": None, } for key, value in request.FILES.items(): fh = value content_type = fh.content_type if content_type in content_types_present: raise DepositError( ERROR_CONTENT, "Only 1 application/zip (or application/x-tar) archive " "and 1 atom+xml entry is supported (as per sword2.0 " "specification)", "You provided more than 1 application/(zip|x-tar) " "or more than 1 application/atom+xml content-disposition " "header in the multipart deposit", ) content_types_present.add(content_type) assert content_type is not None data[content_type] = fh if len(content_types_present) != 2: raise DepositError( ERROR_CONTENT, "You must provide both 1 application/zip (or " "application/x-tar) and 1 atom+xml entry for multipart " "deposit", "You need to provide only 1 application/(zip|x-tar) " "and 1 application/atom+xml content-disposition header " "in the multipart deposit", ) filehandler = data["application/zip"] if not filehandler: filehandler = data["application/x-tar"] assert isinstance(filehandler, UploadedFile), filehandler self._check_file_length(filehandler) self._check_file_md5sum(filehandler, headers.content_md5sum) try: raw_metadata, metadata = self._read_metadata(data["application/atom+xml"]) except ParserError: raise DepositError( PARSING_ERROR, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) # actual storage of data self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives ) assert filehandler is not None return Receipt( deposit_id=deposit.id, deposit_date=deposit.reception_date, archive=filehandler.name, status=deposit.status, ) def _store_metadata_deposit( self, deposit: Deposit, swhid_reference: Union[str, SWHID], metadata: Dict, raw_metadata: bytes, deposit_origin: Optional[str] = None, ) -> Tuple[Union[SWHID, str], Union[SWHID, str], Deposit, DepositRequest]: """When all user inputs pass the checks, this associates the raw_metadata to the swhid_reference in the raw extrinsic metadata storage. In case of any issues, a bad request response is returned to the user with the details. Checks: - metadata are technically parsable - metadata pass the functional checks - SWHID (if any) is technically valid Args: deposit: Deposit reference swhid_reference: The swhid or the origin to attach metadata information to metadata: Full dict of metadata to check for validity (parsed out of raw_metadata) raw_metadata: The actual raw metadata to send in the storage metadata deposit_origin: Optional deposit origin url to use if any (e.g. deposit update scenario provides one) Raises: DepositError in case of incorrect inputs from the deposit client (e.g. functionally invalid metadata, ...) 
Returns: Tuple of core swhid, swhid context, deposit and deposit request """ metadata_ok, error_details = check_metadata(metadata) if not metadata_ok: assert error_details, "Details should be set when a failure occurs" raise DepositError( BAD_REQUEST, "Functional metadata checks failure", convert_status_detail(error_details), ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit.client.provider_url, metadata={"name": deposit.client.last_name}, ) metadata_fetcher = MetadataFetcher( name=self.tool["name"], version=self.tool["version"], metadata=self.tool["configuration"], ) # replace metadata within the deposit backend deposit_request_data = { METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } # actually add the metadata to the completed deposit deposit_request = self._deposit_request_put(deposit, deposit_request_data) object_type, metadata_context = compute_metadata_context(swhid_reference) if deposit_origin: # metadata deposit update on completed deposit metadata_context["origin"] = deposit_origin swhid_core: Union[str, SWHID] if isinstance(swhid_reference, str): swhid_core = swhid_reference else: swhid_core = attr.evolve(swhid_reference, metadata={}) # store that metadata to the metadata storage metadata_object = RawExtrinsicMetadata( type=object_type, target=swhid_core, # core swhid or origin discovery_date=deposit_request.date, authority=metadata_authority, fetcher=metadata_fetcher, format="sword-v2-atom-codemeta", metadata=raw_metadata, **metadata_context, ) # write to metadata storage self.storage_metadata.metadata_authority_add([metadata_authority]) self.storage_metadata.metadata_fetcher_add([metadata_fetcher]) self.storage_metadata.raw_extrinsic_metadata_add([metadata_object]) return (swhid_core, swhid_reference, deposit, deposit_request) def _atom_entry( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, ) -> Receipt: """Atom entry deposit. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be updated replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Raises: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: raw_metadata, metadata = self._read_metadata(request.data) except ParserError: raise DepositError( BAD_REQUEST, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) if metadata is None: raise DepositError( BAD_REQUEST, "Empty body request is not supported", "Atom entry deposit is supposed to send for metadata. 
" "If the body is empty, there is no metadata.", ) create_origin = metadata.get("swh:deposit", {}).get("swh:create_origin") add_to_origin = metadata.get("swh:deposit", {}).get("swh:add_to_origin") if create_origin and add_to_origin: raise DepositError( BAD_REQUEST, " and are mutually exclusive, " "as they respectively create a new origin and add to an existing " "origin.", ) if create_origin: origin_url = create_origin["swh:origin"]["@url"] check_client_origin(deposit.client, origin_url) deposit.origin_url = origin_url if add_to_origin: origin_url = add_to_origin["swh:origin"]["@url"] check_client_origin(deposit.client, origin_url) deposit.parent = ( Deposit.objects.filter( client=deposit.client, origin_url=origin_url, status=DEPOSIT_STATUS_LOAD_SUCCESS, ) .order_by("-id")[0:1] .get() ) if "atom:external_identifier" in metadata: # Deprecated tag. # When clients stopped using it, this should raise an error # unconditionally if deposit.origin_url: raise DepositError( BAD_REQUEST, " is deprecated, you should only use " " from now on.", ) if deposit.parent: raise DepositError( BAD_REQUEST, " is deprecated.", ) if headers.slug and metadata["atom:external_identifier"] != headers.slug: raise DepositError( BAD_REQUEST, "The 'external_identifier' tag is deprecated, " "the Slug header should be used instead.", ) # Determine if we are in the metadata-only deposit case try: swhid = parse_swh_reference(metadata) except ValidationError as e: raise DepositError( PARSING_ERROR, "Invalid SWHID reference", str(e), ) if swhid is not None and ( deposit.origin_url or deposit.parent or deposit.external_id ): raise DepositError( BAD_REQUEST, " is for metadata-only deposits and " " / / Slug are for " "code deposits, only one may be used on a given deposit.", ) self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) if swhid is not None: swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit( deposit, swhid, metadata, raw_metadata ) deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS if isinstance(swhid_ref, SWHID): deposit.swhid = str(swhid) deposit.swhid_context = str(swhid_ref) deposit.complete_date = depo_request.date deposit.reception_date = depo_request.date deposit.save() return Receipt( deposit_id=deposit.id, deposit_date=depo_request.date, status=deposit.status, archive=None, ) self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives, ) return Receipt( deposit_id=deposit.id, deposit_date=deposit.reception_date, status=deposit.status, archive=None, ) def _empty_post( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, ) -> Receipt: """Empty post to finalize a deposit. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be finalized """ self._complete_deposit(deposit) assert deposit.complete_date is not None return Receipt( deposit_id=deposit.id, deposit_date=deposit.complete_date, status=deposit.status, archive=None, ) def additional_checks( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit], ) -> Dict[str, Any]: """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. 
""" return {} def get_client(self, request) -> DepositClient: # This class depends on AuthenticatedAPIView, so request.user.username # is always set username = request.user.username assert username is not None if self._client is None: try: self._client = DepositClient.objects.get( # type: ignore username=username ) except DepositClient.DoesNotExist: raise DepositError(NOT_FOUND, f"Unknown client name {username}") assert self._client.username == username return self._client def checks( self, request: Request, collection_name: str, deposit: Optional[Deposit] = None ) -> ParsedRequestHeaders: if deposit is None: collection = get_collection_by_name(collection_name) else: assert collection_name == deposit.collection.name collection = deposit.collection client = self.get_client(request) collection_id = collection.id collections = client.collections assert collections is not None if collection_id not in collections: raise DepositError( FORBIDDEN, f"Client {client.username} cannot access collection {collection_name}", ) headers = self._read_headers(request) if deposit is not None: self.restrict_access(request, headers, deposit) if headers.on_behalf_of: raise DepositError(MEDIATION_NOT_ALLOWED, "Mediation is not supported.") self.additional_checks(request, headers, collection_name, deposit) return headers def restrict_access( self, request: Request, headers: ParsedRequestHeaders, deposit: Deposit ) -> None: """Allow modifications on deposit with status 'partial' only, reject the rest. """ if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = f"This deposit has status '{deposit.status}'" raise DepositError( BAD_REQUEST, summary=summary, verbose_description=description ) def _basic_not_allowed_method(self, request: Request, method: str): raise DepositError( METHOD_NOT_ALLOWED, f"{method} method is not supported on this endpoint", ) def get( self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: return self._basic_not_allowed_method(request, "GET") def post( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: return self._basic_not_allowed_method(request, "POST") def put( self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: return self._basic_not_allowed_method(request, "PUT") def delete( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: return self._basic_not_allowed_method(request, "DELETE") class APIGet(APIBase, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get( self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: """Endpoint to create/add resources to deposit. 
Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ deposit = get_deposit_by_id(deposit_id, collection_name) self.checks(request, collection_name, deposit) r = self.process_get(request, collection_name, deposit) status, content, content_type = r if content_type == "swh/generator": with content as path: return FileResponse( open(path, "rb"), status=status, content_type="application/zip" ) if content_type == "application/json": return HttpResponse( json.dumps(content), status=status, content_type=content_type ) return HttpResponse(content, status=status, content_type=content_type) @abstractmethod def process_get( self, request: Request, collection_name: str, deposit: Deposit ) -> Tuple[int, Any, str]: """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class APIPost(APIBase, metaclass=ABCMeta): """Mixin for class to support POST method. """ def post( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ if deposit_id is None: deposit = None else: deposit = get_deposit_by_id(deposit_id, collection_name) headers = self.checks(request, collection_name, deposit) status, iri_key, receipt = self.process_post( request, headers, collection_name, deposit ) return self._make_deposit_receipt( request, collection_name, status, iri_key, receipt, ) def _make_deposit_receipt( self, request, collection_name: str, status: int, iri_key: str, receipt: Receipt, ) -> HttpResponse: """Returns an HttpResponse with a SWORD Deposit receipt as content.""" # Build the IRIs in the receipt args = [collection_name, receipt.deposit_id] iris = { iri: request.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_IRI, CONT_FILE_IRI, SE_IRI, STATE_IRI] } context = { **attr.asdict(receipt), **iris, "packagings": ACCEPT_PACKAGINGS, } response = render( request, "deposit/deposit_receipt.xml", context=context, content_type="application/xml", status=status, ) response._headers["location"] = "Location", iris[iri_key] # type: ignore return response @abstractmethod def process_post( self, request, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit] = None, ) -> Tuple[int, str, Receipt]: """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_IRI, etc...) - Receipt """ pass class APIPut(APIBase, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put( self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 
400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ if deposit_id is None: deposit = None else: deposit = get_deposit_by_id(deposit_id, collection_name) headers = self.checks(request, collection_name, deposit) self.process_put(request, headers, collection_name, deposit) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, ) -> None: """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class APIDelete(APIBase, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ assert deposit_id is not None deposit = get_deposit_by_id(deposit_id, collection_name) self.checks(request, collection_name, deposit) self.process_delete(request, collection_name, deposit) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete( self, request: Request, collection_name: str, deposit: Deposit ) -> None: """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.APIUpdateArchive) """ pass diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py index e86d65cd..a62a93ad 100644 --- a/swh/deposit/parsers.py +++ b/swh/deposit/parsers.py @@ -1,194 +1,94 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining parsers with SWORD 2.0 supported mediatypes. """ import logging -from typing import Dict, Optional, Union from xml.parsers.expat import ExpatError from django.conf import settings from rest_framework.parsers import BaseParser, FileUploadParser, MultiPartParser from swh.deposit.errors import ParserError from swh.deposit.utils import parse_xml as _parse_xml -from swh.model.exceptions import ValidationError -from swh.model.identifiers import ( - DIRECTORY, - RELEASE, - REVISION, - SNAPSHOT, - SWHID, - parse_swhid, -) logger = logging.getLogger(__name__) class SWHFileUploadZipParser(FileUploadParser): """File upload parser limited to zip archive. """ media_type = "application/zip" class SWHFileUploadTarParser(FileUploadParser): """File upload parser limited to tarball (tar, tar.gz, tar.*) archives. """ media_type = "application/x-tar" class SWHXMLParser(BaseParser): """ XML parser. """ media_type = "application/xml" def parse(self, stream, media_type=None, parser_context=None): """ Parses the incoming bytestream as XML and returns the resulting data. 
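        A minimal illustration (a sketch: the expected output shape is assumed
        from the namespace map in ``swh.deposit.utils.parse_xml`` and the
        expectations in test_parsers.py; Django settings must be configured)::

            >>> import io
            >>> SWHXMLParser().parse(io.BytesIO(
            ...     b'<entry xmlns="http://www.w3.org/2005/Atom">'
            ...     b'<title>Awesome Compiler</title></entry>'))
            {'atom:title': 'Awesome Compiler'}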
""" parser_context = parser_context or {} encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET) return _parse_xml(stream, encoding=encoding) class SWHAtomEntryParser(SWHXMLParser): """Atom entry parser limited to specific mediatype """ media_type = "application/atom+xml;type=entry" def parse(self, stream, media_type=None, parser_context=None): # We do not actually want to parse the stream yet # because we want to keep the raw data as well # this is done later in the atom entry call # (cf. swh.deposit.api.common.APIBase._atom_entry) return stream class SWHMultiPartParser(MultiPartParser): """Multipart parser limited to a subset of mediatypes. """ media_type = "multipart/*; *" def parse_xml(raw_content): """Parse xml body. Args: raw_content (bytes): The content to parse Raises: ParserError in case of a malformed xml Returns: content parsed as dict. """ try: return SWHXMLParser().parse(raw_content) except ExpatError as e: raise ParserError(str(e)) - - -ALLOWED_QUALIFIERS_NODE_TYPE = (SNAPSHOT, REVISION, RELEASE, DIRECTORY) - - -def parse_swh_reference(metadata: Dict) -> Optional[Union[str, SWHID]]: - """Parse swh reference within the metadata dict (or origin) reference if found, None - otherwise. - - - - - - - - or: - - - - - - - Raises: - ValidationError in case the swhid referenced (if any) is invalid - - Returns: - Either swhid or origin reference if any. None otherwise. - - """ # noqa - visit_swhid = None - anchor_swhid = None - - swh_deposit = metadata.get("swh:deposit") - if not swh_deposit: - return None - - swh_reference = swh_deposit.get("swh:reference") - if not swh_reference: - return None - - swh_origin = swh_reference.get("swh:origin") - if swh_origin: - url = swh_origin.get("@url") - if url: - return url - - swh_object = swh_reference.get("swh:object") - if not swh_object: - return None - - swhid = swh_object.get("@swhid") - if not swhid: - return None - swhid_reference = parse_swhid(swhid) - - if swhid_reference.metadata: - anchor = swhid_reference.metadata.get("anchor") - if anchor: - anchor_swhid = parse_swhid(anchor) - if anchor_swhid.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE: - error_msg = ( - "anchor qualifier should be a core SWHID with type one of " - f" {', '.join(ALLOWED_QUALIFIERS_NODE_TYPE)}" - ) - raise ValidationError(error_msg) - - visit = swhid_reference.metadata.get("visit") - if visit: - visit_swhid = parse_swhid(visit) - if visit_swhid.object_type != SNAPSHOT: - raise ValidationError( - f"visit qualifier should be a core SWHID with type {SNAPSHOT}" - ) - - if ( - visit_swhid - and anchor_swhid - and visit_swhid.object_type == SNAPSHOT - and anchor_swhid.object_type == SNAPSHOT - ): - logger.warn( - "SWHID use of both anchor and visit targeting " - f"a snapshot: {swhid_reference}" - ) - raise ValidationError( - "'anchor=swh:1:snp:' is not supported when 'visit' is also provided." 
- ) - - return swhid_reference diff --git a/swh/deposit/tests/api/test_parsers.py b/swh/deposit/tests/api/test_parsers.py index 1ca0be61..cc606be3 100644 --- a/swh/deposit/tests/api/test_parsers.py +++ b/swh/deposit/tests/api/test_parsers.py @@ -1,242 +1,129 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import OrderedDict import io -import pytest - -from swh.deposit.parsers import SWHXMLParser, parse_swh_reference, parse_xml -from swh.model.exceptions import ValidationError -from swh.model.identifiers import parse_swhid +from swh.deposit.parsers import SWHXMLParser def test_parsing_without_duplicates(): xml_no_duplicate = io.BytesIO( b""" Awesome Compiler GPL3.0 https://opensource.org/licenses/GPL-3.0 Python3 author1 Inria ocaml http://issuetracker.com """ ) actual_result = SWHXMLParser().parse(xml_no_duplicate) expected_dict = OrderedDict( [ ("atom:title", "Awesome Compiler"), ( "codemeta:license", OrderedDict( [ ("codemeta:name", "GPL3.0"), ("codemeta:url", "https://opensource.org/licenses/GPL-3.0"), ] ), ), ("codemeta:runtimePlatform", "Python3"), ( "codemeta:author", OrderedDict( [("codemeta:name", "author1"), ("codemeta:affiliation", "Inria")] ), ), ("codemeta:programmingLanguage", "ocaml"), ("codemeta:issueTracker", "http://issuetracker.com"), ] ) assert expected_dict == actual_result def test_parsing_with_duplicates(): xml_with_duplicates = io.BytesIO( b""" Another Compiler GNU/Linux GPL3.0 https://opensource.org/licenses/GPL-3.0 Un*x author1 Inria author2 Inria ocaml haskell spdx http://spdx.org python3 """ ) actual_result = SWHXMLParser().parse(xml_with_duplicates) expected_dict = OrderedDict( [ ("atom:title", "Another Compiler"), ("codemeta:runtimePlatform", ["GNU/Linux", "Un*x"]), ( "codemeta:license", [ OrderedDict( [ ("codemeta:name", "GPL3.0"), ("codemeta:url", "https://opensource.org/licenses/GPL-3.0"), ] ), OrderedDict( [("codemeta:name", "spdx"), ("codemeta:url", "http://spdx.org")] ), ], ), ( "codemeta:author", [ OrderedDict( [ ("codemeta:name", "author1"), ("codemeta:affiliation", "Inria"), ] ), OrderedDict( [ ("codemeta:name", "author2"), ("codemeta:affiliation", "Inria"), ] ), ], ), ("codemeta:programmingLanguage", ["ocaml", "haskell", "python3"]), ] ) assert expected_dict == actual_result - - -@pytest.fixture -def xml_with_origin_reference(): - xml_data = """ - - - - - - - - """ - return xml_data.strip() - - -def test_parse_swh_reference_origin(xml_with_origin_reference): - url = "https://url" - xml_data = xml_with_origin_reference.format(url=url) - metadata = parse_xml(xml_data) - - actual_origin = parse_swh_reference(metadata) - assert actual_origin == url - - -@pytest.fixture -def xml_with_empty_reference(): - xml_data = """ - - - {swh_reference} - - - """ - return xml_data.strip() - - -@pytest.mark.parametrize( - "xml_ref", - [ - "", - "", - "", - """""", - ], -) -def test_parse_swh_reference_empty(xml_with_empty_reference, xml_ref): - xml_body = xml_with_empty_reference.format(swh_reference=xml_ref) - metadata = parse_xml(xml_body) - - assert parse_swh_reference(metadata) is None - - -@pytest.fixture -def xml_with_swhid(atom_dataset): - return atom_dataset["entry-data-with-swhid"] - - -@pytest.mark.parametrize( - "swhid", - [ - 
"swh:1:cnt:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=https://hal.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:4fc1e36fca86b2070204bedd51106014a614f321;anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba;path=/moranegg-AffectationRO-df7f68b/", # noqa - "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:9c5de20cfb54682370a398fcc733e829903c8cba", # noqa - "swh:1:rev:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba", # noqa - "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:rel:9c5de20cfb54682370a398fcc733e829903c8cba", # noqa - "swh:1:snp:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:snp:9c5de20cfb54682370a398fcc733e829903c8cba", # noqa - "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49", - ], -) -def test_parse_swh_reference_swhid(swhid, xml_with_swhid): - xml_data = xml_with_swhid.format(swhid=swhid) - metadata = parse_xml(xml_data) - - actual_swhid = parse_swh_reference(metadata) - assert actual_swhid is not None - - expected_swhid = parse_swhid(swhid) - assert actual_swhid == expected_swhid - - -@pytest.mark.parametrize( - "invalid_swhid,error_msg", - [ - ("swh:1:cnt:31b5c8cc985d190b5a7ef4878128ebfdc235", "Unexpected length"), - ( - "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:rev:0175049fc45055a3824a1675ac06e3711619a55a", # noqa - "visit qualifier should be a core SWHID with type", - ), - ( - "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;anchor=swh:1:cnt:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa - "anchor qualifier should be a core SWHID with type one of", - ), - ( - "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:snp:b5f505b005435fa5c4fa4c279792bd7b17167c04", # noqa - "anchor=swh:1:snp", - ), - ], -) -def test_parse_swh_reference_invalid_swhid(invalid_swhid, error_msg, xml_with_swhid): - """Unparsable swhid should raise - - """ - xml_invalid_swhid = xml_with_swhid.format(swhid=invalid_swhid) - metadata = parse_xml(xml_invalid_swhid) - - with pytest.raises(ValidationError, match=error_msg): - parse_swh_reference(metadata) diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py index 430e5790..21e00537 100644 --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -1,200 +1,311 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Union from unittest.mock import patch import pytest from swh.deposit import utils +from swh.deposit.parsers import parse_xml +from swh.model.exceptions import ValidationError from swh.model.identifiers import SWHID, parse_swhid from swh.model.model import MetadataTargetType +@pytest.fixture +def xml_with_origin_reference(): + xml_data = """ + + + + + + + + """ + return xml_data.strip() + + def test_merge(): """Calling utils.merge on dicts should merge without losing information """ d0 = {"author": "someone", "license": [["gpl2"]], "a": 1} d1 = { "author": ["author0", {"name": "author1"}], "license": [["gpl3"]], "b": {"1": "2"}, } d2 = {"author": map(lambda x: x, ["else"]), "license": "mit", "b": {"2": "3",}} d3 = { "author": (v for v in ["no one"]), } actual_merge = utils.merge(d0, d1, d2, d3) expected_merge = { "a": 1, "license": [["gpl2"], ["gpl3"], "mit"], "author": ["someone", 
"author0", {"name": "author1"}, "else", "no one"], "b": {"1": "2", "2": "3",}, } assert actual_merge == expected_merge def test_merge_2(): d0 = {"license": "gpl2", "runtime": {"os": "unix derivative"}} d1 = {"license": "gpl3", "runtime": "GNU/Linux"} expected = { "license": ["gpl2", "gpl3"], "runtime": [{"os": "unix derivative"}, "GNU/Linux"], } actual = utils.merge(d0, d1) assert actual == expected def test_merge_edge_cases(): input_dict = { "license": ["gpl2", "gpl3"], "runtime": [{"os": "unix derivative"}, "GNU/Linux"], } # against empty dict actual = utils.merge(input_dict, {}) assert actual == input_dict # against oneself actual = utils.merge(input_dict, input_dict, input_dict) assert actual == input_dict def test_merge_one_dict(): """Merge one dict should result in the same dict value """ input_and_expected = {"anything": "really"} actual = utils.merge(input_and_expected) assert actual == input_and_expected def test_merge_raise(): """Calling utils.merge with any no dict argument should raise """ d0 = {"author": "someone", "a": 1} d1 = ["not a dict"] with pytest.raises(ValueError): utils.merge(d0, d1) with pytest.raises(ValueError): utils.merge(d1, d0) with pytest.raises(ValueError): utils.merge(d1) assert utils.merge(d0) == d0 @patch("swh.deposit.utils.normalize_timestamp", side_effect=lambda x: x) def test_normalize_date_0(mock_normalize): """When date is a list, choose the first date and normalize it Note: We do not test swh.model.identifiers which is already tested in swh.model """ actual_date = utils.normalize_date(["2017-10-12", "date1"]) expected_date = "2017-10-12 00:00:00+00:00" assert str(actual_date) == expected_date @patch("swh.deposit.utils.normalize_timestamp", side_effect=lambda x: x) def test_normalize_date_1(mock_normalize): """Providing a date in a reasonable format, everything is fine Note: We do not test swh.model.identifiers which is already tested in swh.model """ actual_date = utils.normalize_date("2018-06-11 17:02:02") expected_date = "2018-06-11 17:02:02+00:00" assert str(actual_date) == expected_date @patch("swh.deposit.utils.normalize_timestamp", side_effect=lambda x: x) def test_normalize_date_doing_irrelevant_stuff(mock_normalize): """Providing a date with only the year results in a reasonable date Note: We do not test swh.model.identifiers which is already tested in swh.model """ actual_date = utils.normalize_date("2017") expected_date = "2017-01-01 00:00:00+00:00" assert str(actual_date) == expected_date @pytest.mark.parametrize( "swhid_or_origin,expected_type,expected_metadata_context", [ ("https://something", MetadataTargetType.ORIGIN, {"origin": None}), ( "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49", MetadataTargetType.CONTENT, {"origin": None}, ), ( "swh:1:snp:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=http://blah", MetadataTargetType.SNAPSHOT, {"origin": "http://blah", "path": None}, ), ( "swh:1:dir:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;path=/path", MetadataTargetType.DIRECTORY, {"origin": None, "path": b"/path"}, ), ( "swh:1:rev:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;visit=swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa MetadataTargetType.REVISION, { "origin": None, "path": None, "snapshot": parse_swhid( "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49" ), }, ), ( "swh:1:rel:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa MetadataTargetType.RELEASE, { "origin": None, "path": None, "directory": parse_swhid( 
"swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49" ), }, ), ], ) def test_compute_metadata_context( swhid_or_origin: Union[str, SWHID], expected_type, expected_metadata_context ): if expected_type != MetadataTargetType.ORIGIN: assert isinstance(swhid_or_origin, str) swhid_or_origin = parse_swhid(swhid_or_origin) object_type, metadata_context = utils.compute_metadata_context(swhid_or_origin) assert object_type == expected_type assert metadata_context == expected_metadata_context + + +def test_parse_swh_reference_origin(xml_with_origin_reference): + url = "https://url" + xml_data = xml_with_origin_reference.format(url=url) + metadata = parse_xml(xml_data) + + actual_origin = utils.parse_swh_reference(metadata) + assert actual_origin == url + + +@pytest.fixture +def xml_with_empty_reference(): + xml_data = """ + + + {swh_reference} + + + """ + return xml_data.strip() + + +@pytest.mark.parametrize( + "xml_ref", + [ + "", + "", + "", + """""", + ], +) +def test_parse_swh_reference_empty(xml_with_empty_reference, xml_ref): + xml_body = xml_with_empty_reference.format(swh_reference=xml_ref) + metadata = utils.parse_xml(xml_body) + + assert utils.parse_swh_reference(metadata) is None + + +@pytest.fixture +def xml_with_swhid(atom_dataset): + return atom_dataset["entry-data-with-swhid"] + + +@pytest.mark.parametrize( + "swhid", + [ + "swh:1:cnt:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=https://hal.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:4fc1e36fca86b2070204bedd51106014a614f321;anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba;path=/moranegg-AffectationRO-df7f68b/", # noqa + "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:9c5de20cfb54682370a398fcc733e829903c8cba", # noqa + "swh:1:rev:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:rev:9c5de20cfb54682370a398fcc733e829903c8cba", # noqa + "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:rel:9c5de20cfb54682370a398fcc733e829903c8cba", # noqa + "swh:1:snp:31b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:snp:9c5de20cfb54682370a398fcc733e829903c8cba", # noqa + "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49", + ], +) +def test_parse_swh_reference_swhid(swhid, xml_with_swhid): + xml_data = xml_with_swhid.format(swhid=swhid) + metadata = utils.parse_xml(xml_data) + + actual_swhid = utils.parse_swh_reference(metadata) + assert actual_swhid is not None + + expected_swhid = parse_swhid(swhid) + assert actual_swhid == expected_swhid + + +@pytest.mark.parametrize( + "invalid_swhid,error_msg", + [ + ("swh:1:cnt:31b5c8cc985d190b5a7ef4878128ebfdc235", "Unexpected length"), + ( + "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:rev:0175049fc45055a3824a1675ac06e3711619a55a", # noqa + "visit qualifier should be a core SWHID with type", + ), + ( + "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;anchor=swh:1:cnt:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa + "anchor qualifier should be a core SWHID with type one of", + ), + ( + "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:snp:b5f505b005435fa5c4fa4c279792bd7b17167c04", # noqa + "anchor=swh:1:snp", + ), + ], +) +def test_parse_swh_reference_invalid_swhid(invalid_swhid, error_msg, xml_with_swhid): + """Unparsable swhid should raise + + """ + xml_invalid_swhid = xml_with_swhid.format(swhid=invalid_swhid) + metadata = utils.parse_xml(xml_invalid_swhid) + + with pytest.raises(ValidationError, match=error_msg): + 
utils.parse_swh_reference(metadata) diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py index 6fdc1c6e..9bbd12c3 100644 --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -1,142 +1,244 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import logging from types import GeneratorType -from typing import Any, Dict, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union import iso8601 import xmltodict -from swh.model.identifiers import SWHID, normalize_timestamp, parse_swhid +from swh.model.exceptions import ValidationError +from swh.model.identifiers import ( + DIRECTORY, + RELEASE, + REVISION, + SNAPSHOT, + SWHID, + normalize_timestamp, + parse_swhid, +) from swh.model.model import MetadataTargetType +logger = logging.getLogger(__name__) + def parse_xml(stream, encoding="utf-8"): namespaces = { "http://www.w3.org/2005/Atom": "atom", "http://www.w3.org/2007/app": "app", "http://purl.org/dc/terms/": "dc", "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta", "http://purl.org/net/sword/terms/": "sword", "https://www.softwareheritage.org/schema/2018/deposit": "swh", } data = xmltodict.parse( stream, encoding=encoding, namespaces=namespaces, process_namespaces=True, dict_constructor=dict, ) if "atom:entry" in data: data = data["atom:entry"] return data def merge(*dicts): """Given an iterator of dicts, merge them losing no information. Args: *dicts: arguments are all supposed to be dict to merge into one Returns: dict merged without losing information """ def _extend(existing_val, value): """Given an existing value and a value (as potential lists), merge them together without repetition. """ if isinstance(value, (list, map, GeneratorType)): vals = value else: vals = [value] for v in vals: if v in existing_val: continue existing_val.append(v) return existing_val d = {} for data in dicts: if not isinstance(data, dict): raise ValueError("dicts is supposed to be a variable arguments of dict") for key, value in data.items(): existing_val = d.get(key) if not existing_val: d[key] = value continue if isinstance(existing_val, (list, map, GeneratorType)): new_val = _extend(existing_val, value) elif isinstance(existing_val, dict): if isinstance(value, dict): new_val = merge(existing_val, value) else: new_val = _extend([existing_val], value) else: new_val = _extend([existing_val], value) d[key] = new_val return d def normalize_date(date): """Normalize date fields as expected by swh workers. If date is a list, elect arbitrarily the first element of that list If date is (then) a string, parse it through dateutil.parser.parse to extract a datetime. Then normalize it through swh.model.identifiers.normalize_timestamp. Returns The swh date object """ if isinstance(date, list): date = date[0] if isinstance(date, str): date = iso8601.parse_date(date) return normalize_timestamp(date) def compute_metadata_context( swhid_reference: Union[SWHID, str] ) -> Tuple[MetadataTargetType, Dict[str, Any]]: """Given a SWHID object, determine the context as a dict. The parse_swhid calls within are not expected to raise (because they should have been caught early on). 
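    For instance (mirroring a case exercised in test_utils.py), a directory
    SWHID qualified with a path::

        compute_metadata_context(
            parse_swhid("swh:1:dir:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;path=/path")
        )
        # -> (MetadataTargetType.DIRECTORY, {"origin": None, "path": b"/path"})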
""" metadata_context: Dict[str, Any] = {"origin": None} if isinstance(swhid_reference, SWHID): object_type = MetadataTargetType(swhid_reference.object_type) assert object_type != MetadataTargetType.ORIGIN if swhid_reference.metadata: path = swhid_reference.metadata.get("path") metadata_context = { "origin": swhid_reference.metadata.get("origin"), "path": path.encode() if path else None, } snapshot = swhid_reference.metadata.get("visit") if snapshot: metadata_context["snapshot"] = parse_swhid(snapshot) anchor = swhid_reference.metadata.get("anchor") if anchor: anchor_swhid = parse_swhid(anchor) metadata_context[anchor_swhid.object_type] = anchor_swhid else: object_type = MetadataTargetType.ORIGIN return object_type, metadata_context + + +ALLOWED_QUALIFIERS_NODE_TYPE = (SNAPSHOT, REVISION, RELEASE, DIRECTORY) + + +def parse_swh_reference(metadata: Dict) -> Optional[Union[str, SWHID]]: + """Parse swh reference within the metadata dict (or origin) reference if found, None + otherwise. + + + + + + + + or: + + + + + + + Raises: + ValidationError in case the swhid referenced (if any) is invalid + + Returns: + Either swhid or origin reference if any. None otherwise. + + """ # noqa + visit_swhid = None + anchor_swhid = None + + swh_deposit = metadata.get("swh:deposit") + if not swh_deposit: + return None + + swh_reference = swh_deposit.get("swh:reference") + if not swh_reference: + return None + + swh_origin = swh_reference.get("swh:origin") + if swh_origin: + url = swh_origin.get("@url") + if url: + return url + + swh_object = swh_reference.get("swh:object") + if not swh_object: + return None + + swhid = swh_object.get("@swhid") + if not swhid: + return None + swhid_reference = parse_swhid(swhid) + + if swhid_reference.metadata: + anchor = swhid_reference.metadata.get("anchor") + if anchor: + anchor_swhid = parse_swhid(anchor) + if anchor_swhid.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE: + error_msg = ( + "anchor qualifier should be a core SWHID with type one of " + f" {', '.join(ALLOWED_QUALIFIERS_NODE_TYPE)}" + ) + raise ValidationError(error_msg) + + visit = swhid_reference.metadata.get("visit") + if visit: + visit_swhid = parse_swhid(visit) + if visit_swhid.object_type != SNAPSHOT: + raise ValidationError( + f"visit qualifier should be a core SWHID with type {SNAPSHOT}" + ) + + if ( + visit_swhid + and anchor_swhid + and visit_swhid.object_type == SNAPSHOT + and anchor_swhid.object_type == SNAPSHOT + ): + logger.warn( + "SWHID use of both anchor and visit targeting " + f"a snapshot: {swhid_reference}" + ) + raise ValidationError( + "'anchor=swh:1:snp:' is not supported when 'visit' is also provided." + ) + + return swhid_reference