diff --git a/requirements-swh.txt b/requirements-swh.txt
index 9224ac8a..cc602280 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,9 +1,9 @@
 swh.auth[django] >= 0.5.3
 swh.core >= 0.0.95
 swh.counters >= 0.5.1
 swh.indexer >= 0.4.1
-swh.model >= 0.5.0
+swh.model >= 2.6.3
 swh.scheduler >= 0.7.0
 swh.search >= 0.2.0
-swh.storage >= 0.11.10
+swh.storage >= 0.31.0
 swh.vault >= 0.0.33
diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py
index 9033c291..025afebc 100644
--- a/swh/web/api/urls.py
+++ b/swh/web/api/urls.py
@@ -1,20 +1,21 @@
 # Copyright (C) 2017-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 from swh.web.api.apiurls import APIUrls
 import swh.web.api.views.content  # noqa
 import swh.web.api.views.directory  # noqa
 import swh.web.api.views.graph  # noqa
 import swh.web.api.views.identifiers  # noqa
+import swh.web.api.views.metadata  # noqa
 import swh.web.api.views.origin  # noqa
 import swh.web.api.views.origin_save  # noqa
 import swh.web.api.views.ping  # noqa
 import swh.web.api.views.release  # noqa
 import swh.web.api.views.revision  # noqa
 import swh.web.api.views.snapshot  # noqa
 import swh.web.api.views.stat  # noqa
 import swh.web.api.views.vault  # noqa

 urlpatterns = APIUrls.get_url_patterns()
diff --git a/swh/web/api/views/metadata.py b/swh/web/api/views/metadata.py
new file mode 100644
index 00000000..bb3e91b6
--- /dev/null
+++ b/swh/web/api/views/metadata.py
@@ -0,0 +1,177 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import base64
+
+import iso8601
+
+from django.http import HttpResponse
+
+from swh.model import hashutil, identifiers
+from swh.model.model import MetadataAuthority, MetadataAuthorityType
+from swh.web.api.apidoc import api_doc, format_docstring
+from swh.web.api.apiurls import api_route
+from swh.web.common import archive, converters
+from swh.web.common.exc import BadInputExc, NotFoundExc
+from swh.web.common.utils import reverse
+
+SWHID_RE = "swh:1:[a-z]{3}:[0-9a-z]{40}"
+
+
+@api_route(
+    f"/raw-extrinsic-metadata/swhid/(?P<target>{SWHID_RE})/",
+    "api-1-raw-extrinsic-metadata-swhid",
+)
+@api_doc("/raw-extrinsic-metadata/swhid/")
+@format_docstring()
+def api_raw_extrinsic_metadata_swhid(request, target):
+    """
+    .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target)
+
+        Returns raw `extrinsic metadata`_ collected on a given object.
+
+        .. _extrinsic metadata: https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata
+
+        :param string target: The SWHID of the object whose metadata should be returned
+        :query string authority: A metadata authority identifier, formatted as
+            ``<type> <url>``. Required.
+        :query string after: An ISO8601 representation of the minimum timestamp of metadata
+            to fetch. Defaults to allowing all metadata.
+        :query int limit: Maximum number of metadata objects to return
+            (defaults to 100, capped at 10000).
+
+        {common_headers}
+
+        :>jsonarr string target: SWHID of the object described by this metadata
+        :>jsonarr string discovery_date: ISO8601 timestamp of the moment this
+            metadata was collected.
+        :>jsonarr object authority: authority this metadata is coming from
+        :>jsonarr object fetcher: tool used to fetch the metadata
+        :>jsonarr string format: short identifier of the format of the metadata
+        :>jsonarr string metadata_url: link to download the metadata "blob" itself
+        :>jsonarr string origin: URL of the origin in whose context
+            the metadata is valid, if any
+        :>jsonarr int visit: identifier of the visit in whose context
+            the metadata is valid, if any
+        :>jsonarr string snapshot: SWHID of the snapshot in whose context
+            the metadata is valid, if any
+        :>jsonarr string release: SWHID of the release in whose context
+            the metadata is valid, if any
+        :>jsonarr string revision: SWHID of the revision in whose context
+            the metadata is valid, if any
+        :>jsonarr string path: path in whose context the metadata
+            is valid, if any, relative to a release or revision as anchor
+        :>jsonarr string directory: SWHID of the directory in whose context
+            the metadata is valid, if any
+
+        :statuscode 200: no error
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`raw-extrinsic-metadata/swhid/swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307/?authority=forge%20https://pypi.org/`
+    """  # noqa
+    authority_str: str = request.query_params.get("authority")
+    after_str: str = request.query_params.get("after")
+    limit_str: str = request.query_params.get("limit", "100")
+    page_token_str: str = request.query_params.get("page_token")
+
+    if not authority_str:
+        raise BadInputExc("The 'authority' query parameter is required.")
+    if " " not in authority_str.strip():
+        raise BadInputExc("The 'authority' query parameter should contain a space.")
+
+    (authority_type_str, authority_url) = authority_str.split(" ", 1)
+    try:
+        authority_type = MetadataAuthorityType(authority_type_str)
+    except ValueError:
+        raise BadInputExc(
+            f"Invalid 'authority' type, should be one of: "
+            f"{', '.join(member.value for member in MetadataAuthorityType)}"
+        )
+    authority = MetadataAuthority(authority_type, authority_url)
+
+    if after_str:
+        try:
+            after = iso8601.parse_date(after_str)
+        except iso8601.ParseError:
+            raise BadInputExc("Invalid format for 'after' parameter.") from None
+    else:
+        after = None
+
+    try:
+        limit = int(limit_str)
+    except ValueError:
+        raise BadInputExc("'limit' parameter must be an integer.") from None
+    limit = min(limit, 10000)
+
+    try:
+        target = identifiers.CoreSWHID.from_string(target).to_extended()
+    except identifiers.ValidationError as e:
+        raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None
+
+    if page_token_str:
+        page_token = base64.urlsafe_b64decode(page_token_str)
+    else:
+        page_token = None
+
+    result_page = archive.storage.raw_extrinsic_metadata_get(
+        target=target,
+        authority=authority,
+        after=after,
+        page_token=page_token,
+        limit=limit,
+    )
+
+    results = []
+
+    for metadata in result_page.results:
+        result = converters.from_raw_extrinsic_metadata(metadata)
+
+        # We can't reliably send metadata directly, because it is a bytestring,
+        # and we have to return JSON documents.
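+        # For illustration only (all values below are hypothetical), one
+        # serialized entry roughly looks like:
+        #   {"target": "swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307",
+        #    "discovery_date": "2021-01-25T11:27:51+00:00",
+        #    "authority": {"type": "forge", "url": "https://pypi.org/"},
+        #    "fetcher": {"name": "swh.loader.package", "version": "0.1.0"},
+        #    "format": "pypi-project-json",
+        #    "metadata_url": "..."}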
+        result["metadata_url"] = reverse(
+            "api-1-raw-extrinsic-metadata-get",
+            url_args={"id": hashutil.hash_to_hex(metadata.id)},
+            request=request,
+        )
+
+        results.append(result)
+
+    response = {
+        "results": results,
+        "headers": {},
+    }
+    if result_page.next_page_token is not None:
+        response["headers"]["link-next"] = reverse(
+            "api-1-raw-extrinsic-metadata-swhid",
+            url_args={"target": str(target)},
+            query_params=dict(
+                authority=authority_str,
+                after=after_str,
+                limit=limit_str,
+                page_token=base64.urlsafe_b64encode(
+                    result_page.next_page_token
+                ).decode(),
+            ),
+            request=request,
+        )
+
+    return response
+
+
+@api_route(
+    "/raw-extrinsic-metadata/get/(?P<id>[0-9a-z]+)/",
+    "api-1-raw-extrinsic-metadata-get",
+)
+def api_raw_extrinsic_metadata_get(request, id):
+    # This is an internal endpoint that should only be accessed via URLs given
+    # by /raw-extrinsic-metadata/swhid/; so it is not documented.
+    metadata = archive.storage.raw_extrinsic_metadata_get_by_ids(
+        [hashutil.hash_to_bytes(id)]
+    )
+    if not metadata:
+        raise NotFoundExc(
+            "Metadata not found. Use /raw-extrinsic-metadata/swhid/ to access metadata."
+        )
+
+    return HttpResponse(metadata[0].metadata, content_type="application/octet-stream")
diff --git a/swh/web/common/converters.py b/swh/web/common/converters.py
index 00f09bd0..3b1dcfb7 100644
--- a/swh/web/common/converters.py
+++ b/swh/web/common/converters.py
@@ -1,383 +1,395 @@
 # Copyright (C) 2015-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import datetime
 import json
 from typing import Any, Dict, Union

 from swh.core.utils import decode_with_escape
 from swh.model import hashutil
-from swh.model.model import Release, Revision
+from swh.model.model import RawExtrinsicMetadata, Release, Revision
 from swh.storage.interface import PartialBranches
 from swh.web.common.typing import OriginInfo, OriginVisitInfo


 def _group_checksums(data):
     """Groups checksums values computed from hash functions used in swh
     and stored in data dict under a single entry 'checksums'
     """
     if data:
         checksums = {}
         for hash in hashutil.ALGORITHMS:
             if hash in data and data[hash]:
                 checksums[hash] = data[hash]
                 del data[hash]
         if len(checksums) > 0:
             data["checksums"] = checksums


 def fmap(f, data):
     """Map f to data at each level.

     This must keep the origin data structure type:
     - map -> map
     - dict -> dict
     - list -> list
     - None -> None

     Args:
         f: function that expects one argument.
         data: data to traverse to apply the f function.
               list, map, dict or bare value.

     Returns:
         The same data-structure with modified values by the f function.

     """
     if data is None:
         return data
     if isinstance(data, map):
         return map(lambda y: fmap(f, y), (x for x in data))
     if isinstance(data, list):
         return [fmap(f, x) for x in data]
     if isinstance(data, tuple):
         return tuple(fmap(f, x) for x in data)
     if isinstance(data, dict):
         return {k: fmap(f, v) for (k, v) in data.items()}
     return f(data)


 def from_swh(
     dict_swh,
     hashess={},
     bytess={},
     dates={},
     blacklist={},
     removables_if_empty={},
     empty_dict={},
     empty_list={},
     convert={},
     convert_fn=lambda x: x,
 ):
     """Convert from a swh dictionary to something reasonably json
     serializable.

     Args:
         dict_swh: the origin dictionary needed to be transformed
         hashess: list/set of keys representing hashes values (sha1, sha256,
             sha1_git, etc...) as bytes.
Those need to be transformed in hexadecimal string bytess: list/set of keys representing bytes values which needs to be decoded blacklist: set of keys to filter out from the conversion convert: set of keys whose associated values need to be converted using convert_fn convert_fn: the conversion function to apply on the value of key in 'convert' The remaining keys are copied as is in the output. Returns: dictionary equivalent as dict_swh only with its keys converted. """ def convert_hashes_bytes(v): """v is supposedly a hash as bytes, returns it converted in hex. """ if isinstance(v, bytes): return hashutil.hash_to_hex(v) return v def convert_bytes(v): """v is supposedly a bytes string, decode as utf-8. FIXME: Improve decoding policy. If not utf-8, break! """ if isinstance(v, bytes): return v.decode("utf-8") return v def convert_date(v): """ Args: v (dict or datatime): either: - a dict with three keys: - timestamp (dict or integer timestamp) - offset - negative_utc - or, a datetime We convert it to a human-readable string """ if not v: return v if isinstance(v, datetime.datetime): return v.isoformat() tz = datetime.timezone(datetime.timedelta(minutes=v["offset"])) swh_timestamp = v["timestamp"] if isinstance(swh_timestamp, dict): date = datetime.datetime.fromtimestamp(swh_timestamp["seconds"], tz=tz) else: date = datetime.datetime.fromtimestamp(swh_timestamp, tz=tz) datestr = date.isoformat() if v["offset"] == 0 and v["negative_utc"]: # remove the rightmost + and replace it with a - return "-".join(datestr.rsplit("+", 1)) return datestr if not dict_swh: return dict_swh new_dict = {} for key, value in dict_swh.items(): if key in blacklist or (key in removables_if_empty and not value): continue if key in dates: new_dict[key] = convert_date(value) elif key in convert: new_dict[key] = convert_fn(value) elif isinstance(value, dict): new_dict[key] = from_swh( value, hashess=hashess, bytess=bytess, dates=dates, blacklist=blacklist, removables_if_empty=removables_if_empty, empty_dict=empty_dict, empty_list=empty_list, convert=convert, convert_fn=convert_fn, ) elif key in hashess: new_dict[key] = fmap(convert_hashes_bytes, value) elif key in bytess: try: new_dict[key] = fmap(convert_bytes, value) except UnicodeDecodeError: if "decoding_failures" not in new_dict: new_dict["decoding_failures"] = [key] else: new_dict["decoding_failures"].append(key) new_dict[key] = fmap(decode_with_escape, value) elif key in empty_dict and not value: new_dict[key] = {} elif key in empty_list and not value: new_dict[key] = [] else: new_dict[key] = value _group_checksums(new_dict) return new_dict def from_origin(origin: Dict[str, Any]) -> OriginInfo: """Convert from a swh origin to an origin dictionary. """ return from_swh(origin) def from_release(release: Release) -> Dict[str, Any]: """Convert from a swh release to a json serializable release dictionary. Args: release: A release model object Returns: release dictionary with the following keys - id: hexadecimal sha1 (string) - revision: hexadecimal sha1 (string) - comment: release's comment message (string) - name: release's name (string) - author: release's author identifier (swh's id) - synthetic: the synthetic property (boolean) """ return from_swh( release.to_dict(), hashess={"id", "target"}, bytess={"message", "name", "fullname", "email"}, dates={"date"}, ) class SWHMetadataEncoder(json.JSONEncoder): """Special json encoder for metadata field which can contain bytes encoded value. 
""" def default(self, obj): if isinstance(obj, bytes): try: return obj.decode("utf-8") except UnicodeDecodeError: # fallback to binary representation to avoid display errors return repr(obj) # Let the base class default method raise the TypeError return json.JSONEncoder.default(self, obj) -def convert_revision_metadata(metadata): +def convert_metadata(metadata): """Convert json specific dict to a json serializable one. """ - if not metadata: + if metadata is None: return {} return json.loads(json.dumps(metadata, cls=SWHMetadataEncoder)) def from_revision(revision: Union[Dict[str, Any], Revision]) -> Dict[str, Any]: """Convert swh revision model object to a json serializable revision dictionary. Args: revision: revision model object Returns: dict: Revision dictionary with the same keys as inputs, except: - sha1s are in hexadecimal strings (id, directory) - bytes are decoded in string (author_name, committer_name, author_email, committer_email) Remaining keys are left as is """ if isinstance(revision, Revision): revision_d = revision.to_dict() else: revision_d = revision revision_d = from_swh( revision_d, hashess={"id", "directory", "parents", "children"}, bytess={"name", "fullname", "email", "extra_headers", "message"}, convert={"metadata"}, - convert_fn=convert_revision_metadata, + convert_fn=convert_metadata, dates={"date", "committer_date"}, ) if revision_d: if "parents" in revision_d: revision_d["merge"] = len(revision_d["parents"]) > 1 return revision_d +def from_raw_extrinsic_metadata( + metadata: Union[Dict[str, Any], RawExtrinsicMetadata] +) -> Dict[str, Any]: + """Convert RawExtrinsicMetadata model object to a json serializable dictionary. + """ + return from_swh( + metadata.to_dict() if isinstance(metadata, RawExtrinsicMetadata) else metadata, + blacklist={"id", "metadata"}, + dates={"discovery_date"}, + ) + + def from_content(content): """Convert swh content to serializable content dictionary. """ return from_swh( content, hashess={"sha1", "sha1_git", "sha256", "blake2s256"}, blacklist={"ctime"}, convert={"status"}, convert_fn=lambda v: "absent" if v == "hidden" else v, ) def from_person(person): """Convert swh person to serializable person dictionary. """ return from_swh(person, bytess={"name", "fullname", "email"}) def from_origin_visit(visit: Dict[str, Any]) -> OriginVisitInfo: """Convert swh origin_visit to serializable origin_visit dictionary. """ ov = from_swh( visit, hashess={"target", "snapshot"}, bytess={"branch"}, dates={"date"}, empty_dict={"metadata"}, ) return ov def from_snapshot(snapshot): """Convert swh snapshot to serializable (partial) snapshot dictionary. """ sv = from_swh(snapshot, hashess={"id", "target"}, bytess={"next_branch"}) if sv and "branches" in sv: sv["branches"] = {decode_with_escape(k): v for k, v in sv["branches"].items()} for k, v in snapshot["branches"].items(): # alias target existing branch names, not a sha1 if v and v["target_type"] == "alias": branch = decode_with_escape(k) target = decode_with_escape(v["target"]) sv["branches"][branch]["target"] = target return sv def from_partial_branches(branches: PartialBranches): """Convert PartialBranches to serializable partial snapshot dictionary """ return from_snapshot( { "id": branches["id"], "branches": { branch_name: branch.to_dict() if branch else None for (branch_name, branch) in branches["branches"].items() }, "next_branch": branches["next_branch"], } ) def from_directory_entry(dir_entry): """Convert swh directory to serializable directory dictionary. 
""" return from_swh( dir_entry, hashess={"dir_id", "sha1_git", "sha1", "sha256", "blake2s256", "target"}, bytess={"name"}, removables_if_empty={"sha1", "sha1_git", "sha256", "blake2s256", "status"}, convert={"status"}, convert_fn=lambda v: "absent" if v == "hidden" else v, ) def from_filetype(content_entry): """Convert swh content to serializable dictionary containing keys 'id', 'encoding', and 'mimetype'. """ return from_swh(content_entry, hashess={"id"}) diff --git a/swh/web/tests/api/views/test_metadata.py b/swh/web/tests/api/views/test_metadata.py new file mode 100644 index 00000000..fe4bec6b --- /dev/null +++ b/swh/web/tests/api/views/test_metadata.py @@ -0,0 +1,135 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import attr +from hypothesis import given, strategies +import pytest + +from swh.model.hypothesis_strategies import raw_extrinsic_metadata +from swh.web.common.utils import reverse +from swh.web.tests.api.views.utils import scroll_results +from swh.web.tests.utils import check_api_get_responses, check_http_get_response + + +@given(raw_extrinsic_metadata()) +def test_api_raw_extrinsic_metadata(api_client, archive_data, metadata): + archive_data.metadata_authority_add([metadata.authority]) + archive_data.metadata_fetcher_add([metadata.fetcher]) + archive_data.raw_extrinsic_metadata_add([metadata]) + + authority = metadata.authority + url = reverse( + "api-1-raw-extrinsic-metadata-swhid", + url_args={"target": str(metadata.target)}, + query_params={"authority": f"{authority.type.value} {authority.url}"}, + ) + rv = check_api_get_responses(api_client, url, status_code=200) + + assert len(rv.data) == 1 + + expected_result = metadata.to_dict() + del expected_result["id"] + del expected_result["metadata"] + metadata_url = rv.data[0]["metadata_url"] + expected_result["metadata_url"] = metadata_url + expected_result["discovery_date"] = expected_result["discovery_date"].isoformat() + assert rv.data == [expected_result] + + rv = check_http_get_response(api_client, metadata_url, status_code=200) + assert rv["Content-Type"] == "application/octet-stream" + assert rv.content == metadata.metadata + + +@pytest.mark.parametrize("limit", [1, 2, 10, 100]) +@given(strategies.sets(raw_extrinsic_metadata(), min_size=1)) +def test_api_raw_extrinsic_metadata_scroll(api_client, archive_data, limit, metadata): + # Make all metadata objects use the same authority and target + metadata0 = next(iter(metadata)) + metadata = { + attr.evolve(m, authority=metadata0.authority, target=metadata0.target) + for m in metadata + } + authority = metadata0.authority + + archive_data.metadata_authority_add([authority]) + archive_data.metadata_fetcher_add(list({m.fetcher for m in metadata})) + archive_data.raw_extrinsic_metadata_add(metadata) + + url = reverse( + "api-1-raw-extrinsic-metadata-swhid", + url_args={"target": str(metadata0.target)}, + query_params={ + "authority": f"{authority.type.value} {authority.url}", + "limit": limit, + }, + ) + + results = scroll_results(api_client, url) + + expected_results = [m.to_dict() for m in metadata] + for expected_result in expected_results: + del expected_result["id"] + del expected_result["metadata"] + expected_result["discovery_date"] = expected_result[ + "discovery_date" + ].isoformat() + + for result in results: + del result["metadata_url"] + + assert results 
== expected_results + + +_swhid = "swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307" + + +@pytest.mark.parametrize( + "status_code,url_args,query_params", + [ + pytest.param( + 200, + {"target": _swhid}, + {"authority": "forge http://example.org"}, + id="minimal working", + ), + pytest.param( + 200, + {"target": _swhid}, + { + "authority": "forge http://example.org", + "after": "2021-06-18T09:31:09", + "limit": 100, + }, + id="maximal working", + ), + pytest.param( + 400, + {"target": _swhid}, + {"authority": "foo http://example.org"}, + id="invalid authority type", + ), + pytest.param( + 400, + {"target": _swhid}, + {"authority": "forge http://example.org", "after": "yesterday",}, + id="invalid 'after' format", + ), + pytest.param( + 400, + {"target": _swhid}, + {"authority": "forge http://example.org", "limit": "abc",}, + id="invalid 'limit'", + ), + ], +) +def test_api_raw_extrinsic_metadata_check_params( + api_client, archive_data, status_code, url_args, query_params +): + url = reverse( + "api-1-raw-extrinsic-metadata-swhid", + url_args=url_args, + query_params=query_params, + ) + check_api_get_responses(api_client, url, status_code=status_code) diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py index 4dc42be2..8f3439d6 100644 --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -1,706 +1,681 @@ -# Copyright (C) 2015-2020 The Software Heritage developers +# Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import timedelta import json from hypothesis import given import pytest -from requests.utils import parse_header_links from swh.indexer.storage.model import OriginIntrinsicMetadataRow from swh.model.hashutil import hash_to_bytes from swh.model.model import Origin, OriginVisit, OriginVisitStatus from swh.storage.exc import StorageAPIError, StorageDBError from swh.storage.utils import now from swh.web.api.utils import enrich_origin, enrich_origin_visit from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits from swh.web.common.utils import reverse +from swh.web.tests.api.views.utils import scroll_results from swh.web.tests.data import ( INDEXER_TOOL, ORIGIN_MASTER_REVISION, ORIGIN_METADATA_KEY, ORIGIN_METADATA_VALUE, ) from swh.web.tests.strategies import new_origin, new_snapshots, origin, visit_dates from swh.web.tests.utils import check_api_get_responses -def _scroll_results(api_client, url): - """Iterates through pages of results, and returns them all.""" - results = [] - - while True: - rv = check_api_get_responses(api_client, url, status_code=200) - - results.extend(rv.data) - - if "Link" in rv: - for link in parse_header_links(rv["Link"]): - if link["rel"] == "next": - # Found link to next page of results - url = link["url"] - break - else: - # No link with 'rel=next' - break - else: - # No Link header - break - - return results - - def test_api_lookup_origin_visits_raise_error(api_client, mocker): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "voluntary error to check the bad request middleware." 
mock_get_origin_visits.side_effect = BadInputExc(err_msg) url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = check_api_get_responses(api_client, url, status_code=400) assert rv.data == {"exception": "BadInputExc", "reason": err_msg} def test_api_lookup_origin_visits_raise_swh_storage_error_db(api_client, mocker): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "Storage exploded! Will be back online shortly!" mock_get_origin_visits.side_effect = StorageDBError(err_msg) url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = check_api_get_responses(api_client, url, status_code=503) assert rv.data == { "exception": "StorageDBError", "reason": "An unexpected error occurred in the backend: %s" % err_msg, } def test_api_lookup_origin_visits_raise_swh_storage_error_api(api_client, mocker): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "Storage API dropped dead! Will resurrect asap!" mock_get_origin_visits.side_effect = StorageAPIError(err_msg) url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) rv = check_api_get_responses(api_client, url, status_code=503) assert rv.data == { "exception": "StorageAPIError", "reason": "An unexpected error occurred in the api backend: %s" % err_msg, } @given(new_origin(), visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visits( api_client, archive_data, new_origin, visit_dates, new_snapshots ): archive_data.origin_add([new_origin]) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] )[0] archive_data.snapshot_add([new_snapshots[i]]) visit_status = OriginVisitStatus( origin=new_origin.url, visit=origin_visit.visit, date=now(), status="full", snapshot=new_snapshots[i].id, ) archive_data.origin_visit_status_add([visit_status]) all_visits = list(reversed(get_origin_visits(new_origin.to_dict()))) for last_visit, expected_visits in ( (None, all_visits[:2]), (all_visits[1]["visit"], all_visits[2:]), ): url = reverse( "api-1-origin-visits", url_args={"origin_url": new_origin.url}, query_params={"per_page": 2, "last_visit": last_visit}, ) rv = check_api_get_responses(api_client, url, status_code=200) for i in range(len(expected_visits)): expected_visits[i] = enrich_origin_visit( expected_visits[i], with_origin_link=False, with_origin_visit_link=True, request=rv.wsgi_request, ) assert rv.data == expected_visits @given(new_origin(), visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visits_by_id( api_client, archive_data, new_origin, visit_dates, new_snapshots ): archive_data.origin_add([new_origin]) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] )[0] archive_data.snapshot_add([new_snapshots[i]]) visit_status = OriginVisitStatus( origin=new_origin.url, visit=origin_visit.visit, date=now(), status="full", snapshot=new_snapshots[i].id, ) archive_data.origin_visit_status_add([visit_status]) all_visits = list(reversed(get_origin_visits(new_origin.to_dict()))) for last_visit, expected_visits in ( (None, all_visits[:2]), (all_visits[1]["visit"], all_visits[2:4]), ): url = reverse( "api-1-origin-visits", url_args={"origin_url": new_origin.url}, query_params={"per_page": 2, "last_visit": last_visit}, ) rv = check_api_get_responses(api_client, url, status_code=200) for i in 
range(len(expected_visits)): expected_visits[i] = enrich_origin_visit( expected_visits[i], with_origin_link=False, with_origin_visit_link=True, request=rv.wsgi_request, ) assert rv.data == expected_visits @given(new_origin(), visit_dates(3), new_snapshots(3)) def test_api_lookup_origin_visit( api_client, archive_data, new_origin, visit_dates, new_snapshots ): archive_data.origin_add([new_origin]) for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] )[0] visit_id = origin_visit.visit archive_data.snapshot_add([new_snapshots[i]]) visit_status = OriginVisitStatus( origin=new_origin.url, visit=origin_visit.visit, date=visit_date + timedelta(minutes=5), status="full", snapshot=new_snapshots[i].id, ) archive_data.origin_visit_status_add([visit_status]) url = reverse( "api-1-origin-visit", url_args={"origin_url": new_origin.url, "visit_id": visit_id}, ) rv = check_api_get_responses(api_client, url, status_code=200) expected_visit = archive_data.origin_visit_get_by(new_origin.url, visit_id) expected_visit = enrich_origin_visit( expected_visit, with_origin_link=True, with_origin_visit_link=False, request=rv.wsgi_request, ) assert rv.data == expected_visit @given(new_origin()) def test_api_lookup_origin_visit_latest_no_visit(api_client, archive_data, new_origin): archive_data.origin_add([new_origin]) url = reverse("api-1-origin-visit-latest", url_args={"origin_url": new_origin.url}) rv = check_api_get_responses(api_client, url, status_code=404) assert rv.data == { "exception": "NotFoundExc", "reason": "No visit for origin %s found" % new_origin.url, } @given(new_origin(), visit_dates(2), new_snapshots(1)) def test_api_lookup_origin_visit_latest( api_client, archive_data, new_origin, visit_dates, new_snapshots ): archive_data.origin_add([new_origin]) visit_dates.sort() visit_ids = [] for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] )[0] visit_ids.append(origin_visit.visit) archive_data.snapshot_add([new_snapshots[0]]) visit_status = OriginVisitStatus( origin=new_origin.url, visit=visit_ids[0], date=now(), status="full", snapshot=new_snapshots[0].id, ) archive_data.origin_visit_status_add([visit_status]) url = reverse("api-1-origin-visit-latest", url_args={"origin_url": new_origin.url}) rv = check_api_get_responses(api_client, url, status_code=200) expected_visit = archive_data.origin_visit_get_by(new_origin.url, visit_ids[1]) expected_visit = enrich_origin_visit( expected_visit, with_origin_link=True, with_origin_visit_link=False, request=rv.wsgi_request, ) assert rv.data == expected_visit @given(new_origin(), visit_dates(2), new_snapshots(1)) def test_api_lookup_origin_visit_latest_with_snapshot( api_client, archive_data, new_origin, visit_dates, new_snapshots ): archive_data.origin_add([new_origin]) visit_dates.sort() visit_ids = [] for i, visit_date in enumerate(visit_dates): origin_visit = archive_data.origin_visit_add( [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] )[0] visit_ids.append(origin_visit.visit) archive_data.snapshot_add([new_snapshots[0]]) # Add snapshot to the latest visit visit_id = visit_ids[-1] visit_status = OriginVisitStatus( origin=new_origin.url, visit=visit_id, date=now(), status="full", snapshot=new_snapshots[0].id, ) archive_data.origin_visit_status_add([visit_status]) url = reverse( "api-1-origin-visit-latest", url_args={"origin_url": 
new_origin.url}, query_params={"require_snapshot": True}, ) rv = check_api_get_responses(api_client, url, status_code=200) expected_visit = archive_data.origin_visit_status_get_latest( new_origin.url, type="git", require_snapshot=True ) expected_visit = enrich_origin_visit( expected_visit, with_origin_link=True, with_origin_visit_link=False, request=rv.wsgi_request, ) assert rv.data == expected_visit @given(origin()) def test_api_lookup_origin_visit_not_found(api_client, origin): all_visits = list(reversed(get_origin_visits(origin))) max_visit_id = max([v["visit"] for v in all_visits]) url = reverse( "api-1-origin-visit", url_args={"origin_url": origin["url"], "visit_id": max_visit_id + 1}, ) rv = check_api_get_responses(api_client, url, status_code=404) assert rv.data == { "exception": "NotFoundExc", "reason": "Origin %s or its visit with id %s not found!" % (origin["url"], max_visit_id + 1), } def test_api_origins_wrong_input(api_client, archive_data): """Should fail with 400 if the input is deprecated. """ # fail if wrong input url = reverse("api-1-origins", query_params={"origin_from": 1}) rv = check_api_get_responses(api_client, url, status_code=400) assert rv.data == { "exception": "BadInputExc", "reason": "Please use the Link header to browse through result", } def test_api_origins(api_client, archive_data): page_result = archive_data.origin_list(limit=10000) origins = page_result.results origin_urls = {origin.url for origin in origins} # Get only one url = reverse("api-1-origins", query_params={"origin_count": 1}) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1 assert {origin["url"] for origin in rv.data} <= origin_urls # Get all url = reverse("api-1-origins", query_params={"origin_count": len(origins)}) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(origins) assert {origin["url"] for origin in rv.data} == origin_urls # Get "all + 10" url = reverse("api-1-origins", query_params={"origin_count": len(origins) + 10}) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(origins) assert {origin["url"] for origin in rv.data} == origin_urls @pytest.mark.parametrize("origin_count", [1, 2, 10, 100]) def test_api_origins_scroll(api_client, archive_data, origin_count): page_result = archive_data.origin_list(limit=10000) origins = page_result.results origin_urls = {origin.url for origin in origins} url = reverse("api-1-origins", query_params={"origin_count": origin_count}) - results = _scroll_results(api_client, url) + results = scroll_results(api_client, url) assert len(results) == len(origins) assert {origin["url"] for origin in results} == origin_urls @given(origin()) def test_api_origin_by_url(api_client, archive_data, origin): origin_url = origin["url"] url = reverse("api-1-origin", url_args={"origin_url": origin_url}) rv = check_api_get_responses(api_client, url, status_code=200) expected_origin = archive_data.origin_get([origin_url])[0] expected_origin = enrich_origin(expected_origin, rv.wsgi_request) assert rv.data == expected_origin @given(new_origin()) def test_api_origin_not_found(api_client, new_origin): url = reverse("api-1-origin", url_args={"origin_url": new_origin.url}) rv = check_api_get_responses(api_client, url, status_code=404) assert rv.data == { "exception": "NotFoundExc", "reason": "Origin with url %s not found!" 
% new_origin.url, } @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search(api_client, mocker, backend): if backend != "swh-search": # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } # Search for 'github.com', get only one url = reverse( "api-1-origin-search", url_args={"url_pattern": "github.com"}, query_params={"limit": 1}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1 assert {origin["url"] for origin in rv.data} <= expected_origins assert rv.data == [ enrich_origin({"url": origin["url"]}, request=rv.wsgi_request) for origin in rv.data ] # Search for 'github.com', get all url = reverse( "api-1-origin-search", url_args={"url_pattern": "github.com"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins assert rv.data == [ enrich_origin({"url": origin["url"]}, request=rv.wsgi_request) for origin in rv.data ] # Search for 'github.com', get more than available url = reverse( "api-1-origin-search", url_args={"url_pattern": "github.com"}, query_params={"limit": 10}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins assert rv.data == [ enrich_origin({"url": origin["url"]}, request=rv.wsgi_request) for origin in rv.data ] @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search_words(api_client, mocker, backend): if backend != "swh-search": # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } url = reverse( "api-1-origin-search", url_args={"url_pattern": "github com"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins url = reverse( "api-1-origin-search", url_args={"url_pattern": "com github"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins url = reverse( "api-1-origin-search", url_args={"url_pattern": "memononen libtess2"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1 assert {origin["url"] for origin in rv.data} == { "https://github.com/memononen/libtess2" } url = reverse( "api-1-origin-search", url_args={"url_pattern": "libtess2 memononen"}, query_params={"limit": 2}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1 assert {origin["url"] for origin in rv.data} == { "https://github.com/memononen/libtess2" } @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search_visit_type(api_client, mocker, backend): if backend != "swh-search": # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } url = reverse( "api-1-origin-search", url_args={"url_pattern": "github com",}, query_params={"visit_type": "git"}, ) rv = 
check_api_get_responses(api_client, url, status_code=200) assert {origin["url"] for origin in rv.data} == expected_origins url = reverse( "api-1-origin-search", url_args={"url_pattern": "github com",}, query_params={"visit_type": "foo"}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert rv.data == [] @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) @pytest.mark.parametrize("limit", [1, 2, 3, 10]) def test_api_origin_search_scroll(api_client, archive_data, mocker, limit, backend): if backend != "swh-search": # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) expected_origins = { "https://github.com/wcoder/highlightjs-line-numbers.js", "https://github.com/memononen/libtess2", } url = reverse( "api-1-origin-search", url_args={"url_pattern": "github.com"}, query_params={"limit": limit}, ) - results = _scroll_results(api_client, url) + results = scroll_results(api_client, url) assert {origin["url"] for origin in results} == expected_origins @pytest.mark.parametrize("backend", ["swh-search", "swh-storage"]) def test_api_origin_search_limit(api_client, archive_data, tests_data, mocker, backend): if backend == "swh-search": tests_data["search"].origin_update( [{"url": "http://foobar/{}".format(i)} for i in range(2000)] ) else: # equivalent to not configuring search in the config mocker.patch("swh.web.common.archive.search", None) archive_data.origin_add( [Origin(url="http://foobar/{}".format(i)) for i in range(2000)] ) url = reverse( "api-1-origin-search", url_args={"url_pattern": "foobar"}, query_params={"limit": 1050}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == 1000 @pytest.mark.parametrize("backend", ["swh-search", "swh-indexer-storage"]) def test_api_origin_metadata_search(api_client, mocker, backend): mock_config = mocker.patch("swh.web.common.archive.config") mock_config.get_config.return_value = {"metadata_search_backend": backend} url = reverse( "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE} ) rv = check_api_get_responses(api_client, url, status_code=200) expected_data = [ { "url": origin_url, "metadata": { "from_revision": master_rev, "tool": { "name": INDEXER_TOOL["tool_name"], "version": INDEXER_TOOL["tool_version"], "configuration": INDEXER_TOOL["tool_configuration"], "id": INDEXER_TOOL["id"], }, "mappings": [], }, } for origin_url, master_rev in ORIGIN_MASTER_REVISION.items() ] for i in range(len(expected_data)): expected = expected_data[i] response = rv.data[i] metadata = response["metadata"].pop("metadata") assert any( [ORIGIN_METADATA_VALUE in json.dumps(val) for val in metadata.values()] ) assert response == expected def test_api_origin_metadata_search_limit(api_client, mocker): mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage") oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext oimsft.side_effect = lambda conjunction, limit: [ OriginIntrinsicMetadataRow( id=origin_url, from_revision=hash_to_bytes(master_rev), indexer_configuration_id=INDEXER_TOOL["id"], metadata={ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE}, mappings=[], ) for origin_url, master_rev in ORIGIN_MASTER_REVISION.items() ] url = reverse( "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE} ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(ORIGIN_MASTER_REVISION) oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=70) url = 
reverse( "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE, "limit": 10}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(ORIGIN_MASTER_REVISION) oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=10) url = reverse( "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE, "limit": 987}, ) rv = check_api_get_responses(api_client, url, status_code=200) assert len(rv.data) == len(ORIGIN_MASTER_REVISION) oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=100) @given(origin()) def test_api_origin_intrinsic_metadata(api_client, origin): url = reverse( "api-origin-intrinsic-metadata", url_args={"origin_url": origin["url"]} ) rv = check_api_get_responses(api_client, url, status_code=200) expected_data = {ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE} assert rv.data == expected_data def test_api_origin_metadata_search_invalid(api_client, mocker): mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage") url = reverse("api-1-origin-metadata-search") check_api_get_responses(api_client, url, status_code=400) mock_idx_storage.assert_not_called() @pytest.mark.parametrize("backend", ["swh-counters", "swh-storage"]) def test_api_stat_counters(api_client, mocker, backend): mock_config = mocker.patch("swh.web.common.archive.config") mock_config.get_config.return_value = {"counters_backend": backend} url = reverse("api-1-stat-counters") rv = check_api_get_responses(api_client, url, status_code=200) counts = json.loads(rv.content) for obj in ["content", "origin", "release", "directory", "revision"]: assert counts.get(obj, 0) > 0 diff --git a/swh/web/tests/api/views/utils.py b/swh/web/tests/api/views/utils.py new file mode 100644 index 00000000..9887dba3 --- /dev/null +++ b/swh/web/tests/api/views/utils.py @@ -0,0 +1,33 @@ +# Copyright (C) 2015-2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from requests.utils import parse_header_links + +from swh.web.tests.utils import check_api_get_responses + + +def scroll_results(api_client, url): + """Iterates through pages of results, and returns them all.""" + results = [] + + while True: + rv = check_api_get_responses(api_client, url, status_code=200) + + results.extend(rv.data) + + if "Link" in rv: + for link in parse_header_links(rv["Link"]): + if link["rel"] == "next": + # Found link to next page of results + url = link["url"] + break + else: + # No link with 'rel=next' + break + else: + # No Link header + break + + return results diff --git a/swh/web/tests/common/test_converters.py b/swh/web/tests/common/test_converters.py index a4cc597c..53ed0f86 100644 --- a/swh/web/tests/common/test_converters.py +++ b/swh/web/tests/common/test_converters.py @@ -1,757 +1,757 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from swh.model import hashutil from swh.model.model import ( ObjectType, Person, Release, Revision, RevisionType, Timestamp, TimestampWithTimezone, ) from swh.web.common import converters def test_fmap(): assert [2, 3, None, 4] == converters.fmap(lambda x: x + 1, [1, 2, None, 3]) assert [11, 12, 13] == 
list( converters.fmap(lambda x: x + 10, map(lambda x: x, [1, 2, 3])) ) assert {"a": 2, "b": 4} == converters.fmap(lambda x: x * 2, {"a": 1, "b": 2}) assert 100 == converters.fmap(lambda x: x * 10, 10) assert {"a": [2, 6], "b": 4} == converters.fmap( lambda x: x * 2, {"a": [1, 3], "b": 2} ) assert converters.fmap(lambda x: x, None) is None def test_from_swh(): some_input = { "a": "something", "b": "someone", "c": b"sharp-0.3.4.tgz", "d": hashutil.hash_to_bytes("b04caf10e9535160d90e874b45aa426de762f19f"), "e": b"sharp.html/doc_002dS_005fISREG.html", "g": [b"utf-8-to-decode", b"another-one"], "h": "something filtered", "i": {"e": b"something"}, "j": { "k": { "l": [b"bytes thing", b"another thingy", b""], "n": "don't care either", }, "m": "don't care", }, "o": "something", "p": b"foo", "q": {"extra-headers": [["a", b"intact"]]}, "w": None, "r": {"p": "also intact", "q": "bar"}, "s": {"timestamp": 42, "offset": -420, "negative_utc": None,}, "s1": { "timestamp": {"seconds": 42, "microseconds": 0}, "offset": -420, "negative_utc": None, }, "s2": datetime.datetime(2013, 7, 1, 20, 0, 0, tzinfo=datetime.timezone.utc), "t": None, "u": None, "v": None, "x": None, } expected_output = { "a": "something", "b": "someone", "c": "sharp-0.3.4.tgz", "d": "b04caf10e9535160d90e874b45aa426de762f19f", "e": "sharp.html/doc_002dS_005fISREG.html", "g": ["utf-8-to-decode", "another-one"], "i": {"e": "something"}, "j": {"k": {"l": ["bytes thing", "another thingy", ""]}}, "p": "foo", "q": {"extra-headers": [["a", "intact"]]}, "w": {}, "r": {"p": "also intact", "q": "bar"}, "s": "1969-12-31T17:00:42-07:00", "s1": "1969-12-31T17:00:42-07:00", "s2": "2013-07-01T20:00:00+00:00", "u": {}, "v": [], "x": None, } actual_output = converters.from_swh( some_input, hashess={"d", "o", "x"}, bytess={"c", "e", "g", "l"}, dates={"s", "s1", "s2"}, blacklist={"h", "m", "n", "o"}, removables_if_empty={"t"}, empty_dict={"u"}, empty_list={"v"}, convert={"p", "q", "w"}, - convert_fn=converters.convert_revision_metadata, + convert_fn=converters.convert_metadata, ) assert expected_output == actual_output def test_from_swh_edge_cases_do_no_conversion_if_none_or_not_bytes(): some_input = {"a": "something", "b": None, "c": "someone", "d": None, "e": None} expected_output = { "a": "something", "b": None, "c": "someone", "d": None, "e": None, } actual_output = converters.from_swh( some_input, hashess={"a", "b"}, bytess={"c", "d"}, dates={"e"} ) assert expected_output == actual_output def test_from_swh_edge_cases_convert_invalid_utf8_bytes(): some_input = { "a": "something", "b": "someone", "c": b"a name \xff", "d": b"an email \xff", } expected_output = { "a": "something", "b": "someone", "c": "a name \\xff", "d": "an email \\xff", "decoding_failures": ["c", "d"], } actual_output = converters.from_swh( some_input, hashess={"a", "b"}, bytess={"c", "d"} ) for v in ["a", "b", "c", "d"]: assert expected_output[v] == actual_output[v] assert len(expected_output["decoding_failures"]) == len( actual_output["decoding_failures"] ) for v in expected_output["decoding_failures"]: assert v in actual_output["decoding_failures"] def test_from_swh_empty(): assert {} == converters.from_swh({}) def test_from_swh_none(): assert converters.from_swh(None) is None def test_from_origin(): origin_input = { "id": 9, "type": "ftp", "url": "rsync://ftp.gnu.org/gnu/octave", } expected_origin = { "id": 9, "type": "ftp", "url": "rsync://ftp.gnu.org/gnu/octave", } actual_origin = converters.from_origin(origin_input) assert actual_origin == expected_origin def 
test_from_origin_visit(): snap_hash = "b5f0b7f716735ebffe38505c60145c4fd9da6ca3" for snap in [snap_hash, None]: visit = { "date": { "timestamp": datetime.datetime( 2015, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ).timestamp(), "offset": 0, "negative_utc": False, }, "origin": 10, "visit": 100, "metadata": None, "status": "full", "snapshot": hashutil.hash_to_bytes(snap) if snap else snap, } expected_visit = { "date": "2015-01-01T22:00:00+00:00", "origin": 10, "visit": 100, "metadata": {}, "status": "full", "snapshot": snap_hash if snap else snap, } actual_visit = converters.from_origin_visit(visit) assert actual_visit == expected_visit def test_from_release(): """Convert release model object to a dict should be ok""" ts = int( datetime.datetime( 2015, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ).timestamp() ) release_input = Release( id=hashutil.hash_to_bytes("aad23fa492a0c5fed0708a6703be875448c86884"), target=hashutil.hash_to_bytes("5e46d564378afc44b31bb89f99d5675195fbdf67"), target_type=ObjectType.REVISION, date=TimestampWithTimezone( timestamp=Timestamp(seconds=ts, microseconds=0), offset=0, negative_utc=False, ), author=Person( name=b"author name", fullname=b"Author Name author@email", email=b"author@email", ), name=b"v0.0.1", message=b"some comment on release", synthetic=True, ) expected_release = { "id": "aad23fa492a0c5fed0708a6703be875448c86884", "target": "5e46d564378afc44b31bb89f99d5675195fbdf67", "target_type": "revision", "date": "2015-01-01T22:00:00+00:00", "author": { "name": "author name", "fullname": "Author Name author@email", "email": "author@email", }, "name": "v0.0.1", "message": "some comment on release", "target_type": "revision", "synthetic": True, } actual_release = converters.from_release(release_input) assert actual_release == expected_release def test_from_revision_model_object(): ts = int( datetime.datetime( 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc ).timestamp() ) revision_input = Revision( directory=hashutil.hash_to_bytes("7834ef7e7c357ce2af928115c6c6a42b7e2a44e6"), author=Person( name=b"Software Heritage", fullname=b"robot robot@softwareheritage.org", email=b"robot@softwareheritage.org", ), committer=Person( name=b"Software Heritage", fullname=b"robot robot@softwareheritage.org", email=b"robot@softwareheritage.org", ), message=b"synthetic revision message", date=TimestampWithTimezone( timestamp=Timestamp(seconds=ts, microseconds=0), offset=0, negative_utc=False, ), committer_date=TimestampWithTimezone( timestamp=Timestamp(seconds=ts, microseconds=0), offset=0, negative_utc=False, ), synthetic=True, type=RevisionType.TAR, parents=tuple( [ hashutil.hash_to_bytes("29d8be353ed3480476f032475e7c244eff7371d5"), hashutil.hash_to_bytes("30d8be353ed3480476f032475e7c244eff7371d5"), ] ), extra_headers=((b"gpgsig", b"some-signature"),), metadata={ "original_artifact": [ { "archive_type": "tar", "name": "webbase-5.7.0.tar.gz", "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" "309d36484e7edf7bb912", } ], }, ) expected_revision = { "id": "a001358278a0d811fe7072463f805da601121c2a", "directory": "7834ef7e7c357ce2af928115c6c6a42b7e2a44e6", "author": { "name": "Software Heritage", "fullname": "robot robot@softwareheritage.org", "email": "robot@softwareheritage.org", }, "committer": { "name": "Software Heritage", "fullname": "robot robot@softwareheritage.org", "email": "robot@softwareheritage.org", }, "message": "synthetic revision message", "date": 
"2000-01-17T11:23:54+00:00", "committer_date": "2000-01-17T11:23:54+00:00", "parents": tuple( [ "29d8be353ed3480476f032475e7c244eff7371d5", "30d8be353ed3480476f032475e7c244eff7371d5", ] ), "type": "tar", "synthetic": True, "extra_headers": (("gpgsig", "some-signature"),), "metadata": { "original_artifact": [ { "archive_type": "tar", "name": "webbase-5.7.0.tar.gz", "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" "309d36484e7edf7bb912", } ], }, "merge": True, } actual_revision = converters.from_revision(revision_input) assert actual_revision == expected_revision def test_from_revision(): ts = datetime.datetime( 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc ).timestamp() revision_input = { "id": hashutil.hash_to_bytes("18d8be353ed3480476f032475e7c233eff7371d5"), "directory": hashutil.hash_to_bytes("7834ef7e7c357ce2af928115c6c6a42b7e2a44e6"), "author": { "name": b"Software Heritage", "fullname": b"robot robot@softwareheritage.org", "email": b"robot@softwareheritage.org", }, "committer": { "name": b"Software Heritage", "fullname": b"robot robot@softwareheritage.org", "email": b"robot@softwareheritage.org", }, "message": b"synthetic revision message", "date": {"timestamp": ts, "offset": 0, "negative_utc": False,}, "committer_date": {"timestamp": ts, "offset": 0, "negative_utc": False,}, "synthetic": True, "type": "tar", "parents": [ hashutil.hash_to_bytes("29d8be353ed3480476f032475e7c244eff7371d5"), hashutil.hash_to_bytes("30d8be353ed3480476f032475e7c244eff7371d5"), ], "children": [ hashutil.hash_to_bytes("123546353ed3480476f032475e7c244eff7371d5"), ], "metadata": { "extra_headers": [["gpgsig", b"some-signature"]], "original_artifact": [ { "archive_type": "tar", "name": "webbase-5.7.0.tar.gz", "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" "309d36484e7edf7bb912", } ], }, } expected_revision = { "id": "18d8be353ed3480476f032475e7c233eff7371d5", "directory": "7834ef7e7c357ce2af928115c6c6a42b7e2a44e6", "author": { "name": "Software Heritage", "fullname": "robot robot@softwareheritage.org", "email": "robot@softwareheritage.org", }, "committer": { "name": "Software Heritage", "fullname": "robot robot@softwareheritage.org", "email": "robot@softwareheritage.org", }, "message": "synthetic revision message", "date": "2000-01-17T11:23:54+00:00", "committer_date": "2000-01-17T11:23:54+00:00", "children": ["123546353ed3480476f032475e7c244eff7371d5"], "parents": [ "29d8be353ed3480476f032475e7c244eff7371d5", "30d8be353ed3480476f032475e7c244eff7371d5", ], "type": "tar", "synthetic": True, "metadata": { "extra_headers": [["gpgsig", "some-signature"]], "original_artifact": [ { "archive_type": "tar", "name": "webbase-5.7.0.tar.gz", "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" "309d36484e7edf7bb912", } ], }, "merge": True, } actual_revision = converters.from_revision(revision_input) assert actual_revision == expected_revision def test_from_revision_nomerge(): revision_input = { "id": hashutil.hash_to_bytes("18d8be353ed3480476f032475e7c233eff7371d5"), "parents": [hashutil.hash_to_bytes("29d8be353ed3480476f032475e7c244eff7371d5")], } expected_revision = { "id": "18d8be353ed3480476f032475e7c233eff7371d5", "parents": ["29d8be353ed3480476f032475e7c244eff7371d5"], 
"merge": False, } actual_revision = converters.from_revision(revision_input) assert actual_revision == expected_revision def test_from_revision_noparents(): revision_input = { "id": hashutil.hash_to_bytes("18d8be353ed3480476f032475e7c233eff7371d5"), "directory": hashutil.hash_to_bytes("7834ef7e7c357ce2af928115c6c6a42b7e2a44e6"), "author": { "name": b"Software Heritage", "fullname": b"robot robot@softwareheritage.org", "email": b"robot@softwareheritage.org", }, "committer": { "name": b"Software Heritage", "fullname": b"robot robot@softwareheritage.org", "email": b"robot@softwareheritage.org", }, "message": b"synthetic revision message", "date": { "timestamp": datetime.datetime( 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc ).timestamp(), "offset": 0, "negative_utc": False, }, "committer_date": { "timestamp": datetime.datetime( 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc ).timestamp(), "offset": 0, "negative_utc": False, }, "synthetic": True, "type": "tar", "children": [ hashutil.hash_to_bytes("123546353ed3480476f032475e7c244eff7371d5"), ], "metadata": { "original_artifact": [ { "archive_type": "tar", "name": "webbase-5.7.0.tar.gz", "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" "309d36484e7edf7bb912", } ] }, } expected_revision = { "id": "18d8be353ed3480476f032475e7c233eff7371d5", "directory": "7834ef7e7c357ce2af928115c6c6a42b7e2a44e6", "author": { "name": "Software Heritage", "fullname": "robot robot@softwareheritage.org", "email": "robot@softwareheritage.org", }, "committer": { "name": "Software Heritage", "fullname": "robot robot@softwareheritage.org", "email": "robot@softwareheritage.org", }, "message": "synthetic revision message", "date": "2000-01-17T11:23:54+00:00", "committer_date": "2000-01-17T11:23:54+00:00", "children": ["123546353ed3480476f032475e7c244eff7371d5"], "type": "tar", "synthetic": True, "metadata": { "original_artifact": [ { "archive_type": "tar", "name": "webbase-5.7.0.tar.gz", "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" "309d36484e7edf7bb912", } ] }, } actual_revision = converters.from_revision(revision_input) assert actual_revision == expected_revision def test_from_revision_invalid(): revision_input = { "id": hashutil.hash_to_bytes("18d8be353ed3480476f032475e7c233eff7371d5"), "directory": hashutil.hash_to_bytes("7834ef7e7c357ce2af928115c6c6a42b7e2a44e6"), "author": { "name": b"Software Heritage", "fullname": b"robot robot@softwareheritage.org", "email": b"robot@softwareheritage.org", }, "committer": { "name": b"Software Heritage", "fullname": b"robot robot@softwareheritage.org", "email": b"robot@softwareheritage.org", }, "message": b"invalid message \xff", "date": { "timestamp": datetime.datetime( 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc ).timestamp(), "offset": 0, "negative_utc": False, }, "committer_date": { "timestamp": datetime.datetime( 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc ).timestamp(), "offset": 0, "negative_utc": False, }, "synthetic": True, "type": "tar", "parents": [ hashutil.hash_to_bytes("29d8be353ed3480476f032475e7c244eff7371d5"), hashutil.hash_to_bytes("30d8be353ed3480476f032475e7c244eff7371d5"), ], "children": [ hashutil.hash_to_bytes("123546353ed3480476f032475e7c244eff7371d5"), ], "metadata": { "original_artifact": [ { "archive_type": "tar", "name": 
"webbase-5.7.0.tar.gz", "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" "309d36484e7edf7bb912", } ] }, } expected_revision = { "id": "18d8be353ed3480476f032475e7c233eff7371d5", "directory": "7834ef7e7c357ce2af928115c6c6a42b7e2a44e6", "author": { "name": "Software Heritage", "fullname": "robot robot@softwareheritage.org", "email": "robot@softwareheritage.org", }, "committer": { "name": "Software Heritage", "fullname": "robot robot@softwareheritage.org", "email": "robot@softwareheritage.org", }, "message": "invalid message \\xff", "decoding_failures": ["message"], "date": "2000-01-17T11:23:54+00:00", "committer_date": "2000-01-17T11:23:54+00:00", "children": ["123546353ed3480476f032475e7c244eff7371d5"], "parents": [ "29d8be353ed3480476f032475e7c244eff7371d5", "30d8be353ed3480476f032475e7c244eff7371d5", ], "type": "tar", "synthetic": True, "metadata": { "original_artifact": [ { "archive_type": "tar", "name": "webbase-5.7.0.tar.gz", "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" "309d36484e7edf7bb912", } ] }, "merge": True, } actual_revision = converters.from_revision(revision_input) assert actual_revision == expected_revision def test_from_content_none(): assert converters.from_content(None) is None def test_from_content(): content_input = { "sha1": hashutil.hash_to_bytes("5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5"), "sha256": hashutil.hash_to_bytes( "39007420ca5de7cb3cfc15196335507e" "e76c98930e7e0afa4d2747d3bf96c926" ), "blake2s256": hashutil.hash_to_bytes( "49007420ca5de7cb3cfc15196335507e" "e76c98930e7e0afa4d2747d3bf96c926" ), "sha1_git": hashutil.hash_to_bytes("40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03"), "ctime": "something-which-is-filtered-out", "data": b"data in bytes", "length": 10, "status": "hidden", } # 'status' is filtered expected_content = { "checksums": { "sha1": "5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5", "sha256": "39007420ca5de7cb3cfc15196335507ee76c98" "930e7e0afa4d2747d3bf96c926", "blake2s256": "49007420ca5de7cb3cfc15196335507ee7" "6c98930e7e0afa4d2747d3bf96c926", "sha1_git": "40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03", }, "data": b"data in bytes", "length": 10, "status": "absent", } actual_content = converters.from_content(content_input) assert actual_content == expected_content def test_from_person(): person_input = { "id": 10, "anything": "else", "name": b"bob", "fullname": b"bob bob@alice.net", "email": b"bob@foo.alice", } expected_person = { "id": 10, "anything": "else", "name": "bob", "fullname": "bob bob@alice.net", "email": "bob@foo.alice", } actual_person = converters.from_person(person_input) assert actual_person == expected_person def test_from_directory_entries(): dir_entries_input = { "sha1": hashutil.hash_to_bytes("5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5"), "sha256": hashutil.hash_to_bytes( "39007420ca5de7cb3cfc15196335507e" "e76c98930e7e0afa4d2747d3bf96c926" ), "sha1_git": hashutil.hash_to_bytes("40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03"), "blake2s256": hashutil.hash_to_bytes( "685395c5dc57cada459364f0946d3dd45bad5fcbab" "c1048edb44380f1d31d0aa" ), "target": hashutil.hash_to_bytes("40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03"), "dir_id": hashutil.hash_to_bytes("40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03"), "name": b"bob", "type": 10, "status": "hidden", } expected_dir_entries = { "checksums": { "sha1": 
"5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5", "sha256": "39007420ca5de7cb3cfc15196335507ee76c98" "930e7e0afa4d2747d3bf96c926", "sha1_git": "40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03", "blake2s256": "685395c5dc57cada459364f0946d3dd45bad5f" "cbabc1048edb44380f1d31d0aa", }, "target": "40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03", "dir_id": "40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03", "name": "bob", "type": 10, "status": "absent", } actual_dir_entries = converters.from_directory_entry(dir_entries_input) assert actual_dir_entries == expected_dir_entries def test_from_filetype(): content_filetype = { "id": hashutil.hash_to_bytes("5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5"), "encoding": "utf-8", "mimetype": "text/plain", } expected_content_filetype = { "id": "5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5", "encoding": "utf-8", "mimetype": "text/plain", } actual_content_filetype = converters.from_filetype(content_filetype) assert actual_content_filetype == expected_content_filetype