Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_storage.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import attr | |||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import inspect | import inspect | ||||
import itertools | import itertools | ||||
import math | import math | ||||
import queue | import queue | ||||
import random | import random | ||||
import threading | import threading | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from contextlib import contextmanager | from contextlib import contextmanager | ||||
from datetime import timedelta | from datetime import timedelta | ||||
from unittest.mock import Mock | from unittest.mock import Mock | ||||
import attr | |||||
import psycopg2 | import psycopg2 | ||||
import pytest | import pytest | ||||
from hypothesis import given, strategies, settings, HealthCheck | from hypothesis import given, strategies, settings, HealthCheck | ||||
from typing import ClassVar, Optional | from typing import ClassVar, Optional | ||||
from swh.model import from_disk, identifiers | from swh.model import from_disk, identifiers | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.identifiers import SWHID | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Release, | Release, | ||||
Revision, | Revision, | ||||
Snapshot, | Snapshot, | ||||
MetadataTargetType, | |||||
) | ) | ||||
from swh.model.hypothesis_strategies import objects | from swh.model.hypothesis_strategies import objects | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.converters import origin_url_to_sha1 as sha1 | from swh.storage.converters import origin_url_to_sha1 as sha1 | ||||
from swh.storage.exc import HashCollision, StorageArgumentException | from swh.storage.exc import HashCollision, StorageArgumentException | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
from swh.storage.utils import content_hex_hashes, now | from swh.storage.utils import content_hex_hashes, now | ||||
▲ Show 20 Lines • Show All 3,193 Lines • ▼ Show 20 Lines | def test_object_find_by_sha1_git(self, swh_storage): | ||||
] | ] | ||||
ret = swh_storage.object_find_by_sha1_git(sha1_gits) | ret = swh_storage.object_find_by_sha1_git(sha1_gits) | ||||
assert expected == ret | assert expected == ret | ||||
def test_metadata_fetcher_add_get(self, swh_storage): | def test_metadata_fetcher_add_get(self, swh_storage): | ||||
actual_fetcher = swh_storage.metadata_fetcher_get( | actual_fetcher = swh_storage.metadata_fetcher_get( | ||||
data.metadata_fetcher["name"], data.metadata_fetcher["version"] | data.metadata_fetcher.name, data.metadata_fetcher.version | ||||
) | ) | ||||
assert actual_fetcher is None # does not exist | assert actual_fetcher is None # does not exist | ||||
swh_storage.metadata_fetcher_add(**data.metadata_fetcher) | swh_storage.metadata_fetcher_add([data.metadata_fetcher]) | ||||
res = swh_storage.metadata_fetcher_get( | res = swh_storage.metadata_fetcher_get( | ||||
data.metadata_fetcher["name"], data.metadata_fetcher["version"] | data.metadata_fetcher.name, data.metadata_fetcher.version | ||||
) | ) | ||||
assert res is not data.metadata_fetcher | |||||
assert res == data.metadata_fetcher | assert res == data.metadata_fetcher | ||||
def test_metadata_authority_add_get(self, swh_storage): | def test_metadata_authority_add_get(self, swh_storage): | ||||
actual_authority = swh_storage.metadata_authority_get( | actual_authority = swh_storage.metadata_authority_get( | ||||
data.metadata_authority["type"], data.metadata_authority["url"] | data.metadata_authority.type, data.metadata_authority.url | ||||
) | ) | ||||
assert actual_authority is None # does not exist | assert actual_authority is None # does not exist | ||||
swh_storage.metadata_authority_add(**data.metadata_authority) | swh_storage.metadata_authority_add([data.metadata_authority]) | ||||
res = swh_storage.metadata_authority_get( | res = swh_storage.metadata_authority_get( | ||||
data.metadata_authority["type"], data.metadata_authority["url"] | data.metadata_authority.type, data.metadata_authority.url | ||||
) | ) | ||||
assert res is not data.metadata_authority | |||||
assert res == data.metadata_authority | assert res == data.metadata_authority | ||||
def test_content_metadata_add(self, swh_storage): | def test_content_metadata_add(self, swh_storage): | ||||
content = data.cont | content = data.cont | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
content_swhid = f"swh:1:cnt:{content['sha1_git']}" | content_swhid = SWHID( | ||||
object_type="content", object_id=hash_to_bytes(content["sha1_git"]) | |||||
) | |||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.content_metadata_add(**data.content_metadata) | swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) | ||||
swh_storage.content_metadata_add(**data.content_metadata2) | |||||
result = swh_storage.content_metadata_get(content_swhid, authority) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.CONTENT, content_swhid, authority | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.content_metadata, data.content_metadata2] == list( | assert [data.content_metadata, data.content_metadata2] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
def test_content_metadata_add_duplicate(self, swh_storage): | def test_content_metadata_add_duplicate(self, swh_storage): | ||||
"""Duplicates should be silently updated.""" | """Duplicates should be silently updated.""" | ||||
content = data.cont | content = data.cont | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
content_swhid = f"swh:1:cnt:{content['sha1_git']}" | content_swhid = SWHID( | ||||
object_type="content", object_id=hash_to_bytes(content["sha1_git"]) | |||||
) | |||||
new_content_metadata2 = { | new_content_metadata2 = attr.evolve( | ||||
**data.content_metadata2, | data.content_metadata2, format="new-format", metadata=b"new-metadata", | ||||
"format": "new-format", | ) | ||||
"metadata": b"new-metadata", | |||||
} | |||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.content_metadata_add(**data.content_metadata) | swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) | ||||
swh_storage.content_metadata_add(**data.content_metadata2) | swh_storage.object_metadata_add([new_content_metadata2]) | ||||
swh_storage.content_metadata_add(**new_content_metadata2) | |||||
result = swh_storage.content_metadata_get(content_swhid, authority) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.CONTENT, content_swhid, authority | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
expected_results1 = (data.content_metadata, new_content_metadata2) | expected_results1 = (data.content_metadata, new_content_metadata2) | ||||
expected_results2 = (data.content_metadata, data.content_metadata2) | expected_results2 = (data.content_metadata, data.content_metadata2) | ||||
assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in ( | assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in ( | ||||
expected_results1, # cassandra | expected_results1, # cassandra | ||||
expected_results2, # postgresql | expected_results2, # postgresql | ||||
) | ) | ||||
def test_content_metadata_add_dict(self, swh_storage): | |||||
fetcher = data.metadata_fetcher | |||||
authority = data.metadata_authority | |||||
swh_storage.metadata_fetcher_add(**fetcher) | |||||
swh_storage.metadata_authority_add(**authority) | |||||
kwargs = data.content_metadata.copy() | |||||
kwargs["metadata"] = {"foo": "bar"} | |||||
with pytest.raises(StorageArgumentException): | |||||
swh_storage.content_metadata_add(**kwargs) | |||||
def test_content_metadata_get(self, swh_storage): | def test_content_metadata_get(self, swh_storage): | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority2 = data.metadata_authority2 | authority2 = data.metadata_authority2 | ||||
fetcher2 = data.metadata_fetcher2 | fetcher2 = data.metadata_fetcher2 | ||||
content1_swhid = f"swh:1:cnt:{data.cont['sha1_git']}" | content1_swhid = SWHID( | ||||
content2_swhid = f"swh:1:cnt:{data.cont2['sha1_git']}" | object_type="content", object_id=hash_to_bytes(data.cont["sha1_git"]) | ||||
) | |||||
content2_swhid = SWHID( | |||||
object_type="content", object_id=hash_to_bytes(data.cont2["sha1_git"]) | |||||
) | |||||
content1_metadata1 = data.content_metadata | content1_metadata1 = data.content_metadata | ||||
content1_metadata2 = data.content_metadata2 | content1_metadata2 = data.content_metadata2 | ||||
content1_metadata3 = data.content_metadata3 | content1_metadata3 = data.content_metadata3 | ||||
content2_metadata = {**data.content_metadata2, "id": content2_swhid} | content2_metadata = attr.evolve(data.content_metadata2, id=content2_swhid) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority, authority2]) | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | ||||
swh_storage.metadata_authority_add(**authority2) | |||||
swh_storage.metadata_fetcher_add(**fetcher2) | |||||
swh_storage.content_metadata_add(**content1_metadata1) | |||||
swh_storage.content_metadata_add(**content1_metadata2) | |||||
swh_storage.content_metadata_add(**content1_metadata3) | |||||
swh_storage.content_metadata_add(**content2_metadata) | |||||
result = swh_storage.content_metadata_get(content1_swhid, authority) | swh_storage.object_metadata_add( | ||||
[ | |||||
content1_metadata1, | |||||
content1_metadata2, | |||||
content1_metadata3, | |||||
content2_metadata, | |||||
] | |||||
) | |||||
result = swh_storage.object_metadata_get( | |||||
MetadataTargetType.CONTENT, content1_swhid, authority | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [content1_metadata1, content1_metadata2] == list( | assert [content1_metadata1, content1_metadata2] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.content_metadata_get(content1_swhid, authority2) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.CONTENT, content1_swhid, authority2 | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [content1_metadata3] == list( | assert [content1_metadata3] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.content_metadata_get(content2_swhid, authority) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.CONTENT, content2_swhid, authority | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [content2_metadata] == list(result["results"],) | assert [content2_metadata] == list(result["results"],) | ||||
def test_content_metadata_get_after(self, swh_storage): | def test_content_metadata_get_after(self, swh_storage): | ||||
content = data.cont | content = data.cont | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
content_swhid = f"swh:1:cnt:{content['sha1_git']}" | content_swhid = SWHID( | ||||
object_type="content", object_id=hash_to_bytes(content["sha1_git"]) | |||||
) | |||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.content_metadata_add(**data.content_metadata) | swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) | ||||
swh_storage.content_metadata_add(**data.content_metadata2) | |||||
result = swh_storage.content_metadata_get( | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.CONTENT, | |||||
content_swhid, | content_swhid, | ||||
authority, | authority, | ||||
after=data.content_metadata["discovery_date"] - timedelta(seconds=1), | after=data.content_metadata.discovery_date - timedelta(seconds=1), | ||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.content_metadata, data.content_metadata2] == list( | assert [data.content_metadata, data.content_metadata2] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.content_metadata_get( | result = swh_storage.object_metadata_get( | ||||
content_swhid, authority, after=data.content_metadata["discovery_date"] | MetadataTargetType.CONTENT, | ||||
content_swhid, | |||||
authority, | |||||
after=data.content_metadata.discovery_date, | |||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.content_metadata2] == result["results"] | assert [data.content_metadata2] == result["results"] | ||||
result = swh_storage.content_metadata_get( | result = swh_storage.object_metadata_get( | ||||
content_swhid, authority, after=data.content_metadata2["discovery_date"] | MetadataTargetType.CONTENT, | ||||
content_swhid, | |||||
authority, | |||||
after=data.content_metadata2.discovery_date, | |||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [] == result["results"] | assert [] == result["results"] | ||||
def test_content_metadata_get_paginate(self, swh_storage): | def test_content_metadata_get_paginate(self, swh_storage): | ||||
content = data.cont | content = data.cont | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
content_swhid = f"swh:1:cnt:{content['sha1_git']}" | content_swhid = SWHID( | ||||
object_type="content", object_id=hash_to_bytes(content["sha1_git"]) | |||||
) | |||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.content_metadata_add(**data.content_metadata) | swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) | ||||
swh_storage.content_metadata_add(**data.content_metadata2) | |||||
swh_storage.content_metadata_get(content_swhid, authority) | swh_storage.object_metadata_get( | ||||
MetadataTargetType.CONTENT, content_swhid, authority | |||||
) | |||||
result = swh_storage.content_metadata_get(content_swhid, authority, limit=1) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.CONTENT, content_swhid, authority, limit=1 | |||||
) | |||||
assert result["next_page_token"] is not None | assert result["next_page_token"] is not None | ||||
assert [data.content_metadata] == result["results"] | assert [data.content_metadata] == result["results"] | ||||
result = swh_storage.content_metadata_get( | result = swh_storage.object_metadata_get( | ||||
content_swhid, authority, limit=1, page_token=result["next_page_token"] | MetadataTargetType.CONTENT, | ||||
content_swhid, | |||||
authority, | |||||
limit=1, | |||||
page_token=result["next_page_token"], | |||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.content_metadata2] == result["results"] | assert [data.content_metadata2] == result["results"] | ||||
def test_content_metadata_get_paginate_same_date(self, swh_storage): | def test_content_metadata_get_paginate_same_date(self, swh_storage): | ||||
content = data.cont | content = data.cont | ||||
fetcher1 = data.metadata_fetcher | fetcher1 = data.metadata_fetcher | ||||
fetcher2 = data.metadata_fetcher2 | fetcher2 = data.metadata_fetcher2 | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
content_swhid = f"swh:1:cnt:{content['sha1_git']}" | content_swhid = SWHID( | ||||
object_type="content", object_id=hash_to_bytes(content["sha1_git"]) | |||||
) | |||||
swh_storage.metadata_fetcher_add(**fetcher1) | swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | ||||
swh_storage.metadata_fetcher_add(**fetcher2) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.metadata_authority_add(**authority) | |||||
content_metadata2 = { | content_metadata2 = attr.evolve( | ||||
**data.content_metadata2, | data.content_metadata2, | ||||
"discovery_date": data.content_metadata2["discovery_date"], | discovery_date=data.content_metadata2.discovery_date, | ||||
"fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],}, | fetcher=attr.evolve(fetcher2, metadata=None), | ||||
} | ) | ||||
swh_storage.content_metadata_add(**data.content_metadata) | swh_storage.object_metadata_add([data.content_metadata, content_metadata2]) | ||||
swh_storage.content_metadata_add(**content_metadata2) | |||||
result = swh_storage.content_metadata_get(content_swhid, authority, limit=1) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.CONTENT, content_swhid, authority, limit=1 | |||||
) | |||||
assert result["next_page_token"] is not None | assert result["next_page_token"] is not None | ||||
assert [data.content_metadata] == result["results"] | assert [data.content_metadata] == result["results"] | ||||
result = swh_storage.content_metadata_get( | result = swh_storage.object_metadata_get( | ||||
content_swhid, authority, limit=1, page_token=result["next_page_token"] | MetadataTargetType.CONTENT, | ||||
content_swhid, | |||||
authority, | |||||
limit=1, | |||||
page_token=result["next_page_token"], | |||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [content_metadata2] == result["results"] | assert [content_metadata2] == result["results"] | ||||
def test_content_metadata_get__invalid_id(self, swh_storage): | |||||
fetcher = data.metadata_fetcher | |||||
authority = data.metadata_authority | |||||
swh_storage.metadata_fetcher_add([fetcher]) | |||||
swh_storage.metadata_authority_add([authority]) | |||||
swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) | |||||
with pytest.raises(StorageArgumentException, match="SWHID"): | |||||
swh_storage.object_metadata_get( | |||||
MetadataTargetType.CONTENT, data.origin["url"], authority | |||||
) | |||||
def test_origin_metadata_add(self, swh_storage): | def test_origin_metadata_add(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata2) | |||||
result = swh_storage.origin_metadata_get(origin["url"], authority) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin["url"], authority | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.origin_metadata, data.origin_metadata2] == list( | assert [data.origin_metadata, data.origin_metadata2] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x.discovery_date) | ||||
) | ) | ||||
def test_origin_metadata_add_duplicate(self, swh_storage): | def test_origin_metadata_add_duplicate(self, swh_storage): | ||||
"""Duplicates should be silently updated.""" | """Duplicates should be silently updated.""" | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
new_origin_metadata2 = { | new_origin_metadata2 = attr.evolve( | ||||
**data.origin_metadata2, | data.origin_metadata2, format="new-format", metadata=b"new-metadata", | ||||
"format": "new-format", | ) | ||||
"metadata": b"new-metadata", | |||||
} | |||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata2) | swh_storage.object_metadata_add([new_origin_metadata2]) | ||||
swh_storage.origin_metadata_add(**new_origin_metadata2) | |||||
result = swh_storage.origin_metadata_get(origin["url"], authority) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin["url"], authority | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
# which of the two behavior happens is backend-specific. | # which of the two behavior happens is backend-specific. | ||||
expected_results1 = (data.origin_metadata, new_origin_metadata2) | expected_results1 = (data.origin_metadata, new_origin_metadata2) | ||||
expected_results2 = (data.origin_metadata, data.origin_metadata2) | expected_results2 = (data.origin_metadata, data.origin_metadata2) | ||||
assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in ( | assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in ( | ||||
expected_results1, # cassandra | expected_results1, # cassandra | ||||
expected_results2, # postgresql | expected_results2, # postgresql | ||||
) | ) | ||||
def test_origin_metadata_add_dict(self, swh_storage): | |||||
origin = data.origin | |||||
fetcher = data.metadata_fetcher | |||||
authority = data.metadata_authority | |||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | |||||
swh_storage.metadata_fetcher_add(**fetcher) | |||||
swh_storage.metadata_authority_add(**authority) | |||||
kwargs = data.origin_metadata.copy() | |||||
kwargs["metadata"] = {"foo": "bar"} | |||||
with pytest.raises(StorageArgumentException): | |||||
swh_storage.origin_metadata_add(**kwargs) | |||||
def test_origin_metadata_get(self, swh_storage): | def test_origin_metadata_get(self, swh_storage): | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority2 = data.metadata_authority2 | authority2 = data.metadata_authority2 | ||||
fetcher2 = data.metadata_fetcher2 | fetcher2 = data.metadata_fetcher2 | ||||
origin_url1 = data.origin["url"] | origin_url1 = data.origin["url"] | ||||
origin_url2 = data.origin2["url"] | origin_url2 = data.origin2["url"] | ||||
assert swh_storage.origin_add([data.origin, data.origin2]) == {"origin:add": 2} | assert swh_storage.origin_add([data.origin, data.origin2]) == {"origin:add": 2} | ||||
origin1_metadata1 = data.origin_metadata | origin1_metadata1 = data.origin_metadata | ||||
origin1_metadata2 = data.origin_metadata2 | origin1_metadata2 = data.origin_metadata2 | ||||
origin1_metadata3 = data.origin_metadata3 | origin1_metadata3 = data.origin_metadata3 | ||||
origin2_metadata = {**data.origin_metadata2, "origin_url": origin_url2} | origin2_metadata = attr.evolve(data.origin_metadata2, id=origin_url2) | ||||
swh_storage.metadata_authority_add([authority, authority2]) | |||||
swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | |||||
swh_storage.metadata_authority_add(**authority) | swh_storage.object_metadata_add( | ||||
swh_storage.metadata_fetcher_add(**fetcher) | [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata] | ||||
swh_storage.metadata_authority_add(**authority2) | ) | ||||
swh_storage.metadata_fetcher_add(**fetcher2) | |||||
swh_storage.origin_metadata_add(**origin1_metadata1) | |||||
swh_storage.origin_metadata_add(**origin1_metadata2) | |||||
swh_storage.origin_metadata_add(**origin1_metadata3) | |||||
swh_storage.origin_metadata_add(**origin2_metadata) | |||||
result = swh_storage.origin_metadata_get(origin_url1, authority) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin_url1, authority | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [origin1_metadata1, origin1_metadata2] == list( | assert [origin1_metadata1, origin1_metadata2] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.origin_metadata_get(origin_url1, authority2) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin_url1, authority2 | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [origin1_metadata3] == list( | assert [origin1_metadata3] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.origin_metadata_get(origin_url2, authority) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin_url2, authority | |||||
) | |||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [origin2_metadata] == list(result["results"],) | assert [origin2_metadata] == list(result["results"],) | ||||
def test_origin_metadata_get_after(self, swh_storage): | def test_origin_metadata_get_after(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata2) | |||||
result = swh_storage.origin_metadata_get( | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, | |||||
origin["url"], | origin["url"], | ||||
authority, | authority, | ||||
after=data.origin_metadata["discovery_date"] - timedelta(seconds=1), | after=data.origin_metadata.discovery_date - timedelta(seconds=1), | ||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.origin_metadata, data.origin_metadata2] == list( | assert [data.origin_metadata, data.origin_metadata2] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.origin_metadata_get( | result = swh_storage.object_metadata_get( | ||||
origin["url"], authority, after=data.origin_metadata["discovery_date"] | MetadataTargetType.ORIGIN, | ||||
origin["url"], | |||||
authority, | |||||
after=data.origin_metadata.discovery_date, | |||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.origin_metadata2] == result["results"] | assert [data.origin_metadata2] == result["results"] | ||||
result = swh_storage.origin_metadata_get( | result = swh_storage.object_metadata_get( | ||||
origin["url"], authority, after=data.origin_metadata2["discovery_date"] | MetadataTargetType.ORIGIN, | ||||
origin["url"], | |||||
authority, | |||||
after=data.origin_metadata2.discovery_date, | |||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [] == result["results"] | assert [] == result["results"] | ||||
def test_origin_metadata_get_paginate(self, swh_storage): | def test_origin_metadata_get_paginate(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata2) | |||||
swh_storage.origin_metadata_get(origin["url"], authority) | swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin["url"], authority | |||||
) | |||||
result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin["url"], authority, limit=1 | |||||
) | |||||
assert result["next_page_token"] is not None | assert result["next_page_token"] is not None | ||||
assert [data.origin_metadata] == result["results"] | assert [data.origin_metadata] == result["results"] | ||||
result = swh_storage.origin_metadata_get( | result = swh_storage.object_metadata_get( | ||||
origin["url"], authority, limit=1, page_token=result["next_page_token"] | MetadataTargetType.ORIGIN, | ||||
origin["url"], | |||||
authority, | |||||
limit=1, | |||||
page_token=result["next_page_token"], | |||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.origin_metadata2] == result["results"] | assert [data.origin_metadata2] == result["results"] | ||||
# Pagination test with two metadata entries attached to different fetchers:
# registers one origin, two fetchers and one authority, stores
# data.origin_metadata plus a modified copy of data.origin_metadata2, then
# reads them back one entry per page (limit=1).
# Each line carries two versions of the test side by side: the left column
# builds the copy as a dict and uses origin_metadata_add / origin_metadata_get,
# the right column builds it with attr.evolve and uses object_metadata_add /
# object_metadata_get with MetadataTargetType.ORIGIN.
def test_origin_metadata_get_paginate_same_date(self, swh_storage): | def test_origin_metadata_get_paginate_same_date(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher1 = data.metadata_fetcher | fetcher1 = data.metadata_fetcher | ||||
fetcher2 = data.metadata_fetcher2 | fetcher2 = data.metadata_fetcher2 | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher1) | swh_storage.metadata_fetcher_add([fetcher1]) | ||||
swh_storage.metadata_fetcher_add(**fetcher2) | swh_storage.metadata_fetcher_add([fetcher2]) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add([authority]) | ||||
# Copy of data.origin_metadata2 whose fetcher is replaced by fetcher2
# (left: a dict holding only name and version; right: fetcher2 with
# metadata=None).
# NOTE(review): discovery_date is assigned data.origin_metadata2's own
# discovery_date, so the copy leaves that field unchanged. The test name
# suggests both entries are meant to share a date -- confirm against the
# fixtures in `data`.
origin_metadata2 = { | origin_metadata2 = attr.evolve( | ||||
**data.origin_metadata2, | data.origin_metadata2, | ||||
"discovery_date": data.origin_metadata2["discovery_date"], | discovery_date=data.origin_metadata2.discovery_date, | ||||
"fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],}, | fetcher=attr.evolve(fetcher2, metadata=None), | ||||
} | ) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.object_metadata_add([data.origin_metadata, origin_metadata2]) | ||||
swh_storage.origin_metadata_add(**origin_metadata2) | |||||
# First page: exactly one result (data.origin_metadata) and a non-None token.
result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1) | result = swh_storage.object_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin["url"], authority, limit=1 | |||||
) | |||||
assert result["next_page_token"] is not None | assert result["next_page_token"] is not None | ||||
assert [data.origin_metadata] == result["results"] | assert [data.origin_metadata] == result["results"] | ||||
# Second page, requested with the first page's token: the modified copy and
# no further token.
result = swh_storage.origin_metadata_get( | result = swh_storage.object_metadata_get( | ||||
origin["url"], authority, limit=1, page_token=result["next_page_token"] | MetadataTargetType.ORIGIN, | ||||
origin["url"], | |||||
authority, | |||||
limit=1, | |||||
page_token=result["next_page_token"], | |||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [origin_metadata2] == result["results"] | assert [origin_metadata2] == result["results"] | ||||
# Error-path test: the origin and the fetcher are registered but no metadata
# authority is, so object_metadata_add is expected to raise
# StorageArgumentException with a message matching "authority".
def test_origin_metadata_add_missing_authority(self, swh_storage): | |||||
origin = data.origin | |||||
fetcher = data.metadata_fetcher | |||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | |||||
swh_storage.metadata_fetcher_add([fetcher]) | |||||
with pytest.raises(StorageArgumentException, match="authority"): | |||||
swh_storage.object_metadata_add( | |||||
[data.origin_metadata, data.origin_metadata2] | |||||
) | |||||
# Error-path test: the origin and the authority are registered but no metadata
# fetcher is, so object_metadata_add is expected to raise
# StorageArgumentException with a message matching "fetcher".
def test_origin_metadata_add_missing_fetcher(self, swh_storage): | |||||
origin = data.origin | |||||
authority = data.metadata_authority | |||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | |||||
swh_storage.metadata_authority_add([authority]) | |||||
with pytest.raises(StorageArgumentException, match="fetcher"): | |||||
swh_storage.object_metadata_add( | |||||
[data.origin_metadata, data.origin_metadata2] | |||||
) | |||||
# Error-path test: after storing two origin metadata entries, querying with
# MetadataTargetType.ORIGIN but passing data.content_metadata.id as the id is
# expected to raise StorageArgumentException with a message matching "SWHID".
# NOTE(review): presumably data.content_metadata.id is a SWHID rather than an
# origin URL, which is what makes it invalid here -- confirm against `data`.
def test_origin_metadata_get__invalid_id_type(self, swh_storage): | |||||
origin = data.origin | |||||
fetcher = data.metadata_fetcher | |||||
authority = data.metadata_authority | |||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | |||||
swh_storage.metadata_fetcher_add([fetcher]) | |||||
swh_storage.metadata_authority_add([authority]) | |||||
swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) | |||||
with pytest.raises(StorageArgumentException, match="SWHID"): | |||||
swh_storage.object_metadata_get( | |||||
MetadataTargetType.ORIGIN, data.content_metadata.id, authority, | |||||
) | |||||
class TestStorageGeneratedData: | class TestStorageGeneratedData: | ||||
def test_generate_content_get(self, swh_storage, swh_contents): | def test_generate_content_get(self, swh_storage, swh_contents): | ||||
contents_with_data = [c for c in swh_contents if c["status"] != "absent"] | contents_with_data = [c for c in swh_contents if c["status"] != "absent"] | ||||
# input the list of sha1s we want from storage | # input the list of sha1s we want from storage | ||||
get_sha1s = [c["sha1"] for c in contents_with_data] | get_sha1s = [c["sha1"] for c in contents_with_data] | ||||
# retrieve contents | # retrieve contents | ||||
▲ Show 20 Lines • Show All 524 Lines • Show Last 20 Lines |