Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_origin_head.py
# Copyright (C) 2017-2022 The Software Heritage developers | # Copyright (C) 2017-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import itertools | |||||
import pytest | import pytest | ||||
from swh.indexer.origin_head import get_head_swhid | from swh.indexer.origin_head import get_head_swhid | ||||
from swh.indexer.tests.utils import fill_storage | from swh.indexer.tests.utils import fill_storage | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
) | ) | ||||
from swh.model.swhids import CoreSWHID | from swh.model.swhids import CoreSWHID | ||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
@pytest.fixture
def swh_storage_backend_config():
    """Select the in-memory storage backend so the tests run faster."""
    backend_config = {"cls": "memory"}
    return backend_config
# Snapshot with two branches: "foo", which has no target (None), and "HEAD",
# which is an alias pointing at "foo".
SAMPLE_SNAPSHOT = Snapshot(
    branches={
        b"foo": None,
        b"HEAD": SnapshotBranch(target_type=TargetType.ALIAS, target=b"foo"),
    }
)
def _add_snapshot_to_origin(storage, origin_url, visit_type, snapshot):
    """Record ``snapshot`` as the result of a "full" visit of ``origin_url``.

    Adds, in order: the origin, one visit dated 2019-02-27 (UTC), the
    snapshot, and a visit status with status "full" pointing at the snapshot.

    Args:
        storage: object providing ``origin_add``, ``origin_visit_add``,
            ``snapshot_add`` and ``origin_visit_status_add``.
        origin_url: URL of the origin to create and visit.
        visit_type: type recorded on the created visit (e.g. "git", "pypi").
        snapshot: the snapshot to store and attach to the visit.
    """
    storage.origin_add([Origin(url=origin_url)])
    visit = storage.origin_visit_add(
        [
            OriginVisit(
                origin=origin_url,
                date=datetime(2019, 2, 27, tzinfo=timezone.utc),
                # Use the caller-supplied type; this was previously hard-coded
                # to "pypi", silently ignoring the ``visit_type`` argument.
                type=visit_type,
            )
        ]
    )[0]
    storage.snapshot_add([snapshot])
    visit_status = OriginVisitStatus(
        origin=origin_url,
        # ``visit.visit`` is the identifier of the visit returned by storage.
        visit=visit.visit,
        date=now(),
        status="full",
        snapshot=snapshot.id,
    )
    storage.origin_visit_status_add([visit_status])
@pytest.fixture
def storage(swh_storage):
    """Return the ``swh_storage`` fixture after ``fill_storage`` has run on it."""
    populated = swh_storage
    fill_storage(populated)
    return populated
def test_git(storage): | def test_git(storage): | ||||
origin_url = "https://github.com/SoftwareHeritage/swh-storage" | origin_url = "https://github.com/SoftwareHeritage/swh-storage" | ||||
Show All 30 Lines | |||||
def test_vcs_missing_snapshot(storage):
    """No head is reported for an origin this test only registers.

    The test adds the origin itself but no visit or snapshot for it.
    """
    url = "https://github.com/SoftwareHeritage/swh-indexer"
    storage.origin_add([Origin(url=url)])
    head = get_head_swhid(storage, url)
    assert head is None
def test_pypi_missing_branch(storage):
    """No head is reported when HEAD aliases a branch that has no target.

    ``SAMPLE_SNAPSHOT`` has HEAD aliased to "foo", and "foo" maps to ``None``.
    """
    url = "https://pypi.org/project/abcdef/"
    _add_snapshot_to_origin(storage, url, "pypi", SAMPLE_SNAPSHOT)
    head = get_head_swhid(storage, url)
    assert head is None
@pytest.mark.parametrize(
    "branches_start,branches_middle,branches_end",
    itertools.product([0, 40, 99, 100, 200], [0, 40, 99, 100, 200], [0, 40, 200]),
)
def test_large_snapshot(storage, branches_start, branches_middle, branches_end):
    """HEAD is resolved to its revision when surrounded by many empty branches.

    The snapshot holds ``branches_start`` target-less branches named
    ``AAAA<i>``, then HEAD (an alias to ``refs/heads/foo``), then
    ``branches_middle`` branches named ``aaaa<i>``, then ``refs/heads/foo``
    (a revision), then ``branches_end`` branches named ``zzzz<i>``.
    """
    # NOTE(review): the 99/100/200 sizes presumably straddle a branch
    # pagination boundary in storage -- confirm against get_head_swhid.
    rev_id = "8ea98e2fea7d9f6546f49ffdeecc1ab4608c8b79"
    head_alias = SnapshotBranch(
        target_type=TargetType.ALIAS, target=b"refs/heads/foo"
    )
    head_revision = SnapshotBranch(
        target_type=TargetType.REVISION,
        target=bytes.fromhex(rev_id),
    )
    # Insertion order matches the original list concatenation.
    branches = {
        **{f"AAAA{i}".encode(): None for i in range(branches_start)},
        b"HEAD": head_alias,
        **{f"aaaa{i}".encode(): None for i in range(branches_middle)},
        b"refs/heads/foo": head_revision,
        **{f"zzzz{i}".encode(): None for i in range(branches_end)},
    }
    snapshot = Snapshot(branches=branches)

    origin_url = "https://example.org/repo.git"
    _add_snapshot_to_origin(storage, origin_url, "git", snapshot)

    expected_head = CoreSWHID.from_string(
        "swh:1:rev:8ea98e2fea7d9f6546f49ffdeecc1ab4608c8b79"
    )
    assert get_head_swhid(storage, origin_url) == expected_head
def test_large_snapshot_chained_aliases(storage):
    """HEAD is resolved through two alias hops in a snapshot with many branches.

    HEAD aliases ``refs/heads/alias2``, which aliases ``refs/heads/branch``,
    which targets a revision. Each of them is separated from the next by 200
    target-less branches.
    """
    rev_id = "8ea98e2fea7d9f6546f49ffdeecc1ab4608c8b79"
    first_alias = SnapshotBranch(
        target_type=TargetType.ALIAS, target=b"refs/heads/alias2"
    )
    second_alias = SnapshotBranch(
        target_type=TargetType.ALIAS, target=b"refs/heads/branch"
    )
    final_revision = SnapshotBranch(
        target_type=TargetType.REVISION,
        target=bytes.fromhex(rev_id),
    )
    # Insertion order matches the original list concatenation.
    branches = {
        **{f"AAAA{i}".encode(): None for i in range(200)},
        b"HEAD": first_alias,
        **{f"aaaa{i}".encode(): None for i in range(200)},
        b"refs/heads/alias2": second_alias,
        **{f"refs/heads/bbbb{i}".encode(): None for i in range(200)},
        b"refs/heads/branch": final_revision,
    }
    snapshot = Snapshot(branches=branches)

    origin_url = "https://example.org/repo.git"
    _add_snapshot_to_origin(storage, origin_url, "git", snapshot)

    expected_head = CoreSWHID.from_string(
        "swh:1:rev:8ea98e2fea7d9f6546f49ffdeecc1ab4608c8b79"
    )
    assert get_head_swhid(storage, origin_url) == expected_head
@pytest.mark.parametrize(
    "branches_start,branches_end",
    itertools.product([0, 40, 99, 100, 200], [0, 40, 200]),
)
def test_large_snapshot_dangling_alias(storage, branches_start, branches_end):
    """No head is reported when HEAD aliases a branch absent from the snapshot.

    HEAD points at ``refs/heads/foo``, which is never added; it is surrounded
    by ``branches_start`` and ``branches_end`` target-less branches.
    """
    dangling_head = SnapshotBranch(
        target_type=TargetType.ALIAS, target=b"refs/heads/foo"
    )
    # Insertion order matches the original list concatenation.
    branches = {
        **{f"AAAA{i}".encode(): None for i in range(branches_start)},
        b"HEAD": dangling_head,
        **{f"zzzz{i}".encode(): None for i in range(branches_end)},
    }
    snapshot = Snapshot(branches=branches)

    origin_url = "https://example.org/repo.git"
    _add_snapshot_to_origin(storage, origin_url, "git", snapshot)

    head = get_head_swhid(storage, origin_url)
    assert head is None
def test_ftp(storage):
    """The rsync GNU origin resolves to the expected revision SWHID."""
    # NOTE(review): this origin is presumably pre-loaded by the ``storage``
    # fixture (via fill_storage) -- the test itself adds nothing.
    expected_head = CoreSWHID.from_string(
        "swh:1:rev:8ea98e2fea7d9f6546f49ffdeecc1ab4608c8b79"
    )
    head = get_head_swhid(storage, "rsync://ftp.gnu.org/gnu/3dldf")
    assert head == expected_head
▲ Show 20 Lines • Show All 45 Lines • Show Last 20 Lines |