Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_origin_head.py
# Copyright (C) 2017-2020 The Software Heritage developers | # Copyright (C) 2017-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | |||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import unittest | |||||
import pytest | import pytest | ||||
from swh.indexer.origin_head import OriginHeadIndexer | from swh.indexer.origin_head import get_head_swhid | ||||
from swh.indexer.tests.utils import fill_storage | from swh.indexer.tests.utils import fill_storage | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
) | ) | ||||
from swh.model.swhids import CoreSWHID | |||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
@pytest.fixture | |||||
def swh_indexer_config(swh_indexer_config): | |||||
config = copy.deepcopy(swh_indexer_config) | |||||
config.update( | |||||
{ | |||||
"tools": { | |||||
"name": "origin-metadata", | |||||
"version": "0.0.1", | |||||
"configuration": {}, | |||||
}, | |||||
"tasks": { | |||||
"revision_intrinsic_metadata": None, | |||||
"origin_intrinsic_metadata": None, | |||||
}, | |||||
} | |||||
) | |||||
return config | |||||
class OriginHeadTestIndexer(OriginHeadIndexer): | |||||
"""Specific indexer whose configuration is enough to satisfy the | |||||
indexing tests. | |||||
""" | |||||
def persist_index_computations(self, results): | |||||
self.results = results | |||||
SAMPLE_SNAPSHOT = Snapshot( | SAMPLE_SNAPSHOT = Snapshot( | ||||
branches={ | branches={ | ||||
b"foo": None, | b"foo": None, | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target_type=TargetType.ALIAS, | target_type=TargetType.ALIAS, | ||||
target=b"foo", | target=b"foo", | ||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
class OriginHead(unittest.TestCase): | @pytest.fixture | ||||
@pytest.fixture(autouse=True) | def storage(swh_storage): | ||||
def init(self, swh_config): | fill_storage(swh_storage) | ||||
super().setUp() | return swh_storage | ||||
self.indexer = OriginHeadTestIndexer() | |||||
self.indexer.catch_exceptions = False | |||||
fill_storage(self.indexer.storage) | |||||
def test_git(self): | |||||
def test_git(storage): | |||||
origin_url = "https://github.com/SoftwareHeritage/swh-storage" | origin_url = "https://github.com/SoftwareHeritage/swh-storage" | ||||
self.indexer.run([origin_url]) | assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string( | ||||
rev_id = b"8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}\xac\xefrm" | "swh:1:rev:384b12006403cce45d6253e38f7bd77dacef726d" | ||||
self.assertEqual( | |||||
self.indexer.results, | |||||
[ | |||||
{ | |||||
"revision_id": rev_id, | |||||
"origin_url": origin_url, | |||||
} | |||||
], | |||||
) | ) | ||||
def test_git_partial_snapshot(self): | |||||
def test_git_partial_snapshot(storage): | |||||
"""Checks partial snapshots are ignored.""" | """Checks partial snapshots are ignored.""" | ||||
origin_url = "https://github.com/SoftwareHeritage/swh-core" | origin_url = "https://github.com/SoftwareHeritage/swh-core" | ||||
self.indexer.storage.origin_add([Origin(url=origin_url)]) | storage.origin_add([Origin(url=origin_url)]) | ||||
visit = self.indexer.storage.origin_visit_add( | visit = storage.origin_visit_add( | ||||
[ | [ | ||||
OriginVisit( | OriginVisit( | ||||
origin=origin_url, | origin=origin_url, | ||||
date=datetime(2019, 2, 27, tzinfo=timezone.utc), | date=datetime(2019, 2, 27, tzinfo=timezone.utc), | ||||
type="git", | type="git", | ||||
) | ) | ||||
] | ] | ||||
)[0] | )[0] | ||||
self.indexer.storage.snapshot_add([SAMPLE_SNAPSHOT]) | storage.snapshot_add([SAMPLE_SNAPSHOT]) | ||||
visit_status = OriginVisitStatus( | visit_status = OriginVisitStatus( | ||||
origin=origin_url, | origin=origin_url, | ||||
visit=visit.visit, | visit=visit.visit, | ||||
date=now(), | date=now(), | ||||
status="partial", | status="partial", | ||||
snapshot=SAMPLE_SNAPSHOT.id, | snapshot=SAMPLE_SNAPSHOT.id, | ||||
) | ) | ||||
self.indexer.storage.origin_visit_status_add([visit_status]) | storage.origin_visit_status_add([visit_status]) | ||||
self.indexer.run([origin_url]) | assert get_head_swhid(storage, origin_url) is None | ||||
self.assertEqual(self.indexer.results, []) | |||||
def test_vcs_missing_snapshot(self): | |||||
def test_vcs_missing_snapshot(storage): | |||||
origin_url = "https://github.com/SoftwareHeritage/swh-indexer" | origin_url = "https://github.com/SoftwareHeritage/swh-indexer" | ||||
self.indexer.storage.origin_add([Origin(url=origin_url)]) | storage.origin_add([Origin(url=origin_url)]) | ||||
self.indexer.run([origin_url]) | assert get_head_swhid(storage, origin_url) is None | ||||
self.assertEqual(self.indexer.results, []) | |||||
def test_pypi_missing_branch(self): | def test_pypi_missing_branch(storage): | ||||
origin_url = "https://pypi.org/project/abcdef/" | origin_url = "https://pypi.org/project/abcdef/" | ||||
self.indexer.storage.origin_add( | storage.origin_add( | ||||
[ | [ | ||||
Origin( | Origin( | ||||
url=origin_url, | url=origin_url, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
visit = self.indexer.storage.origin_visit_add( | visit = storage.origin_visit_add( | ||||
[ | [ | ||||
OriginVisit( | OriginVisit( | ||||
origin=origin_url, | origin=origin_url, | ||||
date=datetime(2019, 2, 27, tzinfo=timezone.utc), | date=datetime(2019, 2, 27, tzinfo=timezone.utc), | ||||
type="pypi", | type="pypi", | ||||
) | ) | ||||
] | ] | ||||
)[0] | )[0] | ||||
self.indexer.storage.snapshot_add([SAMPLE_SNAPSHOT]) | storage.snapshot_add([SAMPLE_SNAPSHOT]) | ||||
visit_status = OriginVisitStatus( | visit_status = OriginVisitStatus( | ||||
origin=origin_url, | origin=origin_url, | ||||
visit=visit.visit, | visit=visit.visit, | ||||
date=now(), | date=now(), | ||||
status="full", | status="full", | ||||
snapshot=SAMPLE_SNAPSHOT.id, | snapshot=SAMPLE_SNAPSHOT.id, | ||||
) | ) | ||||
self.indexer.storage.origin_visit_status_add([visit_status]) | storage.origin_visit_status_add([visit_status]) | ||||
self.indexer.run(["https://pypi.org/project/abcdef/"]) | assert get_head_swhid(storage, origin_url) is None | ||||
self.assertEqual(self.indexer.results, []) | |||||
def test_ftp(self): | |||||
def test_ftp(storage): | |||||
origin_url = "rsync://ftp.gnu.org/gnu/3dldf" | origin_url = "rsync://ftp.gnu.org/gnu/3dldf" | ||||
self.indexer.run([origin_url]) | assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string( | ||||
rev_id = b"\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee\xcc\x1a\xb4`\x8c\x8by" | "swh:1:rev:8ea98e2fea7d9f6546f49ffdeecc1ab4608c8b79" | ||||
self.assertEqual( | |||||
self.indexer.results, | |||||
[ | |||||
{ | |||||
"revision_id": rev_id, | |||||
"origin_url": origin_url, | |||||
} | |||||
], | |||||
) | ) | ||||
def test_ftp_missing_snapshot(self): | |||||
def test_ftp_missing_snapshot(storage): | |||||
origin_url = "rsync://ftp.gnu.org/gnu/foobar" | origin_url = "rsync://ftp.gnu.org/gnu/foobar" | ||||
self.indexer.storage.origin_add([Origin(url=origin_url)]) | storage.origin_add([Origin(url=origin_url)]) | ||||
self.indexer.run([origin_url]) | assert get_head_swhid(storage, origin_url) is None | ||||
self.assertEqual(self.indexer.results, []) | |||||
def test_deposit(self): | def test_deposit(storage): | ||||
origin_url = "https://forge.softwareheritage.org/source/jesuisgpl/" | origin_url = "https://forge.softwareheritage.org/source/jesuisgpl/" | ||||
self.indexer.storage.origin_add([Origin(url=origin_url)]) | storage.origin_add([Origin(url=origin_url)]) | ||||
self.indexer.run([origin_url]) | assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string( | ||||
rev_id = b"\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{\xa6\xe9\x99\xb1\x9e]q\xeb" | "swh:1:rev:e76ea49c9ffbb7f73611087ba6e999b19e5d71eb" | ||||
self.assertEqual( | |||||
self.indexer.results, | |||||
[ | |||||
{ | |||||
"revision_id": rev_id, | |||||
"origin_url": origin_url, | |||||
} | |||||
], | |||||
) | ) | ||||
def test_deposit_missing_snapshot(self): | |||||
def test_deposit_missing_snapshot(storage): | |||||
origin_url = "https://forge.softwareheritage.org/source/foobar" | origin_url = "https://forge.softwareheritage.org/source/foobar" | ||||
self.indexer.storage.origin_add( | storage.origin_add( | ||||
[ | [ | ||||
Origin( | Origin( | ||||
url=origin_url, | url=origin_url, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
self.indexer.run([origin_url]) | assert get_head_swhid(storage, origin_url) is None | ||||
self.assertEqual(self.indexer.results, []) | |||||
def test_pypi(self): | |||||
origin_url = "https://pypi.org/project/limnoria/" | |||||
self.indexer.run([origin_url]) | |||||
rev_id = b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8kA\x10\x9d\xc5\xfa2\xf8t" | def test_pypi(storage): | ||||
self.assertEqual( | origin_url = "https://old-pypi.example.org/project/limnoria/" | ||||
self.indexer.results, | assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string( | ||||
[{"revision_id": rev_id, "origin_url": origin_url}], | "swh:1:rev:83b9b6c705b125d0fe6dd86b41109dc5fa32f874" | ||||
) | ) | ||||
def test_svn(self): | origin_url = "https://pypi.org/project/limnoria/" | ||||
assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string( | |||||
"swh:1:rel:83b9b6c705b125d0fe6dd86b41109dc5fa32f874" | |||||
) | |||||
def test_svn(storage): | |||||
origin_url = "http://0-512-md.googlecode.com/svn/" | origin_url = "http://0-512-md.googlecode.com/svn/" | ||||
self.indexer.run([origin_url]) | assert get_head_swhid(storage, origin_url) == CoreSWHID.from_string( | ||||
rev_id = b"\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8\xc9\xad#.\x1bw=\x18" | "swh:1:rev:e43f72e12c88abece79a87b8c9ad232e1b773d18" | ||||
self.assertEqual( | |||||
self.indexer.results, | |||||
[ | |||||
{ | |||||
"revision_id": rev_id, | |||||
"origin_url": origin_url, | |||||
} | |||||
], | |||||
) | ) |