Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/migrate_extrinsic_metadata/test_debian.py
Show All 14 Lines | |||||
import pytest | import pytest | ||||
from swh.model.identifiers import parse_swhid | from swh.model.identifiers import parse_swhid | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
Origin, | |||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Person, | Person, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
Timestamp, | Timestamp, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.storage import get_storage | |||||
from swh.storage.interface import ListOrder, PagedResult | from swh.storage.interface import ListOrder, PagedResult | ||||
from swh.storage.migrate_extrinsic_metadata import handle_row, debian_origins_from_row | from swh.storage.migrate_extrinsic_metadata import handle_row, debian_origins_from_row | ||||
FETCHER = MetadataFetcher( | FETCHER = MetadataFetcher( | ||||
name="migrate-extrinsic-metadata-from-revisions", version="0.0.1", | name="migrate-extrinsic-metadata-from-revisions", version="0.0.1", | ||||
) | ) | ||||
SWH_AUTHORITY = MetadataAuthority( | SWH_AUTHORITY = MetadataAuthority( | ||||
type=MetadataAuthorityType.REGISTRY, | type=MetadataAuthorityType.REGISTRY, | ||||
url="https://softwareheritage.org/", | url="https://softwareheritage.org/", | ||||
metadata={}, | metadata={}, | ||||
) | ) | ||||
def now(): | |||||
return datetime.datetime.now(tz=datetime.timezone.utc) | |||||
def patch(function_name, *args, **kwargs): | def patch(function_name, *args, **kwargs): | ||||
# It's a long name, this function spares some line breaks in 'with' statements | # It's a long name, this function spares some line breaks in 'with' statements | ||||
return _patch( | return _patch( | ||||
"swh.storage.migrate_extrinsic_metadata." + function_name, *args, **kwargs | "swh.storage.migrate_extrinsic_metadata." + function_name, *args, **kwargs | ||||
) | ) | ||||
def test_debian_origins_from_row(): | def test_debian_origins_from_row(): | ||||
"""Tests debian_origins_from_row on a real example (with some parts | """Tests debian_origins_from_row on a real example (with some parts | ||||
omitted, for conciseness).""" | omitted, for conciseness).""" | ||||
origin_url = "deb://Debian/packages/kalgebra" | origin_url = "deb://Debian/packages/kalgebra" | ||||
visit = OriginVisit( | visit = OriginVisit( | ||||
origin=origin_url, | origin=origin_url, | ||||
date=datetime.datetime( | date=datetime.datetime( | ||||
2020, 1, 27, 19, 32, 3, 925498, tzinfo=datetime.timezone.utc, | 2020, 1, 27, 19, 32, 3, 925498, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
type="deb", | type="deb", | ||||
visit=280, | visit=280, | ||||
) | ) | ||||
def mock_origin_visit_get(origin, page_token, order): | storage = get_storage("memory") | ||||
if origin in ( | |||||
"deb://Debian-Security/packages/kalgebra", | |||||
"http://snapshot.debian.org/package/kalgebra/", | |||||
): | |||||
return PagedResult(results=[], next_page_token=None) | |||||
elif origin == "deb://Debian/packages/kalgebra": | |||||
if page_token == None: | |||||
return PagedResult( | |||||
# ... | |||||
results=[visit,], | |||||
next_page_token="280", | |||||
) | |||||
elif page_token == "280": | |||||
return PagedResult(results=[], next_page_token=None) | |||||
else: | |||||
assert False, page_token | |||||
else: | |||||
assert False, origin | |||||
storage = Mock() | storage.origin_add( | ||||
[ | |||||
Origin(url=origin_url), | |||||
Origin(url="http://snapshot.debian.org/package/kalgebra/"), | |||||
] | |||||
) | |||||
storage.origin_visit_get.side_effect = mock_origin_visit_get | storage.origin_visit_add([visit]) | ||||
storage.origin_visit_status_get.return_value = PagedResult( | storage.origin_visit_status_add( | ||||
results=[ | [ | ||||
OriginVisitStatus( | OriginVisitStatus( | ||||
origin=origin_url, | origin=origin_url, | ||||
visit=280, | visit=280, | ||||
date=datetime.datetime( | date=datetime.datetime( | ||||
2020, 1, 27, 19, 32, 3, 925498, tzinfo=datetime.timezone.utc | 2020, 1, 27, 19, 32, 3, 925498, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
status="full", | status="full", | ||||
snapshot=b"\xafD\x15\x98){\xd4$\xdeI\x1f\xbe\x95lh`x\x14\xce\xc4", | snapshot=b"\xafD\x15\x98){\xd4$\xdeI\x1f\xbe\x95lh`x\x14\xce\xc4", | ||||
metadata=None, | metadata=None, | ||||
) | ) | ||||
], | ], | ||||
next_page_token=None, | |||||
) | ) | ||||
snapshot = Snapshot( | snapshot = Snapshot( | ||||
id=b"\xafD\x15\x98){\xd4$\xdeI\x1f\xbe\x95lh`x\x14\xce\xc4", | id=b"\xafD\x15\x98){\xd4$\xdeI\x1f\xbe\x95lh`x\x14\xce\xc4", | ||||
branches={ | branches={ | ||||
# ... | # ... | ||||
b"releases/unstable/main/4:19.12.1-1": SnapshotBranch( | b"releases/unstable/main/4:19.12.1-1": SnapshotBranch( | ||||
target=b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", | target=b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", | ||||
Show All 10 Lines | revision_row = { | ||||
{ | { | ||||
"filename": "kalgebra_19.12.1-1.dsc", | "filename": "kalgebra_19.12.1-1.dsc", | ||||
# ... | # ... | ||||
}, | }, | ||||
] | ] | ||||
}, | }, | ||||
} | } | ||||
with patch("snapshot_get_all_branches", return_value=snapshot): | storage.snapshot_add([snapshot]) | ||||
assert debian_origins_from_row(revision_row, storage) == [origin_url] | assert debian_origins_from_row(revision_row, storage) == [origin_url] | ||||
assert storage.method_calls == [ | |||||
call.origin_visit_get( | |||||
"deb://Debian/packages/kalgebra", order=ListOrder.ASC, page_token=None | |||||
), | |||||
call.origin_visit_status_get( | |||||
"deb://Debian/packages/kalgebra", 280, order=ListOrder.ASC, page_token=None | |||||
), | |||||
call.origin_visit_get( | |||||
"deb://Debian-Security/packages/kalgebra", | |||||
order=ListOrder.ASC, | |||||
page_token=None, | |||||
), | |||||
call.origin_visit_get( | |||||
"http://snapshot.debian.org/package/kalgebra/", | |||||
order=ListOrder.ASC, | |||||
page_token=None, | |||||
), | |||||
] | |||||
def test_debian_origins_from_row__no_result(): | def test_debian_origins_from_row__no_result(): | ||||
"""Tests debian_origins_from_row when there's no origin, visit, status, | """Tests debian_origins_from_row when there's no origin, visit, status, | ||||
snapshot, branch, or matching branch. | snapshot, branch, or matching branch. | ||||
""" | """ | ||||
storage = Mock() | storage = get_storage("memory") | ||||
origin_url = "deb://Debian/packages/kalgebra" | origin_url = "deb://Debian/packages/kalgebra" | ||||
snapshot_id = b"42424242424242424242" | snapshot_id = b"42424242424242424242" | ||||
revision_id = b"21212121212121212121" | revision_id = b"21212121212121212121" | ||||
storage.origin_add([Origin(url=origin_url)]) | |||||
revision_row = { | revision_row = { | ||||
"id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", | "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", | ||||
"metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]}, | "metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]}, | ||||
} | } | ||||
# no visit | # no visit | ||||
with patch("iter_origin_visits", return_value=[]): | |||||
assert debian_origins_from_row(revision_row, storage) == [] | assert debian_origins_from_row(revision_row, storage) == [] | ||||
assert storage.method_calls == [] | storage.origin_visit_add( | ||||
[OriginVisit(origin=origin_url, date=now(), type="deb", visit=280,)] | |||||
visit = OriginVisit( | |||||
origin=origin_url, | |||||
date=datetime.datetime.now(tz=datetime.timezone.utc), | |||||
type="deb", | |||||
visit=280, | |||||
) | ) | ||||
# no status | # no status | ||||
with patch("iter_origin_visits", return_value=[visit]): | |||||
with patch("iter_origin_visit_statuses", return_value=[]): | |||||
assert debian_origins_from_row(revision_row, storage) == [] | assert debian_origins_from_row(revision_row, storage) == [] | ||||
assert storage.method_calls == [] | |||||
status = OriginVisitStatus( | status = OriginVisitStatus( | ||||
origin=origin_url, | origin=origin_url, | ||||
visit=280, | visit=280, | ||||
date=datetime.datetime.now(tz=datetime.timezone.utc), | date=now(), | ||||
status="full", | status="full", | ||||
snapshot=None, | snapshot=None, | ||||
metadata=None, | metadata=None, | ||||
) | ) | ||||
storage.origin_visit_status_add([status]) | |||||
# no snapshot | # no snapshot | ||||
with patch("iter_origin_visits", return_value=[visit]): | |||||
with patch("iter_origin_visit_statuses", return_value=[status]): | |||||
assert debian_origins_from_row(revision_row, storage) == [] | assert debian_origins_from_row(revision_row, storage) == [] | ||||
assert storage.method_calls == [] | status = attr.evolve(status, snapshot=snapshot_id, date=now()) | ||||
storage.origin_visit_status_add([status]) | |||||
status = attr.evolve(status, snapshot=snapshot_id) | storage_before_snapshot = copy.deepcopy(storage) | ||||
snapshot = Snapshot(id=snapshot_id, branches={},) | snapshot = Snapshot(id=snapshot_id, branches={}) | ||||
storage.snapshot_add([snapshot]) | |||||
# no branch | # no branch | ||||
with patch("iter_origin_visits", return_value=[visit]): | |||||
with patch("iter_origin_visit_statuses", return_value=[status]): | |||||
with patch("snapshot_get_all_branches", return_value=snapshot): | |||||
assert debian_origins_from_row(revision_row, storage) == [] | assert debian_origins_from_row(revision_row, storage) == [] | ||||
ardumont: that's subtle ;) | |||||
# "remove" the snapshot, so we can add a new one with the same id | |||||
storage = copy.deepcopy(storage_before_snapshot) | |||||
snapshot = attr.evolve(snapshot, branches={b"foo": None,},) | snapshot = attr.evolve(snapshot, branches={b"foo": None,},) | ||||
storage.snapshot_add([snapshot]) | |||||
# dangling branch | # dangling branch | ||||
with patch("iter_origin_visits", return_value=[visit]): | |||||
with patch("iter_origin_visit_statuses", return_value=[status]): | |||||
with patch("snapshot_get_all_branches", return_value=snapshot): | |||||
assert debian_origins_from_row(revision_row, storage) == [] | assert debian_origins_from_row(revision_row, storage) == [] | ||||
assert storage.method_calls == [] | # "remove" the snapshot again | ||||
storage = copy.deepcopy(storage_before_snapshot) | |||||
snapshot = attr.evolve( | snapshot = attr.evolve( | ||||
snapshot, | snapshot, | ||||
branches={ | branches={ | ||||
b"foo": SnapshotBranch(target_type=TargetType.REVISION, target=revision_id,) | b"foo": SnapshotBranch(target_type=TargetType.REVISION, target=revision_id,) | ||||
}, | }, | ||||
) | ) | ||||
storage.revision_get.return_value = [None] | storage.snapshot_add([snapshot]) | ||||
# branch points to unknown revision | # branch points to unknown revision | ||||
with patch("iter_origin_visits", return_value=[visit]): | |||||
with patch("iter_origin_visit_statuses", return_value=[status]): | |||||
with patch("snapshot_get_all_branches", return_value=snapshot): | |||||
assert debian_origins_from_row(revision_row, storage) == [] | assert debian_origins_from_row(revision_row, storage) == [] | ||||
assert storage.method_calls == [ | |||||
call.revision_get([revision_id]), | |||||
call.revision_get([revision_id]), | |||||
call.revision_get([revision_id]), | |||||
] | |||||
storage.reset_mock() | |||||
revision = Revision( | revision = Revision( | ||||
id=revision_id, | id=revision_id, | ||||
message=b"foo", | message=b"foo", | ||||
author=Person.from_fullname(b"foo"), | author=Person.from_fullname(b"foo"), | ||||
committer=Person.from_fullname(b"foo"), | committer=Person.from_fullname(b"foo"), | ||||
date=TimestampWithTimezone( | date=TimestampWithTimezone( | ||||
timestamp=Timestamp(seconds=1580076204, microseconds=0), | timestamp=Timestamp(seconds=1580076204, microseconds=0), | ||||
offset=60, | offset=60, | ||||
negative_utc=False, | negative_utc=False, | ||||
), | ), | ||||
committer_date=TimestampWithTimezone( | committer_date=TimestampWithTimezone( | ||||
timestamp=Timestamp(seconds=1580076204, microseconds=0), | timestamp=Timestamp(seconds=1580076204, microseconds=0), | ||||
offset=60, | offset=60, | ||||
negative_utc=False, | negative_utc=False, | ||||
), | ), | ||||
type=RevisionType.DSC, | type=RevisionType.DSC, | ||||
directory=b"\xd5\x9a\x1f\x9c\x80\x9d\x8c}19P\xf6\xc8\xa2\x0f^%H\xcd\xdb", | directory=b"\xd5\x9a\x1f\x9c\x80\x9d\x8c}19P\xf6\xc8\xa2\x0f^%H\xcd\xdb", | ||||
synthetic=True, | synthetic=True, | ||||
metadata=None, | metadata=None, | ||||
parents=(), | parents=(), | ||||
extra_headers=(), | extra_headers=(), | ||||
) | ) | ||||
storage.revision_get.return_value = [revision] | storage.revision_add([revision]) | ||||
# no matching branch | # no matching branch | ||||
with patch("iter_origin_visits", return_value=[visit]): | |||||
with patch("iter_origin_visit_statuses", return_value=[status]): | |||||
with patch("snapshot_get_all_branches", return_value=snapshot): | |||||
assert debian_origins_from_row(revision_row, storage) == [] | assert debian_origins_from_row(revision_row, storage) == [] | ||||
assert storage.method_calls == [ | |||||
call.revision_get([revision_id]), | |||||
call.revision_get([revision_id]), | |||||
call.revision_get([revision_id]), | |||||
] | |||||
def test_debian_origins_from_row__check_revisions(): | def test_debian_origins_from_row__check_revisions(): | ||||
"""Tests debian_origins_from_row errors when the revision at the head | """Tests debian_origins_from_row errors when the revision at the head | ||||
of a branch is a DSC and has no parents | of a branch is a DSC and has no parents | ||||
""" | """ | ||||
storage = Mock() | storage = get_storage("memory") | ||||
origin_url = "deb://Debian/packages/kalgebra" | origin_url = "deb://Debian/packages/kalgebra" | ||||
revision_id = b"21" * 10 | |||||
storage.origin_add([Origin(url=origin_url)]) | |||||
revision_row = { | revision_row = { | ||||
"id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", | "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", | ||||
"metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]}, | "metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]}, | ||||
} | } | ||||
visit = OriginVisit( | storage.origin_visit_add( | ||||
[ | |||||
OriginVisit( | |||||
origin=origin_url, | origin=origin_url, | ||||
date=datetime.datetime.now(tz=datetime.timezone.utc), | date=datetime.datetime.now(tz=datetime.timezone.utc), | ||||
type="deb", | type="deb", | ||||
visit=280, | visit=280, | ||||
) | ) | ||||
] | |||||
) | |||||
status = OriginVisitStatus( | storage.origin_visit_status_add( | ||||
[ | |||||
OriginVisitStatus( | |||||
origin=origin_url, | origin=origin_url, | ||||
visit=280, | visit=280, | ||||
date=datetime.datetime.now(tz=datetime.timezone.utc), | date=datetime.datetime.now(tz=datetime.timezone.utc), | ||||
status="full", | status="full", | ||||
snapshot=b"42" * 10, | snapshot=b"42" * 10, | ||||
metadata=None, | metadata=None, | ||||
) | ) | ||||
snapshot = Snapshot( | ] | ||||
) | |||||
storage.snapshot_add( | |||||
[ | |||||
Snapshot( | |||||
id=b"42" * 10, | id=b"42" * 10, | ||||
branches={ | branches={ | ||||
b"foo": SnapshotBranch(target_type=TargetType.REVISION, target=b"21" * 10) | b"foo": SnapshotBranch( | ||||
target_type=TargetType.REVISION, target=revision_id | |||||
) | |||||
}, | }, | ||||
) | ) | ||||
] | |||||
) | |||||
storage_before_revision = copy.deepcopy(storage) | |||||
revision = Revision( | revision = Revision( | ||||
id=b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee", | id=revision_id, | ||||
message=b"foo", | message=b"foo", | ||||
author=Person.from_fullname(b"foo"), | author=Person.from_fullname(b"foo"), | ||||
committer=Person.from_fullname(b"foo"), | committer=Person.from_fullname(b"foo"), | ||||
date=TimestampWithTimezone( | date=TimestampWithTimezone( | ||||
timestamp=Timestamp(seconds=1580076204, microseconds=0), | timestamp=Timestamp(seconds=1580076204, microseconds=0), | ||||
offset=60, | offset=60, | ||||
negative_utc=False, | negative_utc=False, | ||||
), | ), | ||||
committer_date=TimestampWithTimezone( | committer_date=TimestampWithTimezone( | ||||
timestamp=Timestamp(seconds=1580076204, microseconds=0), | timestamp=Timestamp(seconds=1580076204, microseconds=0), | ||||
offset=60, | offset=60, | ||||
negative_utc=False, | negative_utc=False, | ||||
), | ), | ||||
type=RevisionType.DSC, | type=RevisionType.DSC, | ||||
directory=b"\xd5\x9a\x1f\x9c\x80\x9d\x8c}19P\xf6\xc8\xa2\x0f^%H\xcd\xdb", | directory=b"\xd5\x9a\x1f\x9c\x80\x9d\x8c}19P\xf6\xc8\xa2\x0f^%H\xcd\xdb", | ||||
synthetic=True, | synthetic=True, | ||||
metadata=None, | metadata=None, | ||||
parents=(b"parent " * 2,), | parents=(b"parent " * 2,), | ||||
extra_headers=(), | extra_headers=(), | ||||
) | ) | ||||
storage.revision_add([revision]) | |||||
storage.revision_get.return_value = [revision] | |||||
with patch("iter_origin_visits", return_value=[visit]): | |||||
with patch("iter_origin_visit_statuses", return_value=[status]): | |||||
with patch("snapshot_get_all_branches", return_value=snapshot): | |||||
with pytest.raises(AssertionError, match="DSC revision with parents"): | with pytest.raises(AssertionError, match="DSC revision with parents"): | ||||
debian_origins_from_row(revision_row, storage) | debian_origins_from_row(revision_row, storage) | ||||
storage = copy.deepcopy(storage_before_revision) | |||||
revision = attr.evolve(revision, type=RevisionType.GIT) | revision = attr.evolve(revision, type=RevisionType.GIT) | ||||
storage.revision_get.return_value = [revision] | storage.revision_add([revision]) | ||||
with patch("iter_origin_visits", return_value=[visit]): | |||||
with patch("iter_origin_visit_statuses", return_value=[status]): | |||||
with patch("snapshot_get_all_branches", return_value=snapshot): | |||||
with pytest.raises(AssertionError, match="non-DSC revision"): | with pytest.raises(AssertionError, match="non-DSC revision"): | ||||
debian_origins_from_row(revision_row, storage) | debian_origins_from_row(revision_row, storage) | ||||
def test_debian_with_extrinsic(): | def test_debian_with_extrinsic(): | ||||
dest_original_artifacts = [ | dest_original_artifacts = [ | ||||
{ | { | ||||
"length": 2936, | "length": 2936, | ||||
"filename": "kalgebra_19.12.1-1.dsc", | "filename": "kalgebra_19.12.1-1.dsc", | ||||
"checksums": { | "checksums": { | ||||
▲ Show 20 Lines • Show All 243 Lines • Show Last 20 Lines |
that's subtle ;)