Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/migrate_extrinsic_metadata/test_debian.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
# flake8: noqa | # flake8: noqa | ||||
# because of long lines | # because of long lines | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import json | import json | ||||
from unittest.mock import call, Mock | from unittest.mock import call, Mock, patch as _patch | ||||
import attr | |||||
import pytest | |||||
from swh.model.identifiers import parse_swhid | from swh.model.identifiers import parse_swhid | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
OriginVisit, | |||||
OriginVisitStatus, | |||||
Person, | |||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Revision, | |||||
RevisionType, | |||||
Snapshot, | |||||
SnapshotBranch, | |||||
TargetType, | |||||
Timestamp, | |||||
TimestampWithTimezone, | |||||
) | ) | ||||
from swh.storage.migrate_extrinsic_metadata import handle_row | from swh.storage.interface import ListOrder, PagedResult | ||||
from swh.storage.migrate_extrinsic_metadata import handle_row, debian_origins_from_row | |||||
# Fetcher expected on the RawExtrinsicMetadata objects asserted by the
# handle_row tests in this module.
FETCHER = MetadataFetcher(
    version="0.0.1", name="migrate-extrinsic-metadata-from-revisions",
)

# Authority expected on the RawExtrinsicMetadata objects asserted by the
# handle_row tests in this module.
SWH_AUTHORITY = MetadataAuthority(
    metadata={},
    url="https://softwareheritage.org/",
    type=MetadataAuthorityType.REGISTRY,
)
def patch(function_name, *args, **kwargs):
    """Patch a name inside ``swh.storage.migrate_extrinsic_metadata``.

    Shorthand for ``unittest.mock.patch`` that prepends the module path to
    ``function_name``, which keeps the ``with`` statements in the tests short.
    All other positional and keyword arguments are forwarded unchanged.
    """
    target = "swh.storage.migrate_extrinsic_metadata." + function_name
    return _patch(target, *args, **kwargs)
def test_debian_origins_from_row():
    """Tests debian_origins_from_row on a real example (with some parts
    omitted, for conciseness).

    The storage is a Mock whose ``origin_visit_get`` is driven by a paginated
    side effect and whose ``origin_visit_status_get`` returns a single "full"
    status; ``snapshot_get_all_branches`` is patched to return a snapshot with
    one revision branch. The test checks both the returned origin list and
    the exact sequence of storage calls.
    """
    origin_url = "deb://Debian/packages/kalgebra"

    visit = OriginVisit(
        origin=origin_url,
        date=datetime.datetime(
            2020, 1, 27, 19, 32, 3, 925498, tzinfo=datetime.timezone.utc,
        ),
        type="deb",
        visit=280,
    )

    def mock_origin_visit_get(origin, page_token, order):
        """Side effect for ``storage.origin_visit_get``: only the Debian
        origin has a visit; any unexpected origin or page token fails the
        test."""
        if origin in (
            "deb://Debian-Security/packages/kalgebra",
            "http://snapshot.debian.org/package/kalgebra/",
        ):
            return PagedResult(results=[], next_page_token=None)
        elif origin == "deb://Debian/packages/kalgebra":
            # Identity comparison: None is a singleton (PEP 8).
            if page_token is None:
                return PagedResult(
                    # ...
                    results=[visit,],
                    next_page_token="280",
                )
            elif page_token == "280":
                return PagedResult(results=[], next_page_token=None)
            else:
                # Explicit raise rather than `assert False`, so the guard
                # survives `python -O`.
                raise AssertionError(page_token)
        else:
            raise AssertionError(origin)

    storage = Mock()

    storage.origin_visit_get.side_effect = mock_origin_visit_get

    storage.origin_visit_status_get.return_value = PagedResult(
        results=[
            OriginVisitStatus(
                origin=origin_url,
                visit=280,
                date=datetime.datetime(
                    2020, 1, 27, 19, 32, 3, 925498, tzinfo=datetime.timezone.utc
                ),
                status="full",
                snapshot=b"\xafD\x15\x98){\xd4$\xdeI\x1f\xbe\x95lh`x\x14\xce\xc4",
                metadata=None,
            )
        ],
        next_page_token=None,
    )

    snapshot = Snapshot(
        id=b"\xafD\x15\x98){\xd4$\xdeI\x1f\xbe\x95lh`x\x14\xce\xc4",
        branches={
            # ...
            b"releases/unstable/main/4:19.12.1-1": SnapshotBranch(
                target=b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
                target_type=TargetType.REVISION,
            ),
        },
    )

    revision_row = {
        "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
        "metadata": {
            # ...
            "original_artifact": [
                {
                    "filename": "kalgebra_19.12.1-1.dsc",
                    # ...
                },
            ]
        },
    }

    with patch("snapshot_get_all_branches", return_value=snapshot):
        assert debian_origins_from_row(revision_row, storage) == [origin_url]

    assert storage.method_calls == [
        call.origin_visit_get(
            "deb://Debian/packages/kalgebra", order=ListOrder.ASC, page_token=None
        ),
        call.origin_visit_status_get(
            "deb://Debian/packages/kalgebra", 280, order=ListOrder.ASC, page_token=None
        ),
        call.origin_visit_get(
            "deb://Debian-Security/packages/kalgebra",
            order=ListOrder.ASC,
            page_token=None,
        ),
        call.origin_visit_get(
            "http://snapshot.debian.org/package/kalgebra/",
            order=ListOrder.ASC,
            page_token=None,
        ),
    ]
def test_debian_origins_from_row__no_result():
    """Checks that debian_origins_from_row returns an empty list at each
    stage where the lookup can come up empty: no visit, no visit status,
    a status without snapshot, a snapshot without branches, and no matching
    branch.
    """
    storage = Mock()

    origin_url = "deb://Debian/packages/kalgebra"

    row = {
        "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
        "metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]},
    }

    # no visit
    with patch("iter_origin_visits", return_value=[]):
        assert debian_origins_from_row(row, storage) == []

    assert storage.method_calls == []

    origin_visit = OriginVisit(
        origin=origin_url,
        date=datetime.datetime.now(tz=datetime.timezone.utc),
        type="deb",
        visit=280,
    )

    # no status
    with patch("iter_origin_visits", return_value=[origin_visit]), patch(
        "iter_origin_visit_statuses", return_value=[]
    ):
        assert debian_origins_from_row(row, storage) == []

    assert storage.method_calls == []

    visit_status = OriginVisitStatus(
        origin=origin_url,
        visit=280,
        date=datetime.datetime.now(tz=datetime.timezone.utc),
        status="full",
        snapshot=None,
        metadata=None,
    )

    # no snapshot
    with patch("iter_origin_visits", return_value=[origin_visit]), patch(
        "iter_origin_visit_statuses", return_value=[visit_status]
    ):
        assert debian_origins_from_row(row, storage) == []

    assert storage.method_calls == []

    visit_status = attr.evolve(visit_status, snapshot=b"42" * 10)
    empty_snapshot = Snapshot(id=b"42" * 10, branches={},)

    # no branch
    with patch("iter_origin_visits", return_value=[origin_visit]), patch(
        "iter_origin_visit_statuses", return_value=[visit_status]
    ), patch("snapshot_get_all_branches", return_value=empty_snapshot):
        assert debian_origins_from_row(row, storage) == []

    dsc_revision = Revision(
        id=b"21" * 10,
        message=b"foo",
        author=Person.from_fullname(b"foo"),
        committer=Person.from_fullname(b"foo"),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1580076204, microseconds=0),
            offset=60,
            negative_utc=False,
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1580076204, microseconds=0),
            offset=60,
            negative_utc=False,
        ),
        type=RevisionType.DSC,
        directory=b"\xd5\x9a\x1f\x9c\x80\x9d\x8c}19P\xf6\xc8\xa2\x0f^%H\xcd\xdb",
        synthetic=True,
        metadata=None,
        parents=(),
        extra_headers=(),
    )

    storage.revision_get.return_value = [dsc_revision]

    # no matching branch
    # NOTE(review): the snapshot used here still has `branches={}`, so this
    # step looks identical to the "no branch" one apart from revision_get
    # being configured -- confirm whether a non-matching branch was intended.
    with patch("iter_origin_visits", return_value=[origin_visit]), patch(
        "iter_origin_visit_statuses", return_value=[visit_status]
    ), patch("snapshot_get_all_branches", return_value=empty_snapshot):
        assert debian_origins_from_row(row, storage) == []

    assert storage.method_calls == []
def test_debian_origins_from_row__check_revisions():
    """Tests debian_origins_from_row raises an AssertionError when the
    revision at the head of a branch is a DSC with parents, or is not a DSC
    at all.
    """
    storage = Mock()

    origin_url = "deb://Debian/packages/kalgebra"

    revision_row = {
        "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
        "metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]},
    }

    visit = OriginVisit(
        origin=origin_url,
        date=datetime.datetime.now(tz=datetime.timezone.utc),
        type="deb",
        visit=280,
    )

    status = OriginVisitStatus(
        origin=origin_url,
        visit=280,
        date=datetime.datetime.now(tz=datetime.timezone.utc),
        status="full",
        snapshot=b"42" * 10,
        metadata=None,
    )

    snapshot = Snapshot(
        id=b"42" * 10,
        branches={
            b"foo": SnapshotBranch(target_type=TargetType.REVISION, target=b"21" * 10)
        },
    )

    # First case: a DSC revision that (unexpectedly) has a parent.
    revision = Revision(
        id=b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
        message=b"foo",
        author=Person.from_fullname(b"foo"),
        committer=Person.from_fullname(b"foo"),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1580076204, microseconds=0),
            offset=60,
            negative_utc=False,
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1580076204, microseconds=0),
            offset=60,
            negative_utc=False,
        ),
        type=RevisionType.DSC,
        directory=b"\xd5\x9a\x1f\x9c\x80\x9d\x8c}19P\xf6\xc8\xa2\x0f^%H\xcd\xdb",
        synthetic=True,
        metadata=None,
        parents=(b"parent    " * 2,),
        extra_headers=(),
    )
    storage.revision_get.return_value = [revision]

    with patch("iter_origin_visits", return_value=[visit]):
        with patch("iter_origin_visit_statuses", return_value=[status]):
            with patch("snapshot_get_all_branches", return_value=snapshot):
                with pytest.raises(AssertionError, match="DSC revision with parents"):
                    debian_origins_from_row(revision_row, storage)

    # Second case: same revision, but typed as a git revision instead of DSC.
    revision = attr.evolve(revision, type=RevisionType.GIT)
    storage.revision_get.return_value = [revision]

    with patch("iter_origin_visits", return_value=[visit]):
        with patch("iter_origin_visit_statuses", return_value=[status]):
            with patch("snapshot_get_all_branches", return_value=snapshot):
                with pytest.raises(AssertionError, match="non-DSC revision"):
                    debian_origins_from_row(revision_row, storage)
def test_debian_with_extrinsic(): | def test_debian_with_extrinsic(): | ||||
dest_original_artifacts = [ | dest_original_artifacts = [ | ||||
{ | { | ||||
"length": 2936, | "length": 2936, | ||||
"filename": "kalgebra_19.12.1-1.dsc", | "filename": "kalgebra_19.12.1-1.dsc", | ||||
"checksums": { | "checksums": { | ||||
"sha1": "f869e9f1155b1ee6d28ae3b40060570152a358cd", | "sha1": "f869e9f1155b1ee6d28ae3b40060570152a358cd", | ||||
"sha256": "75f77150aefdaa4bcf8bc5b1e9b8b90b5cb1651b76a068c5e58e5b83658d5d11", | "sha256": "75f77150aefdaa4bcf8bc5b1e9b8b90b5cb1651b76a068c5e58e5b83658d5d11", | ||||
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | row = { | ||||
# ... | # ... | ||||
}, | }, | ||||
"tool": "dsc", | "tool": "dsc", | ||||
}, | }, | ||||
"original_artifact": source_original_artifacts, | "original_artifact": source_original_artifacts, | ||||
}, | }, | ||||
} | } | ||||
origin_url = "deb://Debian/packages/kalgebra" | |||||
storage = Mock() | storage = Mock() | ||||
deposit_cur = None | deposit_cur = None | ||||
with patch("debian_origins_from_row", return_value=[origin_url]): | |||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=parse_swhid( | id=parse_swhid( | ||||
"swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee" | "swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee" | ||||
), | ), | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc, | 2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc, | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
origin=origin_url, | |||||
), | ), | ||||
] | ] | ||||
) | ), | ||||
] | ] | ||||
def test_debian_without_extrinsic(): | def test_debian_without_extrinsic(): | ||||
source_original_artifacts = [ | source_original_artifacts = [ | ||||
{ | { | ||||
"name": "pymongo_1.10-1.dsc", | "name": "pymongo_1.10-1.dsc", | ||||
"sha1": "81877c1ae4406c2519b9cc9c4557cf6b0775a241", | "sha1": "81877c1ae4406c2519b9cc9c4557cf6b0775a241", | ||||
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | row = { | ||||
}, | }, | ||||
"lister_metadata": {"id": 244296, "lister": "snapshot.debian.org"}, | "lister_metadata": {"id": 244296, "lister": "snapshot.debian.org"}, | ||||
}, | }, | ||||
"original_artifact": source_original_artifacts, | "original_artifact": source_original_artifacts, | ||||
}, | }, | ||||
} | } | ||||
storage = Mock() | storage = Mock() | ||||
origin_url = "http://snapshot.debian.org/package/pymongo" | |||||
deposit_cur = None | deposit_cur = None | ||||
with patch("debian_origins_from_row", return_value=[origin_url]): | |||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=parse_swhid( | id=parse_swhid( | ||||
"swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7" | "swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7" | ||||
), | ), | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc | 2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
origin=origin_url, | |||||
), | ), | ||||
] | ] | ||||
) | ) | ||||
] | ] |