diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -25,7 +25,11 @@ MetadataTargetType, RawExtrinsicMetadata, ) -from swh.loader.package.loader import PackageLoader, BasePackageInfo +from swh.loader.package.loader import ( + BasePackageInfo, + PackageLoader, + RawExtrinsicMetadataCore, +) from swh.loader.package.utils import download @@ -81,7 +85,11 @@ revision_parents=tuple(hash_to_bytes(p) for p in depo["revision_parents"]), raw_info=metadata, revision_extrinsic_metadata=[ - (None, "sword-v2-atom-codemeta-v2-in-json", raw_metadata_from_origin,), + RawExtrinsicMetadataCore( + format="sword-v2-atom-codemeta-v2-in-json", + metadata=raw_metadata_from_origin, + discovery_date=None, + ), ], ) diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -54,6 +54,18 @@ logger = logging.getLogger(__name__) +@attr.s +class RawExtrinsicMetadataCore: + """Contains the core of the metadata extracted by a loader, that will be + used to build a full RawExtrinsicMetadata object by adding object identifier, + context, and provenance information.""" + + format = attr.ib(type=str) + metadata = attr.ib(type=bytes) + discovery_date = attr.ib(type=Optional[datetime.datetime]) + """Defaults to the visit date.""" + + @attr.s class BasePackageInfo: """Compute the primary key for a dict using the id_keys as primary key @@ -77,12 +89,8 @@ # See revision_extrinsic_metadata = attr.ib( - type=List[Tuple[Optional[datetime.datetime], str, bytes]], - default=[], - kw_only=True, + type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, ) - """Tuple elements are respectively the 'discovery_date' (which defaults to the - visit date), 'format', and 'metadata' fields of RawExtrinsicMetadata""" # TODO: add support for metadata for origins, directories, and contents @@ -595,16 +603,16 @@ metadata_objects = [] - for (discovery_date, format, metadata) in p_info.revision_extrinsic_metadata: + for item in p_info.revision_extrinsic_metadata: metadata_objects.append( RawExtrinsicMetadata( type=MetadataTargetType.REVISION, id=SWHID(object_type="revision", object_id=revision_id), - discovery_date=discovery_date or self.visit_date, + discovery_date=item.discovery_date or self.visit_date, authority=authority, fetcher=fetcher, - format=format, - metadata=metadata, + format=item.format, + metadata=item.metadata, origin=self.url, ) ) diff --git a/swh/loader/package/tests/test_loader_metadata.py b/swh/loader/package/tests/test_loader_metadata.py --- a/swh/loader/package/tests/test_loader_metadata.py +++ b/swh/loader/package/tests/test_loader_metadata.py @@ -8,7 +8,11 @@ import attr -from swh.loader.package.loader import BasePackageInfo, PackageLoader +from swh.loader.package.loader import ( + BasePackageInfo, + PackageLoader, + RawExtrinsicMetadataCore, +) from swh.model.hashutil import hash_to_bytes from swh.model.identifiers import SWHID from swh.model.model import ( @@ -81,8 +85,8 @@ url=ORIGIN_URL, filename="archive.tgz", revision_extrinsic_metadata=[ - (m0.discovery_date, m0.format, m0.metadata), - (m1.discovery_date, m1.format, m1.metadata), + RawExtrinsicMetadataCore(m0.format, m0.metadata, m0.discovery_date), + RawExtrinsicMetadataCore(m1.format, m1.metadata, m1.discovery_date), ], )