Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show First 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | SWH_METADATA_AUTHORITY = MetadataAuthority( | ||||
url="https://softwareheritage.org/", | url="https://softwareheritage.org/", | ||||
metadata={}, | metadata={}, | ||||
) | ) | ||||
"""Metadata authority for extrinsic metadata generated by Software Heritage. | """Metadata authority for extrinsic metadata generated by Software Heritage. | ||||
Used for metadata on "original artifacts", i.e. length, filename, and checksums | Used for metadata on "original artifacts", i.e. length, filename, and checksums | ||||
of downloaded archive files.""" | of downloaded archive files.""" | ||||
PartialExtID = Tuple[str, bytes] | PartialExtID = Tuple[str, int, bytes] | ||||
"""The ``extid_type`` and ``extid`` fields of an :class:`ExtID` object.""" | """The ``extid_type``, ``extid_version`` and ``extid`` fields of an :class:`ExtID` object.""" | ||||
@attr.s | @attr.s | ||||
class RawExtrinsicMetadataCore: | class RawExtrinsicMetadataCore: | ||||
"""Contains the core of the metadata extracted by a loader, that will be | """Contains the core of the metadata extracted by a loader, that will be | ||||
used to build a full RawExtrinsicMetadata object by adding object identifier, | used to build a full RawExtrinsicMetadata object by adding object identifier, | ||||
context, and provenance information.""" | context, and provenance information.""" | ||||
Show All 23 Lines | class BasePackageInfo: | ||||
version = attr.ib(type=str) | version = attr.ib(type=str) | ||||
"""Version name/number.""" | """Version name/number.""" | ||||
MANIFEST_FORMAT: Optional[string.Template] = None | MANIFEST_FORMAT: Optional[string.Template] = None | ||||
"""If not None, used by the default extid() implementation to format a manifest, | """If not None, used by the default extid() implementation to format a manifest, | ||||
before hashing it to produce an ExtID.""" | before hashing it to produce an ExtID.""" | ||||
EXTID_TYPE: str = "package-manifest-sha256" | EXTID_TYPE: str = "package-manifest-sha256" | ||||
EXTID_VERSION: int = 0 | |||||
# The following attribute has kw_only=True in order to allow subclasses | # The following attribute has kw_only=True in order to allow subclasses | ||||
# to add attributes. Without kw_only, attributes without default values cannot | # to add attributes. Without kw_only, attributes without default values cannot | ||||
# go after attributes with default values. | # go after attributes with default values. | ||||
# See <https://github.com/python-attrs/attrs/issues/38> | # See <https://github.com/python-attrs/attrs/issues/38> | ||||
directory_extrinsic_metadata = attr.ib( | directory_extrinsic_metadata = attr.ib( | ||||
type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | ||||
Show All 9 Lines | def extid(self) -> Optional[PartialExtID]: | ||||
we will always load it, instead of checking the ExtID storage | we will always load it, instead of checking the ExtID storage | ||||
to see if we already did)""" | to see if we already did)""" | ||||
if self.MANIFEST_FORMAT is None: | if self.MANIFEST_FORMAT is None: | ||||
return None | return None | ||||
else: | else: | ||||
manifest = self.MANIFEST_FORMAT.substitute( | manifest = self.MANIFEST_FORMAT.substitute( | ||||
{k: str(v) for (k, v) in attr.asdict(self).items()} | {k: str(v) for (k, v) in attr.asdict(self).items()} | ||||
) | ) | ||||
return (self.EXTID_TYPE, hashlib.sha256(manifest.encode()).digest()) | return ( | ||||
self.EXTID_TYPE, | |||||
self.EXTID_VERSION, | |||||
hashlib.sha256(manifest.encode()).digest(), | |||||
) | |||||
TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) | TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) | ||||
class PackageLoader(BaseLoader, Generic[TPackageInfo]): | class PackageLoader(BaseLoader, Generic[TPackageInfo]): | ||||
# Origin visit type (str) set by the loader | # Origin visit type (str) set by the loader | ||||
visit_type = "" | visit_type = "" | ||||
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines | class PackageLoader(BaseLoader, Generic[TPackageInfo]): | ||||
def _get_known_extids( | def _get_known_extids( | ||||
self, packages_info: List[TPackageInfo] | self, packages_info: List[TPackageInfo] | ||||
) -> Dict[PartialExtID, List[CoreSWHID]]: | ) -> Dict[PartialExtID, List[CoreSWHID]]: | ||||
"""Compute the ExtIDs from new PackageInfo objects, searches which are already | """Compute the ExtIDs from new PackageInfo objects, searches which are already | ||||
loaded in the archive, and returns them if any.""" | loaded in the archive, and returns them if any.""" | ||||
# Compute the ExtIDs of all the new packages, grouped by extid type | # Compute the ExtIDs of all the new packages, grouped by extid type and version | ||||
new_extids: Dict[str, List[bytes]] = {} | new_extids: Dict[Tuple[str, int], List[bytes]] = {} | ||||
for p_info in packages_info: | for p_info in packages_info: | ||||
res = p_info.extid() | res = p_info.extid() | ||||
if res is not None: | if res is not None: | ||||
(extid_type, extid_extid) = res | (extid_type, extid_version, extid_extid) = res | ||||
new_extids.setdefault(extid_type, []).append(extid_extid) | new_extids.setdefault((extid_type, extid_version), []).append( | ||||
extid_extid | |||||
) | |||||
# For each extid type, call extid_get_from_extid() with all the extids of | # For each (extid type, version) pair, call extid_get_from_extid() with all the extids of | ||||
# that type, and store them in the '(type, extid) -> target' map. | # that pair, and store them in the '(type, version, extid) -> target' map. | ||||
known_extids: Dict[PartialExtID, List[CoreSWHID]] = {} | known_extids: Dict[PartialExtID, List[CoreSWHID]] = {} | ||||
for (extid_type, extids) in new_extids.items(): | for ((extid_type, extid_version), extids) in new_extids.items(): | ||||
for extid in self.storage.extid_get_from_extid(extid_type, extids): | for extid in self.storage.extid_get_from_extid( | ||||
extid_type, extids, version=extid_version | |||||
): | |||||
if extid is not None: | if extid is not None: | ||||
key = (extid.extid_type, extid.extid) | key = (extid.extid_type, extid_version, extid.extid) | ||||
known_extids.setdefault(key, []).append(extid.target) | known_extids.setdefault(key, []).append(extid.target) | ||||
return known_extids | return known_extids | ||||
def resolve_object_from_extids( | def resolve_object_from_extids( | ||||
self, | self, | ||||
known_extids: Dict[PartialExtID, List[CoreSWHID]], | known_extids: Dict[PartialExtID, List[CoreSWHID]], | ||||
p_info: TPackageInfo, | p_info: TPackageInfo, | ||||
▲ Show 20 Lines • Show All 388 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
else: | else: | ||||
assert False, f"Unexpected object type: {swhid}" | assert False, f"Unexpected object type: {swhid}" | ||||
assert release_id is not None | assert release_id is not None | ||||
if add_extid: | if add_extid: | ||||
partial_extid = p_info.extid() | partial_extid = p_info.extid() | ||||
if partial_extid is not None: | if partial_extid is not None: | ||||
(extid_type, extid) = partial_extid | (extid_type, extid_version, extid) = partial_extid | ||||
release_swhid = CoreSWHID( | release_swhid = CoreSWHID( | ||||
object_type=ObjectType.RELEASE, object_id=release_id | object_type=ObjectType.RELEASE, object_id=release_id | ||||
) | ) | ||||
new_extids.add( | new_extids.add( | ||||
ExtID(extid_type=extid_type, extid=extid, target=release_swhid) | ExtID( | ||||
extid_type=extid_type, | |||||
extid_version=extid_version, | |||||
extid=extid, | |||||
target=release_swhid, | |||||
) | |||||
) | ) | ||||
tmp_releases[p_info.version].append((branch_name, release_id)) | tmp_releases[p_info.version].append((branch_name, release_id)) | ||||
if load_exceptions: | if load_exceptions: | ||||
status_visit = "partial" | status_visit = "partial" | ||||
if not tmp_releases: | if not tmp_releases: | ||||
▲ Show 20 Lines • Show All 386 Lines • Show Last 20 Lines |