Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show First 20 Lines • Show All 95 Lines • ▼ Show 20 Lines | class BasePackageInfo: | ||||
Returns: | Returns: | ||||
The identity for that dict entry | The identity for that dict entry | ||||
""" | """ | ||||
url = attr.ib(type=str) | url = attr.ib(type=str) | ||||
filename = attr.ib(type=Optional[str]) | filename = attr.ib(type=Optional[str]) | ||||
MANIFEST_FORMAT: Optional[string.Template] = None | |||||
"""If not None, used by the default extid() implementation to format a manifest, | |||||
before hashing it to produce an ExtID.""" | |||||
# The following attribute has kw_only=True in order to allow subclasses | # The following attribute has kw_only=True in order to allow subclasses | ||||
# to add attributes. Without kw_only, attributes without default values cannot | # to add attributes. Without kw_only, attributes without default values cannot | ||||
# go after attributes with default values. | # go after attributes with default values. | ||||
# See <https://github.com/python-attrs/attrs/issues/38> | # See <https://github.com/python-attrs/attrs/issues/38> | ||||
directory_extrinsic_metadata = attr.ib( | directory_extrinsic_metadata = attr.ib( | ||||
type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | ||||
) | ) | ||||
# TODO: add support for metadata for directories and contents | # TODO: add support for metadata for directories and contents | ||||
@property | def extid(self) -> Optional[bytes]: | ||||
def MANIFEST_FORMAT(self) -> string.Template: | """Returns a unique intrinsic identifier of this package info, | ||||
"""A string.Template object used to format a manifest, which is hashed | or None if this package info is not 'deduplicatable' (meaning that | ||||
to get the extid of this package info object""" | we will always load it, instead of checking the ExtID storage | ||||
raise NotImplementedError( | to see if we already did)""" | ||||
f"{self.__class__.__name__} is missing MANIFEST_FORMAT " | if self.MANIFEST_FORMAT is None: | ||||
f"or an override of extid()" | return None | ||||
) | else: | ||||
def extid(self) -> bytes: | |||||
"""Returns a unique intrinsic identifier of this package info""" | |||||
manifest = self.MANIFEST_FORMAT.substitute( | manifest = self.MANIFEST_FORMAT.substitute( | ||||
{k: str(v) for (k, v) in attr.asdict(self).items()} | {k: str(v) for (k, v) in attr.asdict(self).items()} | ||||
) | ) | ||||
return hashlib.sha256(manifest.encode()).digest() | return hashlib.sha256(manifest.encode()).digest() | ||||
TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) | TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) | ||||
class PackageLoader(BaseLoader, Generic[TPackageInfo]): | class PackageLoader(BaseLoader, Generic[TPackageInfo]): | ||||
# Origin visit type (str) set by the loader | # Origin visit type (str) set by the loader | ||||
visit_type = "" | visit_type = "" | ||||
▲ Show 20 Lines • Show All 95 Lines • ▼ Show 20 Lines | ) -> Dict[Sha1Git, Optional[ImmutableDict[str, object]]]: | ||||
for rev in snapshot.branches.values() | for rev in snapshot.branches.values() | ||||
if rev and rev.target_type == TargetType.REVISION | if rev and rev.target_type == TargetType.REVISION | ||||
] | ] | ||||
known_revisions = self.storage.revision_get(revs) | known_revisions = self.storage.revision_get(revs) | ||||
return { | return { | ||||
revision.id: revision.metadata for revision in known_revisions if revision | revision.id: revision.metadata for revision in known_revisions if revision | ||||
} | } | ||||
def new_packageinfo_to_extid(self, p_info: TPackageInfo) -> Optional[bytes]: | |||||
return p_info.extid() | |||||
def known_artifact_to_extid(self, known_artifact: Dict) -> Optional[bytes]: | |||||
"""Returns a unique intrinsic identifier of a downloaded artifact, | |||||
used to check if a new artifact is the same.""" | |||||
return None | |||||
def resolve_revision_from( | def resolve_revision_from( | ||||
self, known_artifacts: Dict, p_info: TPackageInfo, | self, known_artifacts: Dict, p_info: TPackageInfo, | ||||
) -> Optional[bytes]: | ) -> Optional[bytes]: | ||||
"""Resolve the revision from a snapshot and an artifact metadata dict. | """Resolve the revision from a snapshot and an artifact metadata dict. | ||||
If the artifact has already been downloaded, this will return the | If the artifact has already been downloaded, this will return the | ||||
existing revision targeting that uncompressed artifact directory. | existing revision targeting that uncompressed artifact directory. | ||||
Otherwise, this returns None. | Otherwise, this returns None. | ||||
Args: | Args: | ||||
known_artifacts: Known artifacts, keyed by revision id | known_artifacts: Known artifacts, keyed by revision id | ||||
p_info: Package information | p_info: Package information | ||||
Returns: | Returns: | ||||
None or revision identifier | None or revision identifier | ||||
""" | """ | ||||
if not known_artifacts: | |||||
# No known artifact, no need to compute the artifact's extid | |||||
return None | |||||
new_extid = self.new_packageinfo_to_extid(p_info) | |||||
if new_extid is None: | |||||
# This loader does not support deduplication, at least not for this | |||||
# artifact. | |||||
return None | |||||
for rev_id, known_artifact in known_artifacts.items(): | |||||
known_extid = self.known_artifact_to_extid(known_artifact) | |||||
if new_extid == known_extid: | |||||
return rev_id | |||||
return None | return None | ||||
def download_package( | def download_package( | ||||
self, p_info: TPackageInfo, tmpdir: str | self, p_info: TPackageInfo, tmpdir: str | ||||
) -> List[Tuple[str, Mapping]]: | ) -> List[Tuple[str, Mapping]]: | ||||
"""Download artifacts for a specific package. All downloads happen in | """Download artifacts for a specific package. All downloads happen in | ||||
the tmpdir folder. | the tmpdir folder. | ||||
▲ Show 20 Lines • Show All 586 Lines • Show Last 20 Lines |