Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show First 20 Lines • Show All 106 Lines • ▼ Show 20 Lines | class BasePackageInfo: | ||||
# See <https://github.com/python-attrs/attrs/issues/38> | # See <https://github.com/python-attrs/attrs/issues/38> | ||||
directory_extrinsic_metadata = attr.ib( | directory_extrinsic_metadata = attr.ib( | ||||
type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | ||||
) | ) | ||||
# TODO: add support for metadata for directories and contents | # TODO: add support for metadata for directories and contents | ||||
def extid(self) -> Optional[bytes]: | |||||
"""Returns a unique intrinsic identifier of this package info, | |||||
or None if this package info is not 'deduplicatable' (meaning that | |||||
we will always load it, instead of checking the ExtID storage | |||||
to see if we already did)""" | |||||
return None | |||||
class BaseManifestPackageInfo(BasePackageInfo): | |||||
"""Base class for PackageInfo classes that provide an extid based on | |||||
the hash of a manifest made of some of their attributes.""" | |||||
@property | @property | ||||
def MANIFEST_FORMAT(self) -> string.Template: | def MANIFEST_FORMAT(self) -> string.Template: | ||||
"""A string.Template object used to format a manifest, which is hashed | """A string.Template object used to format a manifest, which is hashed | ||||
to get the extid of this package info object""" | to get the extid of this package info object""" | ||||
raise NotImplementedError( | raise NotImplementedError( | ||||
f"{self.__class__.__name__} is missing MANIFEST_FORMAT " | f"{self.__class__.__name__} is missing MANIFEST_FORMAT " | ||||
f"or an override of extid()" | f"or an override of extid()" | ||||
) | ) | ||||
def extid(self) -> bytes: | def extid(self) -> Optional[bytes]: | ||||
"""Returns a unique intrinsic identifier of this package info""" | |||||
manifest = self.MANIFEST_FORMAT.substitute( | manifest = self.MANIFEST_FORMAT.substitute( | ||||
{k: str(v) for (k, v) in attr.asdict(self).items()} | {k: str(v) for (k, v) in attr.asdict(self).items()} | ||||
) | ) | ||||
return hashlib.sha256(manifest.encode()).digest() | return hashlib.sha256(manifest.encode()).digest() | ||||
TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) | TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) | ||||
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines | ) -> Dict[Sha1Git, Optional[ImmutableDict[str, object]]]: | ||||
for rev in snapshot.branches.values() | for rev in snapshot.branches.values() | ||||
if rev and rev.target_type == TargetType.REVISION | if rev and rev.target_type == TargetType.REVISION | ||||
] | ] | ||||
known_revisions = self.storage.revision_get(revs) | known_revisions = self.storage.revision_get(revs) | ||||
return { | return { | ||||
revision.id: revision.metadata for revision in known_revisions if revision | revision.id: revision.metadata for revision in known_revisions if revision | ||||
} | } | ||||
def new_packageinfo_to_extid(self, p_info: TPackageInfo) -> Optional[bytes]: | |||||
return p_info.extid() | |||||
def known_artifact_to_extid(self, known_artifact: Dict) -> Optional[bytes]: | |||||
"""Returns a unique intrinsic identifier of a downloaded artifact, | |||||
used to check if a new artifact is the same.""" | |||||
return None | |||||
def resolve_revision_from( | def resolve_revision_from( | ||||
self, known_artifacts: Dict, p_info: TPackageInfo, | self, known_artifacts: Dict, p_info: TPackageInfo, | ||||
) -> Optional[bytes]: | ) -> Optional[bytes]: | ||||
"""Resolve the revision from a snapshot and an artifact metadata dict. | """Resolve the revision from a snapshot and an artifact metadata dict. | ||||
If the artifact has already been downloaded, this will return the | If the artifact has already been downloaded, this will return the | ||||
existing revision targeting that uncompressed artifact directory. | existing revision targeting that uncompressed artifact directory. | ||||
Otherwise, this returns None. | Otherwise, this returns None. | ||||
Args: | Args: | ||||
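known_artifacts: Dict mapping revision ids to known artifact metadata | known_artifacts: Dict mapping revision ids to known artifact metadata | ||||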
p_info: Package information | p_info: Package information | ||||
Returns: | Returns: | ||||
None or revision identifier | None or revision identifier | ||||
""" | """ | ||||
if not known_artifacts: | |||||
# No known artifact, no need to compute the artifact's extid | |||||
return None | |||||
new_extid = self.new_packageinfo_to_extid(p_info) | |||||
if new_extid is None: | |||||
# This loader does not support deduplication, at least not for this | |||||
# artifact. | |||||
return None | |||||
for rev_id, known_artifact in known_artifacts.items(): | |||||
known_extid = self.known_artifact_to_extid(known_artifact) | |||||
if new_extid == known_extid: | |||||
return rev_id | |||||
return None | return None | ||||
def download_package( | def download_package( | ||||
self, p_info: TPackageInfo, tmpdir: str | self, p_info: TPackageInfo, tmpdir: str | ||||
) -> List[Tuple[str, Mapping]]: | ) -> List[Tuple[str, Mapping]]: | ||||
"""Download artifacts for a specific package. All downloads happen in | """Download artifacts for a specific package. All downloads happen in | ||||
the tmpdir folder. | the tmpdir folder. | ||||
▲ Show 20 Lines • Show All 586 Lines • Show Last 20 Lines |