Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | |||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
@attr.s
class BasePackageInfo:
    """Base class for the package information handled by a package loader.

    It carries the attributes common to every package (``url``, ``filename``
    and ``raw``). Subclasses are expected to override ``ID_KEYS`` so that
    ``artifact_identity`` can compute the identity of the package.
    """

    url = attr.ib(type=str)
    filename = attr.ib(type=Optional[str])
    # NOTE(review): presumably the original, unprocessed artifact metadata
    # this object was built from -- confirm against the concrete loaders.
    raw = attr.ib(type=Any)

    @property
    def ID_KEYS(self):
        """Names of the attributes used as the composite primary key.

        Raises:
            NotImplementedError: always, in this base class; the message
                names the concrete class that is missing the override.

        """
        raise NotImplementedError(f"{self.__class__.__name__} is missing ID_KEYS")

    def artifact_identity(self):
        """Compute the primary key of this package info, using ``ID_KEYS``
        as the composite primary key.

        Returns:
            The list of the values of the attributes named in ``ID_KEYS``,
            in the same order

        """
        return [getattr(self, k) for k in self.ID_KEYS]
TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) | TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) | ||||
class PackageLoader(Generic[TPackageInfo]): | class PackageLoader(Generic[TPackageInfo]): | ||||
# Origin visit type (str) set by the loader | # Origin visit type (str) set by the loader | ||||
visit_type = "" | visit_type = "" | ||||
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | def get_package_info(self, version: str) -> Iterator[Tuple[str, TPackageInfo]]: | ||||
Returns: | Returns: | ||||
(branch name, package metadata) | (branch name, package metadata) | ||||
""" | """ | ||||
yield from {} | yield from {} | ||||
def build_revision(
    self, p_info: TPackageInfo, uncompressed_path: str, directory: Sha1Git
) -> Optional[Revision]:
    """Build the revision from the package information (extrinsic
    artifact metadata) and the intrinsic metadata.

    This base implementation always raises; concrete loaders are expected
    to override it.

    Args:
        p_info: Package information
        uncompressed_path: Artifact uncompressed path on disk
        directory: Hash of the directory the revision targets (the caller
            passes ``directory.hash``)

    Returns:
        The revision, or None when no revision can be built for the
        artifact (the caller then skips it)

    Raises:
        NotImplementedError: when the method is not overridden

    """
    raise NotImplementedError("build_revision")
Show All 35 Lines | def known_artifacts(self, snapshot: Optional[Snapshot]) -> Dict[Sha1Git, BaseModel]: | ||||
return { | return { | ||||
revision["id"]: revision["metadata"] | revision["id"]: revision["metadata"] | ||||
for revision in known_revisions | for revision in known_revisions | ||||
if revision | if revision | ||||
} | } | ||||
def resolve_revision_from(
    self, known_artifacts: Dict, p_info: TPackageInfo,
) -> Optional[bytes]:
    """Resolve the revision from the known artifacts and a package info.

    If the artifact has already been downloaded, this will return the
    existing revision targeting that uncompressed artifact directory.
    Otherwise, this returns None.

    This default implementation never resolves anything and always returns
    None, so the caller loads every package it is given.

    Args:
        known_artifacts: Already known artifacts (presumably the mapping
            of revision id to revision metadata built by
            ``known_artifacts`` -- to be confirmed against the caller)
        p_info: Package information

    Returns:
        None or revision identifier

    """
    return None
def download_package( | def download_package( | ||||
▲ Show 20 Lines • Show All 148 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
load_exceptions: List[Exception] = [] | load_exceptions: List[Exception] = [] | ||||
for version in self.get_versions(): # for each | for version in self.get_versions(): # for each | ||||
logger.debug("version: %s", version) | logger.debug("version: %s", version) | ||||
tmp_revisions[version] = [] | tmp_revisions[version] = [] | ||||
# `p_` stands for `package_` | # `p_` stands for `package_` | ||||
for branch_name, p_info in self.get_package_info(version): | for branch_name, p_info in self.get_package_info(version): | ||||
logger.debug("package_info: %s", p_info) | logger.debug("package_info: %s", p_info) | ||||
revision_id = self.resolve_revision_from(known_artifacts, p_info.raw) | revision_id = self.resolve_revision_from(known_artifacts, p_info) | ||||
if revision_id is None: | if revision_id is None: | ||||
try: | try: | ||||
revision_id = self._load_revision(p_info, origin) | revision_id = self._load_revision(p_info, origin) | ||||
self.storage.flush() | self.storage.flush() | ||||
status_load = "eventful" | status_load = "eventful" | ||||
except Exception as e: | except Exception as e: | ||||
self.storage.clear_buffers() | self.storage.clear_buffers() | ||||
load_exceptions.append(e) | load_exceptions.append(e) | ||||
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines | def _load_revision(self, p_info: TPackageInfo, origin) -> Optional[Sha1Git]: | ||||
logger.debug("Number of contents: %s", len(contents)) | logger.debug("Number of contents: %s", len(contents)) | ||||
self.storage.content_add(contents) | self.storage.content_add(contents) | ||||
logger.debug("Number of directories: %s", len(directories)) | logger.debug("Number of directories: %s", len(directories)) | ||||
self.storage.directory_add(directories) | self.storage.directory_add(directories) | ||||
# FIXME: This should be release. cf. D409 | # FIXME: This should be release. cf. D409 | ||||
revision = self.build_revision( | revision = self.build_revision( | ||||
p_info.raw, uncompressed_path, directory=directory.hash | p_info, uncompressed_path, directory=directory.hash | ||||
) | ) | ||||
if not revision: | if not revision: | ||||
# Some artifacts are missing intrinsic metadata | # Some artifacts are missing intrinsic metadata | ||||
# skipping those | # skipping those | ||||
return None | return None | ||||
extra_metadata: Tuple[str, Any] = ( | extra_metadata: Tuple[str, Any] = ( | ||||
"original_artifact", | "original_artifact", | ||||
▲ Show 20 Lines • Show All 59 Lines • Show Last 20 Lines |