Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | |||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
@attr.s | @attr.s | ||||
class RawExtrinsicMetadataCore: | |||||
"""Contains the core of the metadata extracted by a loader, that will be | |||||
used to build a full RawExtrinsicMetadata object by adding object identifier, | |||||
context, and provenance information.""" | |||||
format = attr.ib(type=str) | |||||
metadata = attr.ib(type=bytes) | |||||
discovery_date = attr.ib(type=Optional[datetime.datetime]) | |||||
"""Defaults to the visit date.""" | |||||
@attr.s | |||||
class BasePackageInfo: | class BasePackageInfo: | ||||
"""Compute the primary key for a dict using the id_keys as primary key | """Compute the primary key for a dict using the id_keys as primary key | ||||
composite. | composite. | ||||
Args: | Args: | ||||
d: A dict entry to compute the primary key on | d: A dict entry to compute the primary key on | ||||
id_keys: Sequence of keys to use as primary key | id_keys: Sequence of keys to use as primary key | ||||
Returns: | Returns: | ||||
The identity for that dict entry | The identity for that dict entry | ||||
""" | """ | ||||
url = attr.ib(type=str) | url = attr.ib(type=str) | ||||
filename = attr.ib(type=Optional[str]) | filename = attr.ib(type=Optional[str]) | ||||
# The following attribute has kw_only=True in order to allow subclasses | # The following attribute has kw_only=True in order to allow subclasses | ||||
# to add attributes. Without kw_only, attributes without default values cannot | # to add attributes. Without kw_only, attributes without default values cannot | ||||
# go after attributes with default values. | # go after attributes with default values. | ||||
# See <https://github.com/python-attrs/attrs/issues/38> | # See <https://github.com/python-attrs/attrs/issues/38> | ||||
revision_extrinsic_metadata = attr.ib( | revision_extrinsic_metadata = attr.ib( | ||||
type=List[Tuple[Optional[datetime.datetime], str, bytes]], | type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | ||||
ardumont: Yep, clearer ;) | |||||
default=[], | |||||
kw_only=True, | |||||
) | ) | ||||
"""Tuple elements are respectively the 'discovery_date' (which defaults to the | |||||
visit date), 'format', and 'metadata' fields of RawExtrinsicMetadata""" | |||||
# TODO: add support for metadata for origins, directories, and contents | # TODO: add support for metadata for origins, directories, and contents | ||||
@property | @property | ||||
def ID_KEYS(self): | def ID_KEYS(self): | ||||
raise NotImplementedError(f"{self.__class__.__name__} is missing ID_KEYS") | raise NotImplementedError(f"{self.__class__.__name__} is missing ID_KEYS") | ||||
def artifact_identity(self): | def artifact_identity(self): | ||||
▲ Show 20 Lines • Show All 496 Lines • ▼ Show 20 Lines | ) -> List[RawExtrinsicMetadata]: | ||||
# an implementation for get_metadata_authority. | # an implementation for get_metadata_authority. | ||||
return [] | return [] | ||||
authority = self.get_metadata_authority() | authority = self.get_metadata_authority() | ||||
fetcher = self.get_metadata_fetcher() | fetcher = self.get_metadata_fetcher() | ||||
metadata_objects = [] | metadata_objects = [] | ||||
for (discovery_date, format, metadata) in p_info.revision_extrinsic_metadata: | for item in p_info.revision_extrinsic_metadata: | ||||
metadata_objects.append( | metadata_objects.append( | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=SWHID(object_type="revision", object_id=revision_id), | id=SWHID(object_type="revision", object_id=revision_id), | ||||
discovery_date=discovery_date or self.visit_date, | discovery_date=item.discovery_date or self.visit_date, | ||||
authority=authority, | authority=authority, | ||||
fetcher=fetcher, | fetcher=fetcher, | ||||
format=format, | format=item.format, | ||||
metadata=metadata, | metadata=item.metadata, | ||||
origin=self.url, | origin=self.url, | ||||
) | ) | ||||
) | ) | ||||
return metadata_objects | return metadata_objects | ||||
def _load_extrinsic_revision_metadata( | def _load_extrinsic_revision_metadata( | ||||
self, p_info: TPackageInfo, revision_id: Sha1Git | self, p_info: TPackageInfo, revision_id: Sha1Git | ||||
Show All 27 Lines |
Yep, clearer ;)