Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/archive/loader.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
from os import path | from os import path | ||||
import string | import string | ||||
from typing import Any, Dict, Iterator, Optional, Sequence, Tuple, Union | from typing import Any, Dict, Iterator, Optional, Sequence, Tuple, Union | ||||
import attr | import attr | ||||
import iso8601 | import iso8601 | ||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import BaseManifestPackageInfo, PackageLoader | ||||
from swh.loader.package.utils import release_name | from swh.loader.package.utils import release_name | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Sha1Git, | Sha1Git, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
SWH_PERSON = Person( | SWH_PERSON = Person( | ||||
name=b"Software Heritage", | name=b"Software Heritage", | ||||
fullname=b"Software Heritage", | fullname=b"Software Heritage", | ||||
email=b"robot@softwareheritage.org", | email=b"robot@softwareheritage.org", | ||||
) | ) | ||||
REVISION_MESSAGE = b"swh-loader-package: synthetic revision message" | REVISION_MESSAGE = b"swh-loader-package: synthetic revision message" | ||||
@attr.s | @attr.s | ||||
class ArchivePackageInfo(BasePackageInfo): | class ArchivePackageInfo(BaseManifestPackageInfo): | ||||
raw_info = attr.ib(type=Dict[str, Any]) | raw_info = attr.ib(type=Dict[str, Any]) | ||||
length = attr.ib(type=int) | length = attr.ib(type=int) | ||||
"""Size of the archive file""" | """Size of the archive file""" | ||||
time = attr.ib(type=Union[str, datetime.datetime]) | time = attr.ib(type=Union[str, datetime.datetime]) | ||||
"""Timestamp of the archive file on the server""" | """Timestamp of the archive file on the server""" | ||||
version = attr.ib(type=str) | version = attr.ib(type=str) | ||||
# default format for gnu | # default format for gnu | ||||
▲ Show 20 Lines • Show All 92 Lines • ▼ Show 20 Lines | ) -> Iterator[Tuple[str, ArchivePackageInfo]]: | ||||
# FIXME: this code assumes we have only 1 artifact per | # FIXME: this code assumes we have only 1 artifact per | ||||
# versioned package | # versioned package | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def extid_from_reference_artifact(self, reference_artifact: Dict) -> bytes:
    """Compute the extid of a reference artifact description.

    The dict is turned into an ``ArchivePackageInfo`` through
    ``ArchivePackageInfo.from_metadata`` and its extid is computed with this
    loader's ``extid_manifest_format``.

    Args:
        reference_artifact: artifact description accepted by
            ``ArchivePackageInfo.from_metadata``.

    Returns:
        The value returned by ``ArchivePackageInfo.extid`` for that artifact.
    """
    return ArchivePackageInfo.from_metadata(reference_artifact).extid(
        manifest_format=self.extid_manifest_format
    )
def resolve_revision_from(
    self, known_artifacts: Dict, p_info: ArchivePackageInfo
) -> Optional[bytes]:
    """Return the id of an already-known revision matching ``p_info``, if any.

    The extid of ``p_info`` is computed with ``self.extid_manifest_format``
    and compared with the extid rebuilt from the ``["extrinsic"]["raw"]``
    entry of each known artifact.

    Args:
        known_artifacts: mapping from revision id to artifact metadata; each
            value is indexed as ``value["extrinsic"]["raw"]``.
        p_info: package info of the artifact being looked up.

    Returns:
        The key of the first entry whose extid equals the extid of
        ``p_info``, or ``None`` when no entry matches.
    """
    extid = p_info.extid(manifest_format=self.extid_manifest_format)
    for rev_id, known_artifact in known_artifacts.items():
        # Log through the module-level ``logger`` rather than the root-logger
        # helper ``logging.debug``, so per-module log configuration applies.
        logger.debug("known_artifact: %s", known_artifact)
        reference_artifact = known_artifact["extrinsic"]["raw"]
        known_extid = self.extid_from_reference_artifact(reference_artifact)
        if extid == known_extid:
            return rev_id
    return None
def build_revision( | def build_revision( | ||||
self, p_info: ArchivePackageInfo, uncompressed_path: str, directory: Sha1Git | self, p_info: ArchivePackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Revision]: | ) -> Optional[Revision]: | ||||
time = p_info.time # assume it's a timestamp | time = p_info.time # assume it's a timestamp | ||||
if isinstance(time, str): # otherwise, assume it's a parsable date | if isinstance(time, str): # otherwise, assume it's a parsable date | ||||
parsed_time = iso8601.parse_date(time) | parsed_time = iso8601.parse_date(time) | ||||
else: | else: | ||||
parsed_time = time | parsed_time = time | ||||
Show All 20 Lines |