Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/archive/loader.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import logging | import logging | ||||
from os import path | from os import path | ||||
from typing import Any, Dict, Iterator, Optional, Sequence, Tuple, Union | from typing import Any, Dict, Iterator, Optional, Sequence, Tuple, Union | ||||
import attr | import attr | ||||
import iso8601 | import iso8601 | ||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import BasePackageInfo, PackageLoader | ||||
from swh.loader.package.utils import release_name | from swh.loader.package.utils import release_name | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Sha1Git, | Sha1Git, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.storage.interface import StorageInterface | |||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Fixed "Software Heritage" identity.
# NOTE(review): presumably the author/committer of the synthetic revisions
# built by this loader -- the usage is not visible in this section; confirm.
SWH_PERSON = Person(
    name=b"Software Heritage",
    fullname=b"Software Heritage",
    email=b"robot@softwareheritage.org",
)

# Constant message for synthetic revisions.
REVISION_MESSAGE = b"swh-loader-package: synthetic revision message"
Show All 35 Lines | class ArchiveLoader(PackageLoader[ArchivePackageInfo]): | ||||
"""Load archive origin's artifact files into swh archive | """Load archive origin's artifact files into swh archive | ||||
""" | """ | ||||
visit_type = "tar" | visit_type = "tar" | ||||
def __init__(
    self,
    storage: StorageInterface,
    url: str,
    artifacts: Sequence[Dict[str, Any]],
    identity_artifact_keys: Optional[Sequence[str]] = None,
    max_content_size: Optional[int] = None,
):
    """Loader constructor.

    For now, this is the lister's task output.

    Args:
        storage: Storage interface instance; forwarded unchanged to the
            parent ``PackageLoader`` constructor.
        url: Origin url
        artifacts: List of artifact information with keys:

           - **time**: last modification time as either isoformat date
             string or timestamp

           - **url**: the artifact url to retrieve filename

           - **filename**: optionally, the file's name

           - **version**: artifact's version

           - **length**: artifact's length

        identity_artifact_keys: Optional List of keys forming the
            "identity" of an artifact
        max_content_size: Optional size limit; forwarded unchanged to the
            parent ``PackageLoader`` constructor, which defines its exact
            semantics.

    """
    # The parent class owns storage/url/max_content_size handling; this
    # class only keeps the artifact description handed over by the lister.
    super().__init__(storage=storage, url=url, max_content_size=max_content_size)
    self.artifacts = artifacts  # assume order is enforced in the lister
    self.identity_artifact_keys = identity_artifact_keys
def get_versions(self) -> Sequence[str]: | def get_versions(self) -> Sequence[str]: | ||||
versions = [] | versions = [] | ||||
for archive in self.artifacts: | for archive in self.artifacts: | ||||
v = archive.get("version") | v = archive.get("version") | ||||
if v: | if v: | ||||
▲ Show 20 Lines • Show All 62 Lines • Show Last 20 Lines |