diff --git a/requirements-swh-with-content.txt b/requirements-swh-with-content.txt new file mode 100644 --- /dev/null +++ b/requirements-swh-with-content.txt @@ -0,0 +1 @@ +swh.objstorage diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -2,5 +2,4 @@ swh.core[http] >= 2 swh.journal >= 0.9 swh.model >= 4.3 -swh.objstorage swh.storage diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -51,7 +51,10 @@ dataset=swh.dataset.cli """, setup_requires=["setuptools-scm"], - extras_require={"testing": parse_requirements("test")}, + extras_require={ + "testing": parse_requirements("test"), + "with-content": parse_requirements("swh-with-content"), + }, use_scm_version=True, include_package_data=True, classifiers=[ diff --git a/swh/dataset/exporters/orc.py b/swh/dataset/exporters/orc.py --- a/swh/dataset/exporters/orc.py +++ b/swh/dataset/exporters/orc.py @@ -8,7 +8,7 @@ import logging import math from types import TracebackType -from typing import Any, Optional, Tuple, Type, cast +from typing import Any, Callable, Optional, Tuple, Type, cast from pkg_resources import get_distribution from pyorc import ( @@ -30,8 +30,18 @@ from swh.dataset.utils import remove_pull_requests from swh.model.hashutil import hash_to_hex from swh.model.model import TimestampWithTimezone -from swh.objstorage.factory import get_objstorage -from swh.objstorage.objstorage import ID_HASH_ALGO, ObjNotFoundError + +ObjNotFoundError: Type[Exception] +get_objstorage: Optional[Callable] +ID_HASH_ALGO: str +try: + from swh.objstorage.factory import get_objstorage + from swh.objstorage.objstorage import ID_HASH_ALGO, ObjNotFoundError +except ImportError: + get_objstorage = None + ID_HASH_ALGO = "" + ObjNotFoundError = Exception # helps keep mypy happy + ORC_TYPE_MAP = { "string": String, @@ -138,7 +148,15 @@ self.with_data = config.get("with_data", False) self.objstorage = None if self.with_data: - assert "objstorage" in config + if get_objstorage is None: + raise EnvironmentError( + "The swh-objstorage dependency package must be installed " + "when 'with_data' is set. Please install 'swh.dataset[with-content]'" + ) + if "objstorage" not in config: + raise ValueError( + "The 'objstorage' configuration entry is mandatory when 'with_data' is set." + ) self.objstorage = get_objstorage(**config["objstorage"]) self._reset()