diff --git a/PKG-INFO b/PKG-INFO
index 5fc241c..096b176 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,31 +1,32 @@
Metadata-Version: 2.1
Name: swh.loader.core
-Version: 0.0.93
+Version: 0.0.94
Summary: Software Heritage Base Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core
+Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/
Description: SWH-loader-core
===============
The Software Heritage Core Loader provides low-level loading utilities and
helpers used by other loaders.
The main entry points are classes:
- :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn)
- :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...)
- :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...)
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
diff --git a/requirements-swh.txt b/requirements-swh.txt
index 455ae3f..836cc6f 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.core >= 0.0.75
swh.model >= 0.0.60
swh.scheduler
-swh.storage >= 0.0.184
+swh.storage >= 0.0.189
diff --git a/setup.py b/setup.py
index cf67a1b..4ec5823 100755
--- a/setup.py
+++ b/setup.py
@@ -1,78 +1,79 @@
#!/usr/bin/env python3
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from setuptools import setup, find_packages
from os import path
from io import open
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, "README.md"), encoding="utf-8") as f:
long_description = f.read()
def parse_requirements(name=None):
if name:
reqf = "requirements-%s.txt" % name
else:
reqf = "requirements.txt"
requirements = []
if not path.exists(reqf):
return requirements
with open(reqf) as f:
for line in f.readlines():
line = line.strip()
if not line or line.startswith("#"):
continue
requirements.append(line)
return requirements
setup(
name="swh.loader.core",
description="Software Heritage Base Loader",
long_description=long_description,
long_description_content_type="text/markdown",
python_requires=">=3.7",
author="Software Heritage developers",
author_email="swh-devel@inria.fr",
url="https://forge.softwareheritage.org/diffusion/DLDBASE",
packages=find_packages(), # packages's modules
scripts=[], # scripts to package
install_requires=parse_requirements() + parse_requirements("swh"),
setup_requires=["vcversioner"],
extras_require={"testing": parse_requirements("test")},
vcversioner={},
include_package_data=True,
entry_points="""
[swh.workers]
loader.archive=swh.loader.package.archive:register
loader.cran=swh.loader.package.cran:register
loader.debian=swh.loader.package.debian:register
loader.deposit=swh.loader.package.deposit:register
loader.nixguix=swh.loader.package.nixguix:register
loader.npm=swh.loader.package.npm:register
loader.pypi=swh.loader.package.pypi:register
[swh.cli.subcommands]
loader=swh.loader.cli:loader
""",
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 5 - Production/Stable",
],
project_urls={
"Bug Reports": "https://forge.softwareheritage.org/maniphest",
"Funding": "https://www.softwareheritage.org/donate",
"Source": "https://forge.softwareheritage.org/source/swh-loader-core",
+ "Documentation": "https://docs.softwareheritage.org/devel/swh-loader-core/",
},
)
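Aside on the setup.py above: parse_requirements() skips blank lines and comment lines and keeps every other line verbatim. A minimal sketch of that behavior, assuming the requirements-swh.txt contents shown earlier in this diff (the inline list below is illustrative, not part of the change):

    # Illustrative only: mirrors what parse_requirements("swh") returns for
    # the requirements-swh.txt contents shown in this diff.
    contents = [
        "# comment lines and blank lines are skipped",
        "",
        "swh.core >= 0.0.75",
        "swh.model >= 0.0.60",
        "swh.scheduler",
        "swh.storage >= 0.0.189",
    ]
    parsed = [line.strip() for line in contents
              if line.strip() and not line.strip().startswith("#")]
    assert parsed == [
        "swh.core >= 0.0.75",
        "swh.model >= 0.0.60",
        "swh.scheduler",
        "swh.storage >= 0.0.189",
    ]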
diff --git a/swh.loader.core.egg-info/PKG-INFO b/swh.loader.core.egg-info/PKG-INFO
index 5fc241c..096b176 100644
--- a/swh.loader.core.egg-info/PKG-INFO
+++ b/swh.loader.core.egg-info/PKG-INFO
@@ -1,31 +1,32 @@
Metadata-Version: 2.1
Name: swh.loader.core
-Version: 0.0.93
+Version: 0.0.94
Summary: Software Heritage Base Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core
+Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/
Description: SWH-loader-core
===============
The Software Heritage Core Loader provides low-level loading utilities and
helpers used by other loaders.
The main entry points are classes:
- :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn)
- :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...)
- :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...)
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
diff --git a/swh.loader.core.egg-info/requires.txt b/swh.loader.core.egg-info/requires.txt
index be428fc..7c7c1b5 100644
--- a/swh.loader.core.egg-info/requires.txt
+++ b/swh.loader.core.egg-info/requires.txt
@@ -1,19 +1,19 @@
vcversioner
psutil
requests
iso8601
pkginfo
python-debian
python-dateutil
swh.core>=0.0.75
swh.model>=0.0.60
swh.scheduler
-swh.storage>=0.0.184
+swh.storage>=0.0.189
[testing]
pytest
pytest-mock
pytest-postgresql>=2.1.0
requests_mock
swh-core[testing]
swh-scheduler[testing]
diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
index f8f8c82..80aff75 100644
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -1,131 +1,131 @@
# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import hashlib
import logging
import pytest
from swh.model.model import Origin
from swh.loader.core.loader import BaseLoader, DVCSLoader
class DummyLoader:
def cleanup(self):
pass
def prepare(self, *args, **kwargs):
pass
def fetch_data(self):
pass
def store_data(self):
pass
def get_snapshot_id(self):
return None
def prepare_origin_visit(self, *args, **kwargs):
origin = Origin(url="some-url")
self.origin = origin
self.origin_url = origin.url
- self.visit_date = datetime.datetime.utcnow()
+ self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc)
self.visit_type = "git"
origin_url = self.storage.origin_add_one(origin)
self.visit = self.storage.origin_visit_add(
origin_url, self.visit_date, self.visit_type
)
class DummyDVCSLoader(DummyLoader, DVCSLoader):
"""Unbuffered loader will send directly to storage new data
"""
def parse_config_file(self, *args, **kwargs):
return {
"max_content_size": 100 * 1024 * 1024,
"storage": {
"cls": "pipeline",
"steps": [{"cls": "retry",}, {"cls": "filter",}, {"cls": "memory",},],
},
}
class DummyBaseLoader(DummyLoader, BaseLoader):
"""Buffered loader will send new data when threshold is reached
"""
def parse_config_file(self, *args, **kwargs):
return {
"max_content_size": 100 * 1024 * 1024,
"storage": {
"cls": "pipeline",
"steps": [
{"cls": "retry",},
{"cls": "filter",},
{
"cls": "buffer",
"min_batch_size": {
"content": 2,
"content_bytes": 8,
"directory": 2,
"revision": 2,
"release": 2,
},
},
{"cls": "memory",},
],
},
}
def test_base_loader():
loader = DummyBaseLoader()
result = loader.load()
assert result == {"status": "eventful"}
def test_dvcs_loader():
loader = DummyDVCSLoader()
result = loader.load()
assert result == {"status": "eventful"}
def test_loader_logger_default_name():
loader = DummyBaseLoader()
assert isinstance(loader.log, logging.Logger)
assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader"
loader = DummyDVCSLoader()
assert isinstance(loader.log, logging.Logger)
assert loader.log.name == "swh.loader.core.tests.test_loader.DummyDVCSLoader"
def test_loader_logger_with_name():
loader = DummyBaseLoader("some.logger.name")
assert isinstance(loader.log, logging.Logger)
assert loader.log.name == "some.logger.name"
@pytest.mark.fs
def test_loader_save_data_path(tmp_path):
loader = DummyBaseLoader("some.logger.name.1")
url = "http://bitbucket.org/something"
loader.origin = Origin(url=url)
loader.visit_date = datetime.datetime(year=2019, month=10, day=1)
loader.config = {
"save_data_path": tmp_path,
}
hash_url = hashlib.sha1(url.encode("utf-8")).hexdigest()
expected_save_path = "%s/sha1:%s/%s/2019" % (str(tmp_path), hash_url[0:2], hash_url)
save_path = loader.get_save_data_path()
assert save_path == expected_save_path
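One note on the test change above: datetime.utcnow() returns a timezone-naive value, while datetime.now(tz=timezone.utc) returns a timezone-aware one, and naive and aware datetimes cannot be compared or subtracted. A standalone illustration of the difference (not part of the diff):

    import datetime

    naive = datetime.datetime.utcnow()                       # tzinfo is None
    aware = datetime.datetime.now(tz=datetime.timezone.utc)  # tzinfo is UTC

    assert naive.tzinfo is None
    assert aware.tzinfo is datetime.timezone.utc
    # naive < aware would raise:
    # TypeError: can't compare offset-naive and offset-aware datetimes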
diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
index b2fcd68..203f5c9 100644
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -1,264 +1,268 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import requests
import types
from typing import Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple, Union
from swh.model.hashutil import hash_to_hex, hash_to_bytes
from swh.model.model import (
Person,
Revision,
RevisionType,
TimestampWithTimezone,
Sha1Git,
)
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import download
logger = logging.getLogger(__name__)
class DepositLoader(PackageLoader):
"""Load pypi origin's artifact releases into swh archive.
"""
visit_type = "deposit"
def __init__(self, url: str, deposit_id: str):
"""Constructor
Args:
url: Origin url to associate the artifacts/metadata to
deposit_id: Deposit identity
"""
super().__init__(url=url)
config_deposit = self.config["deposit"]
self.deposit_id = deposit_id
self.client = ApiClient(url=config_deposit["url"], auth=config_deposit["auth"])
self.metadata: Dict[str, Any] = {}
def get_versions(self) -> Sequence[str]:
# only 1 branch 'HEAD' with no alias since we only have 1 snapshot
# branch
return ["HEAD"]
def get_package_info(
self, version: str
) -> Generator[Tuple[str, Mapping[str, Any]], None, None]:
p_info = {
"filename": "archive.zip",
"raw": self.metadata,
}
yield "HEAD", p_info
def download_package(
self, p_info: Mapping[str, Any], tmpdir: str
) -> List[Tuple[str, Mapping]]:
"""Override to allow use of the dedicated deposit client
"""
return [self.client.archive_get(self.deposit_id, tmpdir, p_info["filename"])]
def build_revision(
self, a_metadata: Dict, uncompressed_path: str, directory: Sha1Git
) -> Optional[Revision]:
depo = a_metadata.pop("deposit")
# Note:
# `date` and `committer_date` are always transmitted by the deposit read api,
# which computes the values itself. The loader needs to use those to create
# the revision.
# date: codemeta:dateCreated if any, deposit completed_date otherwise
date = TimestampWithTimezone.from_dict(depo["author_date"])
# commit_date: codemeta:datePublished if any, deposit completed_date otherwise
commit_date = TimestampWithTimezone.from_dict(depo["committer_date"])
client, id, collection = [depo[k] for k in ["client", "id", "collection"]]
message = f"{client}: Deposit {id} in collection {collection}".encode("utf-8")
author = parse_author(depo["author"])
committer = parse_author(depo["committer"])
return Revision(
type=RevisionType.TAR,
message=message,
author=author,
date=date,
committer=committer,
committer_date=commit_date,
parents=[hash_to_bytes(p) for p in depo["revision_parents"]],
directory=directory,
synthetic=True,
metadata={
"extrinsic": {
"provider": self.client.metadata_url(self.deposit_id),
"when": self.visit_date.isoformat(),
"raw": a_metadata,
},
},
)
def load(self) -> Dict:
# First, make sure the deposit is known before triggering a load
try:
self.metadata = self.client.metadata_get(self.deposit_id)
except ValueError:
logger.error(f"Unknown deposit {self.deposit_id}, ignoring")
return {"status": "failed"}
# Then proceed with the usual loading
r = super().load()
success = r["status"] != "failed"
if success:
# Update archive with metadata information
origin_metadata = self.metadata["origin_metadata"]
logger.debug("origin_metadata: %s", origin_metadata)
tools = self.storage.tool_add([origin_metadata["tool"]])
logger.debug("tools: %s", tools)
tool_id = tools[0]["id"]
provider = origin_metadata["provider"]
# FIXME: Shall we delete this info?
provider_id = self.storage.metadata_provider_add(
provider["provider_name"],
provider["provider_type"],
provider["provider_url"],
metadata=None,
)
metadata = origin_metadata["metadata"]
self.storage.origin_metadata_add(
self.url, self.visit_date, provider_id, tool_id, metadata
)
# Update deposit status
try:
if not success:
self.client.status_update(self.deposit_id, status="failed")
return r
snapshot_id = hash_to_bytes(r["snapshot_id"])
branches = self.storage.snapshot_get(snapshot_id)["branches"]
logger.debug("branches: %s", branches)
if not branches:
return r
rev_id = branches[b"HEAD"]["target"]
revisions = self.storage.revision_get([rev_id])
# FIXME: inconsistency between tests and production code
if isinstance(revisions, types.GeneratorType):
revisions = list(revisions)
revision = revisions[0]
# Retrieve the directory identifier from the revision
dir_id = revision["directory"]
# update the deposit's status to success with its
# revision-id and directory-id
self.client.status_update(
self.deposit_id,
status="done",
revision_id=hash_to_hex(rev_id),
directory_id=hash_to_hex(dir_id),
+ snapshot_id=r["snapshot_id"],
origin_url=self.url,
)
except Exception:
logger.exception("Problem when trying to update the deposit's status")
return {"status": "failed"}
return r
def parse_author(author) -> Person:
"""See prior fixme
"""
return Person(
fullname=author["fullname"].encode("utf-8"),
name=author["name"].encode("utf-8"),
email=author["email"].encode("utf-8"),
)
class ApiClient:
"""Private Deposit Api client
"""
def __init__(self, url, auth: Optional[Mapping[str, str]]):
self.base_url = url.rstrip("/")
self.auth = None if not auth else (auth["username"], auth["password"])
def do(self, method: str, url: str, *args, **kwargs):
"""Internal method to deal with requests, possibly with basic http
authentication.
Args:
method (str): supported http methods as in get/post/put
Returns:
The request's execution output
"""
method_fn = getattr(requests, method)
if self.auth:
kwargs["auth"] = self.auth
return method_fn(url, *args, **kwargs)
def archive_get(
self, deposit_id: Union[int, str], tmpdir: str, filename: str
) -> Tuple[str, Dict]:
"""Retrieve deposit's archive artifact locally
"""
url = f"{self.base_url}/{deposit_id}/raw/"
return download(url, dest=tmpdir, filename=filename, auth=self.auth)
def metadata_url(self, deposit_id: Union[int, str]) -> str:
return f"{self.base_url}/{deposit_id}/meta/"
def metadata_get(self, deposit_id: Union[int, str]) -> Dict[str, Any]:
"""Retrieve deposit's metadata artifact as json
"""
url = self.metadata_url(deposit_id)
r = self.do("get", url)
if r.ok:
return r.json()
msg = f"Problem when retrieving deposit metadata at {url}"
logger.error(msg)
raise ValueError(msg)
def status_update(
self,
deposit_id: Union[int, str],
status: str,
revision_id: Optional[str] = None,
directory_id: Optional[str] = None,
+ snapshot_id: Optional[str] = None,
origin_url: Optional[str] = None,
):
"""Update deposit's information including status, and persistent
identifiers result of the loading.
"""
url = f"{self.base_url}/{deposit_id}/update/"
payload = {"status": status}
if revision_id:
payload["revision_id"] = revision_id
if directory_id:
payload["directory_id"] = directory_id
+ if snapshot_id:
+ payload["snapshot_id"] = snapshot_id
if origin_url:
payload["origin_url"] = origin_url
self.do("put", url, json=payload)
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
index cf61891..0efb85a 100644
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -1,329 +1,375 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import pytest
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.loader.package.deposit.loader import DepositLoader
from swh.loader.package.tests.common import (
check_snapshot,
check_metadata_paths,
get_stats,
)
from swh.core.pytest_plugin import requests_mock_datadir_factory
+DEPOSIT_URL = "https://deposit.softwareheritage.org/1/private"
+
+
@pytest.fixture
def requests_mock_datadir(requests_mock_datadir):
"""Enhance default mock data to mock put requests as the loader does some
internal update queries there.
"""
requests_mock_datadir.put(re.compile("https"))
return requests_mock_datadir
def test_deposit_init_ok(swh_config, swh_loader_config):
url = "some-url"
deposit_id = 999
loader = DepositLoader(url, deposit_id) # Something that does not exist
assert loader.url == url
assert loader.client is not None
assert loader.client.base_url == swh_loader_config["deposit"]["url"]
def test_deposit_loading_unknown_deposit(swh_config, requests_mock_datadir):
"""Loading an unknown deposit should fail
no origin, no visit, no snapshot
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = "some-url"
unknown_deposit_id = 667
loader = DepositLoader(url, unknown_deposit_id) # does not exist
actual_load_status = loader.load()
assert actual_load_status == {"status": "failed"}
stats = get_stats(loader.storage)
assert {
"content": 0,
"directory": 0,
"origin": 0,
"origin_visit": 0,
"person": 0,
"release": 0,
"revision": 0,
"skipped_content": 0,
"snapshot": 0,
} == stats
requests_mock_datadir_missing_one = requests_mock_datadir_factory(
- ignore_urls=["https://deposit.softwareheritage.org/1/private/666/raw/",]
+ ignore_urls=[f"{DEPOSIT_URL}/666/raw/",]
)
def test_deposit_loading_failure_to_retrieve_1_artifact(
swh_config, requests_mock_datadir_missing_one
):
"""Deposit with missing artifact ends up with an uneventful/partial visit
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = "some-url-2"
deposit_id = 666
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
assert actual_load_status["status"] == "uneventful"
assert actual_load_status["snapshot_id"] is not None
stats = get_stats(loader.storage)
assert {
"content": 0,
"directory": 0,
"origin": 1,
"origin_visit": 1,
"person": 0,
"release": 0,
"revision": 0,
"skipped_content": 0,
"snapshot": 1,
} == stats
origin_visit = loader.storage.origin_visit_get_latest(url)
assert origin_visit["status"] == "partial"
assert origin_visit["type"] == "deposit"
def test_revision_metadata_structure(swh_config, requests_mock_datadir):
url = "https://hal-test.archives-ouvertes.fr/some-external-id"
deposit_id = 666
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] is not None
expected_revision_id = hash_to_bytes("637318680351f5d78856d13264faebbd91efe9bb")
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(
revision["metadata"],
paths=[
("extrinsic.provider", str),
("extrinsic.when", str),
("extrinsic.raw", dict),
("original_artifact", list),
],
)
# Only 2 top-level keys now
assert set(revision["metadata"].keys()) == {"extrinsic", "original_artifact"}
for original_artifact in revision["metadata"]["original_artifact"]:
check_metadata_paths(
original_artifact,
paths=[("filename", str), ("length", int), ("checksums", dict),],
)
def test_deposit_loading_ok(swh_config, requests_mock_datadir):
url = "https://hal-test.archives-ouvertes.fr/some-external-id"
deposit_id = 666
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
expected_snapshot_id = "b2b327b33dc85818bd23c3ccda8b7e675a66ecbd"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
}
stats = get_stats(loader.storage)
assert {
"content": 303,
"directory": 12,
"origin": 1,
"origin_visit": 1,
"person": 1,
"release": 0,
"revision": 1,
"skipped_content": 0,
"snapshot": 1,
} == stats
origin_visit = loader.storage.origin_visit_get_latest(url)
assert origin_visit["status"] == "full"
assert origin_visit["type"] == "deposit"
+ revision_id = "637318680351f5d78856d13264faebbd91efe9bb"
expected_branches = {
- "HEAD": {
- "target": "637318680351f5d78856d13264faebbd91efe9bb",
- "target_type": "revision",
- },
+ "HEAD": {"target": revision_id, "target_type": "revision",},
}
expected_snapshot = {
"id": expected_snapshot_id,
"branches": expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
+ revision = next(loader.storage.revision_get([hash_to_bytes(revision_id)]))
+ assert revision
+
# check metadata
tool = {
"name": "swh-deposit",
"version": "0.0.1",
"configuration": {"sword_version": "2",},
}
tool = loader.storage.tool_get(tool)
assert tool is not None
assert tool["id"] is not None
provider = {
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
"metadata": None,
}
provider = loader.storage.metadata_provider_get_by(provider)
assert provider is not None
assert provider["id"] is not None
metadata = list(
loader.storage.origin_metadata_get_by(url, provider_type="deposit_client")
)
assert metadata is not None
assert isinstance(metadata, list)
assert len(metadata) == 1
metadata0 = metadata[0]
assert metadata0["provider_id"] == provider["id"]
assert metadata0["provider_type"] == "deposit_client"
assert metadata0["tool_id"] == tool["id"]
+ # Retrieve the status update query that the loader sent to the deposit server
+ urls = [
+ m
+ for m in requests_mock_datadir.request_history
+ if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/"
+ ]
+
+ assert len(urls) == 1
+ update_query = urls[0]
+
+ body = update_query.json()
+ expected_body = {
+ "status": "done",
+ "revision_id": revision_id,
+ "directory_id": hash_to_hex(revision["directory"]),
+ "snapshot_id": expected_snapshot_id,
+ "origin_url": url,
+ }
+
+ assert body == expected_body
+
def test_deposit_loading_ok_2(swh_config, requests_mock_datadir):
"""Field dates should be se appropriately
"""
external_id = "some-external-id"
url = f"https://hal-test.archives-ouvertes.fr/{external_id}"
deposit_id = 777
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
expected_snapshot_id = "3e68440fdd7c81d283f8f3aebb6f0c8657864192"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
}
revision_id = "564d18943d71be80d0d73b43a77cfb205bcde96c"
expected_branches = {"HEAD": {"target": revision_id, "target_type": "revision"}}
expected_snapshot = {
"id": expected_snapshot_id,
"branches": expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
origin_visit = loader.storage.origin_visit_get_latest(url)
# The visit completed in full
assert origin_visit["status"] == "full"
assert origin_visit["type"] == "deposit"
raw_meta = loader.client.metadata_get(deposit_id)
# Ensure the date fields are set appropriately in the revision
# Retrieve the revision
revision = next(loader.storage.revision_get([hash_to_bytes(revision_id)]))
assert revision
assert revision["date"] == raw_meta["deposit"]["author_date"]
assert revision["committer_date"] == raw_meta["deposit"]["committer_date"]
- read_api = f"https://deposit.softwareheritage.org/1/private/{deposit_id}/meta/"
+ read_api = f"{DEPOSIT_URL}/{deposit_id}/meta/"
assert revision["metadata"] == {
"extrinsic": {
"provider": read_api,
"raw": {
"origin": {"type": "deposit", "url": url,},
"origin_metadata": {
"metadata": {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one",],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"codemeta:datePublished": "2017-10-08T15:00:00Z",
"external_identifier": "some-external-id",
"url": url,
},
"provider": {
"metadata": None,
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
"version": "0.0.1",
},
},
},
"when": revision["metadata"]["extrinsic"]["when"], # dynamic
},
"original_artifact": [
{
"checksums": {
"sha1": "f8c63d7c890a7453498e6cf9fef215d85ec6801d",
"sha256": "474bf646aeeff6d945eb752b1a9f8a40f3d81a88909ee7bd2d08cc822aa361e6", # noqa
},
"filename": "archive.zip",
"length": 956830,
}
],
}
# Check the metadata swh side
origin_meta = list(
loader.storage.origin_metadata_get_by(url, provider_type="deposit_client")
)
assert len(origin_meta) == 1
origin_meta = origin_meta[0]
# dynamic, a pain to display and not that interesting
origin_meta.pop("discovery_date")
assert origin_meta == {
"metadata": {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one"],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"codemeta:datePublished": "2017-10-08T15:00:00Z",
"external_identifier": "some-external-id",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_url": "https://hal-test.archives-ouvertes.fr/some-external-id",
"provider_id": 1,
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
"tool_id": 1,
}
+
+ # Retrieve the status update query that the loader sent to the deposit server
+ urls = [
+ m
+ for m in requests_mock_datadir.request_history
+ if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/"
+ ]
+
+ assert len(urls) == 1
+ update_query = urls[0]
+
+ body = update_query.json()
+ expected_body = {
+ "status": "done",
+ "revision_id": revision_id,
+ "directory_id": hash_to_hex(revision["directory"]),
+ "snapshot_id": expected_snapshot_id,
+ "origin_url": url,
+ }
+
+ assert body == expected_body
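The new assertions in these tests lean on requests-mock recording every intercepted call: Mocker.request_history holds the recorded requests, and .json() decodes each request body. A self-contained sketch of that pattern, using a made-up URL:

    import requests
    import requests_mock

    with requests_mock.Mocker() as m:
        # Register a PUT matcher, then issue a request against it.
        m.put("https://deposit.invalid/1/private/666/update/", json={})
        requests.put(
            "https://deposit.invalid/1/private/666/update/",
            json={"status": "done"},
        )
        # The mock records the call; the body can be read back as JSON.
        assert m.request_history[0].json() == {"status": "done"}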
diff --git a/version.txt b/version.txt
index bc9071a..8016821 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.93-0-gfca7e47
\ No newline at end of file
+v0.0.94-0-g8bd38f9
\ No newline at end of file