
diff --git a/PKG-INFO b/PKG-INFO
index 5fc241c..096b176 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,31 +1,32 @@
Metadata-Version: 2.1
Name: swh.loader.core
-Version: 0.0.93
+Version: 0.0.94
Summary: Software Heritage Base Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core
+Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/
Description: SWH-loader-core
===============
The Software Heritage Core Loader provides low-level loading utilities and
helpers used by other loaders.
The main entry points are classes:
- :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn)
- :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...)
- :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...)
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
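
The description above names the three entry-point classes. As a minimal, hypothetical sketch (the SkeletonLoader name and its no-op bodies are illustrative assumptions, not part of this release), a concrete loader built on swh.loader.core.loader.BaseLoader fills in the same hooks that the DummyLoader test helper further down this diff exercises:

    # Hypothetical sketch of a BaseLoader subclass; the hook names match
    # those used by DummyLoader in swh/loader/core/tests/test_loader.py.
    from swh.loader.core.loader import BaseLoader

    class SkeletonLoader(BaseLoader):  # illustrative name
        def prepare_origin_visit(self, *args, **kwargs):
            pass  # set self.origin, self.visit_date, self.visit_type
        def prepare(self, *args, **kwargs):
            pass  # set up per-visit state
        def fetch_data(self):
            pass  # pull data from the origin
        def store_data(self):
            pass  # write the fetched objects to storage
        def cleanup(self):
            pass  # release temporary resources
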
diff --git a/requirements-swh.txt b/requirements-swh.txt
index 455ae3f..836cc6f 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.core >= 0.0.75
swh.model >= 0.0.60
swh.scheduler
-swh.storage >= 0.0.184
+swh.storage >= 0.0.189
diff --git a/setup.py b/setup.py
index cf67a1b..4ec5823 100755
--- a/setup.py
+++ b/setup.py
@@ -1,78 +1,79 @@
#!/usr/bin/env python3
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from setuptools import setup, find_packages
from os import path
from io import open
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, "README.md"), encoding="utf-8") as f:
long_description = f.read()
def parse_requirements(name=None):
if name:
reqf = "requirements-%s.txt" % name
else:
reqf = "requirements.txt"
requirements = []
if not path.exists(reqf):
return requirements
with open(reqf) as f:
for line in f.readlines():
line = line.strip()
if not line or line.startswith("#"):
continue
requirements.append(line)
return requirements
setup(
name="swh.loader.core",
description="Software Heritage Base Loader",
long_description=long_description,
long_description_content_type="text/markdown",
python_requires=">=3.7",
author="Software Heritage developers",
author_email="swh-devel@inria.fr",
url="https://forge.softwareheritage.org/diffusion/DLDBASE",
packages=find_packages(), # package modules
scripts=[], # scripts to package
install_requires=parse_requirements() + parse_requirements("swh"),
setup_requires=["vcversioner"],
extras_require={"testing": parse_requirements("test")},
vcversioner={},
include_package_data=True,
entry_points="""
[swh.workers]
loader.archive=swh.loader.package.archive:register
loader.cran=swh.loader.package.cran:register
loader.debian=swh.loader.package.debian:register
loader.deposit=swh.loader.package.deposit:register
loader.nixguix=swh.loader.package.nixguix:register
loader.npm=swh.loader.package.npm:register
loader.pypi=swh.loader.package.pypi:register
[swh.cli.subcommands]
loader=swh.loader.cli:loader
""",
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 5 - Production/Stable",
],
project_urls={
"Bug Reports": "https://forge.softwareheritage.org/maniphest",
"Funding": "https://www.softwareheritage.org/donate",
"Source": "https://forge.softwareheritage.org/source/swh-loader-core",
+ "Documentation": "https://docs.softwareheritage.org/devel/swh-loader-core/",
},
)
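
The entry_points block above registers each package loader under the [swh.workers] group. A minimal standalone sketch of how such setuptools entry points can be enumerated at runtime (not part of this diff):

    # Enumerate the [swh.workers] entry points declared in setup.py.
    import pkg_resources

    for ep in pkg_resources.iter_entry_points("swh.workers"):
        register = ep.load()  # e.g. swh.loader.package.pypi:register
        print(ep.name, register)
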
diff --git a/swh.loader.core.egg-info/PKG-INFO b/swh.loader.core.egg-info/PKG-INFO
index 5fc241c..096b176 100644
--- a/swh.loader.core.egg-info/PKG-INFO
+++ b/swh.loader.core.egg-info/PKG-INFO
@@ -1,31 +1,32 @@
Metadata-Version: 2.1
Name: swh.loader.core
-Version: 0.0.93
+Version: 0.0.94
Summary: Software Heritage Base Loader
Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core
+Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/
Description: SWH-loader-core
===============
The Software Heritage Core Loader provides low-level loading utilities and
helpers used by other loaders.
The main entry points are classes:
- :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn)
- :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...)
- :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...)
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
diff --git a/swh.loader.core.egg-info/requires.txt b/swh.loader.core.egg-info/requires.txt
index be428fc..7c7c1b5 100644
--- a/swh.loader.core.egg-info/requires.txt
+++ b/swh.loader.core.egg-info/requires.txt
@@ -1,19 +1,19 @@
vcversioner
psutil
requests
iso8601
pkginfo
python-debian
python-dateutil
swh.core>=0.0.75
swh.model>=0.0.60
swh.scheduler
-swh.storage>=0.0.184
+swh.storage>=0.0.189
[testing]
pytest
pytest-mock
pytest-postgresql>=2.1.0
requests_mock
swh-core[testing]
swh-scheduler[testing]
diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
index f8f8c82..80aff75 100644
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -1,131 +1,131 @@
# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import hashlib
import logging
import pytest
from swh.model.model import Origin
from swh.loader.core.loader import BaseLoader, DVCSLoader
class DummyLoader:
def cleanup(self):
pass
def prepare(self, *args, **kwargs):
pass
def fetch_data(self):
pass
def store_data(self):
pass
def get_snapshot_id(self):
return None
def prepare_origin_visit(self, *args, **kwargs):
origin = Origin(url="some-url")
self.origin = origin
self.origin_url = origin.url
- self.visit_date = datetime.datetime.utcnow()
+ self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc)
self.visit_type = "git"
origin_url = self.storage.origin_add_one(origin)
self.visit = self.storage.origin_visit_add(
origin_url, self.visit_date, self.visit_type
)
class DummyDVCSLoader(DummyLoader, DVCSLoader):
"""Unbuffered loader will send directly to storage new data
"""
def parse_config_file(self, *args, **kwargs):
return {
"max_content_size": 100 * 1024 * 1024,
"storage": {
"cls": "pipeline",
"steps": [{"cls": "retry",}, {"cls": "filter",}, {"cls": "memory",},],
},
}
class DummyBaseLoader(DummyLoader, BaseLoader):
"""Buffered loader will send new data when threshold is reached
"""
def parse_config_file(self, *args, **kwargs):
return {
"max_content_size": 100 * 1024 * 1024,
"storage": {
"cls": "pipeline",
"steps": [
{"cls": "retry",},
{"cls": "filter",},
{
"cls": "buffer",
"min_batch_size": {
"content": 2,
"content_bytes": 8,
"directory": 2,
"revision": 2,
"release": 2,
},
},
{"cls": "memory",},
],
},
}
def test_base_loader():
loader = DummyBaseLoader()
result = loader.load()
assert result == {"status": "eventful"}
def test_dvcs_loader():
loader = DummyDVCSLoader()
result = loader.load()
assert result == {"status": "eventful"}
def test_loader_logger_default_name():
loader = DummyBaseLoader()
assert isinstance(loader.log, logging.Logger)
assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader"
loader = DummyDVCSLoader()
assert isinstance(loader.log, logging.Logger)
assert loader.log.name == "swh.loader.core.tests.test_loader.DummyDVCSLoader"
def test_loader_logger_with_name():
loader = DummyBaseLoader("some.logger.name")
assert isinstance(loader.log, logging.Logger)
assert loader.log.name == "some.logger.name"
@pytest.mark.fs
def test_loader_save_data_path(tmp_path):
loader = DummyBaseLoader("some.logger.name.1")
url = "http://bitbucket.org/something"
loader.origin = Origin(url=url)
loader.visit_date = datetime.datetime(year=2019, month=10, day=1)
loader.config = {
"save_data_path": tmp_path,
}
hash_url = hashlib.sha1(url.encode("utf-8")).hexdigest()
expected_save_path = "%s/sha1:%s/%s/2019" % (str(tmp_path), hash_url[0:2], hash_url)
save_path = loader.get_save_data_path()
assert save_path == expected_save_path
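
The only functional change in this test file replaces the naive datetime.datetime.utcnow() with a timezone-aware timestamp. A short standalone illustration of the difference:

    import datetime

    naive = datetime.datetime.utcnow()                       # tzinfo is None
    aware = datetime.datetime.now(tz=datetime.timezone.utc)  # tzinfo is set
    assert naive.tzinfo is None
    assert aware.tzinfo is datetime.timezone.utc
    # Comparing the two raises:
    # TypeError: can't compare offset-naive and offset-aware datetimes
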
diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
index b2fcd68..203f5c9 100644
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -1,264 +1,268 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import requests
import types
from typing import Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple, Union
from swh.model.hashutil import hash_to_hex, hash_to_bytes
from swh.model.model import (
Person,
Revision,
RevisionType,
TimestampWithTimezone,
Sha1Git,
)
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import download
logger = logging.getLogger(__name__)
class DepositLoader(PackageLoader):
"""Load pypi origin's artifact releases into swh archive.
"""
visit_type = "deposit"
def __init__(self, url: str, deposit_id: str):
"""Constructor
Args:
url: Origin url to associate the artifacts/metadata to
deposit_id: Deposit identity
"""
super().__init__(url=url)
config_deposit = self.config["deposit"]
self.deposit_id = deposit_id
self.client = ApiClient(url=config_deposit["url"], auth=config_deposit["auth"])
self.metadata: Dict[str, Any] = {}
def get_versions(self) -> Sequence[str]:
# only 1 branch 'HEAD' with no alias since we only have 1 snapshot
# branch
return ["HEAD"]
def get_package_info(
self, version: str
) -> Generator[Tuple[str, Mapping[str, Any]], None, None]:
p_info = {
"filename": "archive.zip",
"raw": self.metadata,
}
yield "HEAD", p_info
def download_package(
self, p_info: Mapping[str, Any], tmpdir: str
) -> List[Tuple[str, Mapping]]:
"""Override to allow use of the dedicated deposit client
"""
return [self.client.archive_get(self.deposit_id, tmpdir, p_info["filename"])]
def build_revision(
self, a_metadata: Dict, uncompressed_path: str, directory: Sha1Git
) -> Optional[Revision]:
depo = a_metadata.pop("deposit")
# Note:
# `date` and `committer_date` are always transmitted by the deposit read api,
# which computes the values itself. The loader needs to use those to create
# the revision.
# date: codemeta:dateCreated if any, deposit completed_date otherwise
date = TimestampWithTimezone.from_dict(depo["author_date"])
# commit_date: codemeta:datePublished if any, deposit completed_date otherwise
commit_date = TimestampWithTimezone.from_dict(depo["committer_date"])
client, id, collection = [depo[k] for k in ["client", "id", "collection"]]
message = f"{client}: Deposit {id} in collection {collection}".encode("utf-8")
author = parse_author(depo["author"])
committer = parse_author(depo["committer"])
return Revision(
type=RevisionType.TAR,
message=message,
author=author,
date=date,
committer=committer,
committer_date=commit_date,
parents=[hash_to_bytes(p) for p in depo["revision_parents"]],
directory=directory,
synthetic=True,
metadata={
"extrinsic": {
"provider": self.client.metadata_url(self.deposit_id),
"when": self.visit_date.isoformat(),
"raw": a_metadata,
},
},
)
def load(self) -> Dict:
# First, make sure the deposit is known before triggering a load
try:
self.metadata = self.client.metadata_get(self.deposit_id)
except ValueError:
logger.error(f"Unknown deposit {self.deposit_id}, ignoring")
return {"status": "failed"}
# Then usual loading
r = super().load()
success = r["status"] != "failed"
if success:
# Update archive with metadata information
origin_metadata = self.metadata["origin_metadata"]
logger.debug("origin_metadata: %s", origin_metadata)
tools = self.storage.tool_add([origin_metadata["tool"]])
logger.debug("tools: %s", tools)
tool_id = tools[0]["id"]
provider = origin_metadata["provider"]
# FIXME: Shall we delete this info?
provider_id = self.storage.metadata_provider_add(
provider["provider_name"],
provider["provider_type"],
provider["provider_url"],
metadata=None,
)
metadata = origin_metadata["metadata"]
self.storage.origin_metadata_add(
self.url, self.visit_date, provider_id, tool_id, metadata
)
# Update deposit status
try:
if not success:
self.client.status_update(self.deposit_id, status="failed")
return r
snapshot_id = hash_to_bytes(r["snapshot_id"])
branches = self.storage.snapshot_get(snapshot_id)["branches"]
logger.debug("branches: %s", branches)
if not branches:
return r
rev_id = branches[b"HEAD"]["target"]
revisions = self.storage.revision_get([rev_id])
# FIXME: inconsistency between tests and production code
if isinstance(revisions, types.GeneratorType):
revisions = list(revisions)
revision = revisions[0]
# Retrieve the revision identifier
dir_id = revision["directory"]
# update the deposit's status to success with its
# revision, directory and snapshot ids
self.client.status_update(
self.deposit_id,
status="done",
revision_id=hash_to_hex(rev_id),
directory_id=hash_to_hex(dir_id),
+ snapshot_id=r["snapshot_id"],
origin_url=self.url,
)
except Exception:
logger.exception("Problem when trying to update the deposit's status")
return {"status": "failed"}
return r
def parse_author(author) -> Person:
"""See prior fixme
"""
return Person(
fullname=author["fullname"].encode("utf-8"),
name=author["name"].encode("utf-8"),
email=author["email"].encode("utf-8"),
)
class ApiClient:
"""Private Deposit Api client
"""
def __init__(self, url, auth: Optional[Mapping[str, str]]):
self.base_url = url.rstrip("/")
self.auth = None if not auth else (auth["username"], auth["password"])
def do(self, method: str, url: str, *args, **kwargs):
"""Internal method to deal with requests, possibly with basic http
authentication.
Args:
method (str): supported http methods as in get/post/put
Returns:
The request's execution output
"""
method_fn = getattr(requests, method)
if self.auth:
kwargs["auth"] = self.auth
return method_fn(url, *args, **kwargs)
def archive_get(
self, deposit_id: Union[int, str], tmpdir: str, filename: str
) -> Tuple[str, Dict]:
"""Retrieve deposit's archive artifact locally
"""
url = f"{self.base_url}/{deposit_id}/raw/"
return download(url, dest=tmpdir, filename=filename, auth=self.auth)
def metadata_url(self, deposit_id: Union[int, str]) -> str:
return f"{self.base_url}/{deposit_id}/meta/"
def metadata_get(self, deposit_id: Union[int, str]) -> Dict[str, Any]:
"""Retrieve deposit's metadata artifact as json
"""
url = self.metadata_url(deposit_id)
r = self.do("get", url)
if r.ok:
return r.json()
msg = f"Problem when retrieving deposit metadata at {url}"
logger.error(msg)
raise ValueError(msg)
def status_update(
self,
deposit_id: Union[int, str],
status: str,
revision_id: Optional[str] = None,
directory_id: Optional[str] = None,
+ snapshot_id: Optional[str] = None,
origin_url: Optional[str] = None,
):
"""Update deposit's information including status, and persistent
identifiers result of the loading.
"""
url = f"{self.base_url}/{deposit_id}/update/"
payload = {"status": status}
if revision_id:
payload["revision_id"] = revision_id
if directory_id:
payload["directory_id"] = directory_id
+ if snapshot_id:
+ payload["snapshot_id"] = snapshot_id
if origin_url:
payload["origin_url"] = origin_url
self.do("put", url, json=payload)
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
index cf61891..0efb85a 100644
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -1,329 +1,375 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import pytest
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.loader.package.deposit.loader import DepositLoader
from swh.loader.package.tests.common import (
check_snapshot,
check_metadata_paths,
get_stats,
)
from swh.core.pytest_plugin import requests_mock_datadir_factory
+DEPOSIT_URL = "https://deposit.softwareheritage.org/1/private"
+
+
@pytest.fixture
def requests_mock_datadir(requests_mock_datadir):
"""Enhance default mock data to mock put requests as the loader does some
internal update queries there.
"""
requests_mock_datadir.put(re.compile("https"))
return requests_mock_datadir
def test_deposit_init_ok(swh_config, swh_loader_config):
url = "some-url"
deposit_id = 999
loader = DepositLoader(url, deposit_id) # Something that does not exist
assert loader.url == url
assert loader.client is not None
assert loader.client.base_url == swh_loader_config["deposit"]["url"]
def test_deposit_loading_unknown_deposit(swh_config, requests_mock_datadir):
"""Loading an unknown deposit should fail
no origin, no visit, no snapshot
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = "some-url"
unknown_deposit_id = 667
loader = DepositLoader(url, unknown_deposit_id) # does not exist
actual_load_status = loader.load()
assert actual_load_status == {"status": "failed"}
stats = get_stats(loader.storage)
assert {
"content": 0,
"directory": 0,
"origin": 0,
"origin_visit": 0,
"person": 0,
"release": 0,
"revision": 0,
"skipped_content": 0,
"snapshot": 0,
} == stats
requests_mock_datadir_missing_one = requests_mock_datadir_factory(
- ignore_urls=["https://deposit.softwareheritage.org/1/private/666/raw/",]
+ ignore_urls=[f"{DEPOSIT_URL}/666/raw/",]
)
def test_deposit_loading_failure_to_retrieve_1_artifact(
swh_config, requests_mock_datadir_missing_one
):
"""Deposit with missing artifact ends up with an uneventful/partial visit
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = "some-url-2"
deposit_id = 666
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
assert actual_load_status["status"] == "uneventful"
assert actual_load_status["snapshot_id"] is not None
stats = get_stats(loader.storage)
assert {
"content": 0,
"directory": 0,
"origin": 1,
"origin_visit": 1,
"person": 0,
"release": 0,
"revision": 0,
"skipped_content": 0,
"snapshot": 1,
} == stats
origin_visit = loader.storage.origin_visit_get_latest(url)
assert origin_visit["status"] == "partial"
assert origin_visit["type"] == "deposit"
def test_revision_metadata_structure(swh_config, requests_mock_datadir):
url = "https://hal-test.archives-ouvertes.fr/some-external-id"
deposit_id = 666
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] is not None
expected_revision_id = hash_to_bytes("637318680351f5d78856d13264faebbd91efe9bb")
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(
revision["metadata"],
paths=[
("extrinsic.provider", str),
("extrinsic.when", str),
("extrinsic.raw", dict),
("original_artifact", list),
],
)
# Only 2 top-level keys now
assert set(revision["metadata"].keys()) == {"extrinsic", "original_artifact"}
for original_artifact in revision["metadata"]["original_artifact"]:
check_metadata_paths(
original_artifact,
paths=[("filename", str), ("length", int), ("checksums", dict),],
)
def test_deposit_loading_ok(swh_config, requests_mock_datadir):
url = "https://hal-test.archives-ouvertes.fr/some-external-id"
deposit_id = 666
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
expected_snapshot_id = "b2b327b33dc85818bd23c3ccda8b7e675a66ecbd"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
}
stats = get_stats(loader.storage)
assert {
"content": 303,
"directory": 12,
"origin": 1,
"origin_visit": 1,
"person": 1,
"release": 0,
"revision": 1,
"skipped_content": 0,
"snapshot": 1,
} == stats
origin_visit = loader.storage.origin_visit_get_latest(url)
assert origin_visit["status"] == "full"
assert origin_visit["type"] == "deposit"
+ revision_id = "637318680351f5d78856d13264faebbd91efe9bb"
expected_branches = {
- "HEAD": {
- "target": "637318680351f5d78856d13264faebbd91efe9bb",
- "target_type": "revision",
- },
+ "HEAD": {"target": revision_id, "target_type": "revision",},
}
expected_snapshot = {
"id": expected_snapshot_id,
"branches": expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
+ revision = next(loader.storage.revision_get([hash_to_bytes(revision_id)]))
+ assert revision
+
# check metadata
tool = {
"name": "swh-deposit",
"version": "0.0.1",
"configuration": {"sword_version": "2",},
}
tool = loader.storage.tool_get(tool)
assert tool is not None
assert tool["id"] is not None
provider = {
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
"metadata": None,
}
provider = loader.storage.metadata_provider_get_by(provider)
assert provider is not None
assert provider["id"] is not None
metadata = list(
loader.storage.origin_metadata_get_by(url, provider_type="deposit_client")
)
assert metadata is not None
assert isinstance(metadata, list)
assert len(metadata) == 1
metadata0 = metadata[0]
assert metadata0["provider_id"] == provider["id"]
assert metadata0["provider_type"] == "deposit_client"
assert metadata0["tool_id"] == tool["id"]
+ # Retrieve the status update query sent to the deposit server
+ urls = [
+ m
+ for m in requests_mock_datadir.request_history
+ if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/"
+ ]
+
+ assert len(urls) == 1
+ update_query = urls[0]
+
+ body = update_query.json()
+ expected_body = {
+ "status": "done",
+ "revision_id": revision_id,
+ "directory_id": hash_to_hex(revision["directory"]),
+ "snapshot_id": expected_snapshot_id,
+ "origin_url": url,
+ }
+
+ assert body == expected_body
+
def test_deposit_loading_ok_2(swh_config, requests_mock_datadir):
"""Field dates should be se appropriately
"""
external_id = "some-external-id"
url = f"https://hal-test.archives-ouvertes.fr/{external_id}"
deposit_id = 777
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
expected_snapshot_id = "3e68440fdd7c81d283f8f3aebb6f0c8657864192"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
}
revision_id = "564d18943d71be80d0d73b43a77cfb205bcde96c"
expected_branches = {"HEAD": {"target": revision_id, "target_type": "revision"}}
expected_snapshot = {
"id": expected_snapshot_id,
"branches": expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
origin_visit = loader.storage.origin_visit_get_latest(url)
# The visit is full (no hash collisions were detected)
assert origin_visit["status"] == "full"
assert origin_visit["type"] == "deposit"
raw_meta = loader.client.metadata_get(deposit_id)
# Ensure the date fields are set appropriately in the revision
# Retrieve the revision
revision = next(loader.storage.revision_get([hash_to_bytes(revision_id)]))
assert revision
assert revision["date"] == raw_meta["deposit"]["author_date"]
assert revision["committer_date"] == raw_meta["deposit"]["committer_date"]
- read_api = f"https://deposit.softwareheritage.org/1/private/{deposit_id}/meta/"
+ read_api = f"{DEPOSIT_URL}/{deposit_id}/meta/"
assert revision["metadata"] == {
"extrinsic": {
"provider": read_api,
"raw": {
"origin": {"type": "deposit", "url": url,},
"origin_metadata": {
"metadata": {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one",],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"codemeta:datePublished": "2017-10-08T15:00:00Z",
"external_identifier": "some-external-id",
"url": url,
},
"provider": {
"metadata": None,
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
"version": "0.0.1",
},
},
},
"when": revision["metadata"]["extrinsic"]["when"], # dynamic
},
"original_artifact": [
{
"checksums": {
"sha1": "f8c63d7c890a7453498e6cf9fef215d85ec6801d",
"sha256": "474bf646aeeff6d945eb752b1a9f8a40f3d81a88909ee7bd2d08cc822aa361e6", # noqa
},
"filename": "archive.zip",
"length": 956830,
}
],
}
# Check the metadata swh side
origin_meta = list(
loader.storage.origin_metadata_get_by(url, provider_type="deposit_client")
)
assert len(origin_meta) == 1
origin_meta = origin_meta[0]
# dynamic, a pain to display and not that interesting
origin_meta.pop("discovery_date")
assert origin_meta == {
"metadata": {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one"],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"codemeta:datePublished": "2017-10-08T15:00:00Z",
"external_identifier": "some-external-id",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_url": "https://hal-test.archives-ouvertes.fr/some-external-id",
"provider_id": 1,
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
"tool_id": 1,
}
+
+ # Retrieve the status update query sent to the deposit server
+ urls = [
+ m
+ for m in requests_mock_datadir.request_history
+ if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/"
+ ]
+
+ assert len(urls) == 1
+ update_query = urls[0]
+
+ body = update_query.json()
+ expected_body = {
+ "status": "done",
+ "revision_id": revision_id,
+ "directory_id": hash_to_hex(revision["directory"]),
+ "snapshot_id": expected_snapshot_id,
+ "origin_url": url,
+ }
+
+ assert body == expected_body
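
The new assertions rely on requests_mock recording every outgoing call in request_history. A minimal standalone sketch of that pattern (the endpoint and payload are made up):

    import requests
    import requests_mock

    with requests_mock.Mocker() as m:
        # Register a catch-all matcher for PUT requests.
        m.put(requests_mock.ANY, json={})
        requests.put("https://example.org/1/update/", json={"status": "done"})
        # Every intercepted call is recorded and can be inspected.
        sent = [r for r in m.request_history if r.url.endswith("/update/")]
        assert len(sent) == 1
        assert sent[0].json() == {"status": "done"}
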
diff --git a/version.txt b/version.txt
index bc9071a..8016821 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.93-0-gfca7e47
\ No newline at end of file
+v0.0.94-0-g8bd38f9
\ No newline at end of file
