diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -75,7 +75,15 @@ # FIXME: the deposit no longer needs to build the revision + # Note: + # `date` and `committer_date` are always transmitted by the deposit read API + # which computes the values itself. The loader needs to use those to create the + # revision. + + # date: codemeta:dateCreated if any, deposit completed_date otherwise date = TimestampWithTimezone.from_dict(revision_data["date"]) + # commit_date: codemeta:datePublished if any, deposit completed_date otherwise + commit_date = TimestampWithTimezone.from_dict(revision_data["committer_date"]) metadata = revision_data["metadata"] metadata.update( { @@ -93,7 +101,7 @@ author=parse_author(revision_data["author"]), date=date, committer=parse_author(revision_data["committer"]), - committer_date=date, + committer_date=commit_date, parents=[hash_to_bytes(p) for p in revision_data.get("parents", [])], directory=directory, synthetic=True, diff --git a/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_meta b/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_meta new file mode 120000 --- /dev/null +++ b/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_meta @@ -0,0 +1 @@ +hello_2.11.json \ No newline at end of file diff --git a/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_raw b/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_raw new file mode 120000 --- /dev/null +++ b/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_raw @@ -0,0 +1 @@ +hello-2.10.zip \ No newline at end of file diff --git a/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.11.json 
b/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.11.json new file mode 100644 --- /dev/null +++ b/swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.11.json @@ -0,0 +1,82 @@ +{ + "origin": { + "url": "https://hal-test.archives-ouvertes.fr/some-external-id", + "type": "deposit" + }, + "origin_metadata": { + "metadata": { + "@xmlns": [ + "http://www.w3.org/2005/Atom" + ], + "author": [ + "some awesome author", + "another one", + "no one" + ], + "codemeta:dateCreated": "2017-10-07T15:17:08Z", + "codemeta:datePublished": "2017-10-08T15:00:00Z", + "external_identifier": "some-external-id", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id" + }, + "provider": { + "provider_name": "hal", + "provider_type": "deposit_client", + "provider_url": "https://hal-test.archives-ouvertes.fr/", + "metadata": null + }, + "tool": { + "name": "swh-deposit", + "version": "0.0.1", + "configuration": { + "sword_version": "2" + } + } + }, + "revision": { + "synthetic": true, + "committer_date": { + "timestamp": { + "seconds": 1507474800, + "microseconds": 0 + }, + "offset": 0, + "negative_utc": false + }, + "message": "hal: Deposit 777 in collection hal", + "author": { + "name": "Software Heritage", + "fullname": "Software Heritage", + "email": "robot@softwareheritage.org" + }, + "committer": { + "name": "Software Heritage", + "fullname": "Software Heritage", + "email": "robot@softwareheritage.org" + }, + "date": { + "timestamp": { + "seconds": 1507389428, + "microseconds": 0 + }, + "offset": 0, + "negative_utc": false + }, + "metadata": { + "@xmlns": [ + "http://www.w3.org/2005/Atom" + ], + "author": [ + "some awesome author", + "another one", + "no one" + ], + "external_identifier": "some-external-id", + "codemeta:dateCreated": "2017-10-07T15:17:08Z", + "codemeta:datePublished": "2017-10-08T15:00:00Z", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id" + }, + "type": "tar", + "parents": [] + }, + 
"branch_name": "master" +} diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -211,3 +211,44 @@ assert metadata0["provider_id"] == provider["id"] assert metadata0["provider_type"] == "deposit_client" assert metadata0["tool_id"] == tool["id"] + + +def test_deposit_loading_ok_2(swh_config, requests_mock_datadir): + """Field dates should be se appropriately + + """ + url = "https://hal-test.archives-ouvertes.fr/some-external-id" + deposit_id = 777 + loader = DepositLoader(url, deposit_id) + + actual_load_status = loader.load() + expected_snapshot_id = "3e68440fdd7c81d283f8f3aebb6f0c8657864192" + + assert actual_load_status == { + "status": "eventful", + "snapshot_id": expected_snapshot_id, + } + + revision_id = "564d18943d71be80d0d73b43a77cfb205bcde96c" + expected_branches = {"HEAD": {"target": revision_id, "target_type": "revision"}} + expected_snapshot = { + "id": expected_snapshot_id, + "branches": expected_branches, + } + + check_snapshot(expected_snapshot, storage=loader.storage) + + origin_visit = loader.storage.origin_visit_get_latest(url) + + # The visit is partial because some hash collision were detected + assert origin_visit["status"] == "full" + assert origin_visit["type"] == "deposit" + + raw_meta = loader.client.metadata_get(deposit_id) + # Ensure the date fields are set appropriately in the revision + + # Retrieve the revision + revision = next(loader.storage.revision_get([hash_to_bytes(revision_id)])) + assert revision + for field_date in ["committer_date", "date"]: + assert revision[field_date] == raw_meta["revision"][field_date]