Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
# flake8: noqa | # flake8: noqa | ||||
# because of long lines | # because of long lines | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import json | import json | ||||
from unittest.mock import MagicMock, Mock, call | from unittest.mock import MagicMock, Mock, call | ||||
from swh.model.identifiers import parse_swhid | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | |||||
Origin, | Origin, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
) | ) | ||||
from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid | |||||
from swh.storage.migrate_extrinsic_metadata import ( | from swh.storage.migrate_extrinsic_metadata import ( | ||||
DEPOSIT_COLS, | DEPOSIT_COLS, | ||||
cran_package_from_url, | cran_package_from_url, | ||||
handle_row, | handle_row, | ||||
) | ) | ||||
FETCHER = MetadataFetcher( | FETCHER = MetadataFetcher( | ||||
name="migrate-extrinsic-metadata-from-revisions", version="0.0.1", | name="migrate-extrinsic-metadata-from-revisions", version="0.0.1", | ||||
▲ Show 20 Lines • Show All 94 Lines • ▼ Show 20 Lines | row = { | ||||
"original_artifact": original_artifacts, | "original_artifact": original_artifacts, | ||||
**extrinsic_metadata, | **extrinsic_metadata, | ||||
}, | }, | ||||
} | } | ||||
origin_url = ( | origin_url = ( | ||||
"https://www.softwareheritage.org/check-deposit-2020-03-11T11:07:18.424476" | "https://www.softwareheritage.org/check-deposit-2020-03-11T11:07:18.424476" | ||||
) | ) | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
swhid = ( | swhid = ( | ||||
f"swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" | f"swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" | ||||
f";origin={origin_url}" | f";origin={origin_url}" | ||||
f";visit=swh:1:snp:14433c19dbb03ad57c86b58b53a800d6a0e32dd3" | f";visit=swh:1:snp:14433c19dbb03ad57c86b58b53a800d6a0e32dd3" | ||||
f";anchor=swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" | f";anchor=swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" | ||||
f";path=/" | f";path=/" | ||||
) | ) | ||||
Show All 40 Lines | def test_deposit_1(): | ||||
deposit_cur.execute.assert_called_once() | deposit_cur.execute.assert_called_once() | ||||
deposit_cur.__iter__.assert_called_once() | deposit_cur.__iter__.assert_called_once() | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.origin_get([origin_url]), | call.origin_get([origin_url]), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 3, 11, 11, 7, 18, 688410, tzinfo=datetime.timezone.utc | 2020, 3, 11, 11, 7, 18, 688410, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_DEPOSIT_AUTHORITY, | authority=SWH_DEPOSIT_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json", | format="sword-v2-atom-codemeta-v2-in-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" | "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 3, 11, 11, 11, 36, 336283, tzinfo=datetime.timezone.utc | 2020, 3, 11, 11, 11, 36, 336283, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(original_artifacts).encode(), | metadata=json.dumps(original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" | "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 102 Lines • ▼ Show 20 Lines | deposit_rows = [ | ||||
), | ), | ||||
"deposit_client.provider_url": "https://hal.archives-ouvertes.fr/", | "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/", | ||||
"deposit_collection.name": "hal", | "deposit_collection.name": "hal", | ||||
"auth_user.username": "hal", | "auth_user.username": "hal", | ||||
}, | }, | ||||
] | ] | ||||
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" | origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = Mock() | storage = Mock() | ||||
def origin_get(urls): | def origin_get(urls): | ||||
assert urls == [origin_url] | assert urls == [origin_url] | ||||
return [Origin(url=origin_url)] | return [Origin(url=origin_url)] | ||||
storage.origin_get.side_effect = origin_get | storage.origin_get.side_effect = origin_get | ||||
deposit_cur = get_mock_deposit_cur(deposit_rows) | deposit_cur = get_mock_deposit_cur(deposit_rows) | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
deposit_cur.execute.assert_called_once() | deposit_cur.execute.assert_called_once() | ||||
deposit_cur.__iter__.assert_called_once() | deposit_cur.__iter__.assert_called_once() | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.origin_get([origin_url]), | call.origin_get([origin_url]), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2018, 1, 17, 12, 54, 0, 413748, tzinfo=datetime.timezone.utc | 2018, 1, 17, 12, 54, 0, 413748, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=HAL_AUTHORITY, | authority=HAL_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", | format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" | "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 5, 15, 14, 27, 21, 462270, tzinfo=datetime.timezone.utc | 2020, 5, 15, 14, 27, 21, 462270, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(original_artifacts).encode(), | metadata=json.dumps(original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" | "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 101 Lines • ▼ Show 20 Lines | deposit_rows = [ | ||||
"deposit_collection.name": "swh", | "deposit_collection.name": "swh", | ||||
"auth_user.username": "swh", | "auth_user.username": "swh", | ||||
}, | }, | ||||
] | ] | ||||
origin_url = ( | origin_url = ( | ||||
"https://www.softwareheritage.org/check-deposit-2020-06-26T13:50:07.564420" | "https://www.softwareheritage.org/check-deposit-2020-06-26T13:50:07.564420" | ||||
) | ) | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = Mock() | storage = Mock() | ||||
def origin_get(urls): | def origin_get(urls): | ||||
assert urls == [origin_url] | assert urls == [origin_url] | ||||
return [Origin(url=origin_url)] | return [Origin(url=origin_url)] | ||||
storage.origin_get.side_effect = origin_get | storage.origin_get.side_effect = origin_get | ||||
deposit_cur = get_mock_deposit_cur(deposit_rows) | deposit_cur = get_mock_deposit_cur(deposit_rows) | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
deposit_cur.execute.assert_called_once() | deposit_cur.execute.assert_called_once() | ||||
deposit_cur.__iter__.assert_called_once() | deposit_cur.__iter__.assert_called_once() | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.origin_get([origin_url]), | call.origin_get([origin_url]), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 6, 26, 13, 50, 8, 216113, tzinfo=datetime.timezone.utc | 2020, 6, 26, 13, 50, 8, 216113, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_DEPOSIT_AUTHORITY, | authority=SWH_DEPOSIT_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json", | format="sword-v2-atom-codemeta-v2-in-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" | "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 6, 26, 13, 50, 22, 640625, tzinfo=datetime.timezone.utc | 2020, 6, 26, 13, 50, 22, 640625, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(original_artifacts).encode(), | metadata=json.dumps(original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" | "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | deposit_rows = [ | ||||
), | ), | ||||
"deposit_client.provider_url": "https://hal.archives-ouvertes.fr/", | "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/", | ||||
"deposit_collection.name": "hal", | "deposit_collection.name": "hal", | ||||
"auth_user.username": "hal", | "auth_user.username": "hal", | ||||
}, | }, | ||||
] | ] | ||||
origin_url = "https://hal.archives-ouvertes.fr/hal-02960679" | origin_url = "https://hal.archives-ouvertes.fr/hal-02960679" | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = Mock() | storage = Mock() | ||||
def origin_get(urls): | def origin_get(urls): | ||||
assert urls == [origin_url] | assert urls == [origin_url] | ||||
return [Origin(url=origin_url)] | return [Origin(url=origin_url)] | ||||
storage.origin_get.side_effect = origin_get | storage.origin_get.side_effect = origin_get | ||||
deposit_cur = get_mock_deposit_cur(deposit_rows) | deposit_cur = get_mock_deposit_cur(deposit_rows) | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
deposit_cur.execute.assert_called_once() | deposit_cur.execute.assert_called_once() | ||||
deposit_cur.__iter__.assert_called_once() | deposit_cur.__iter__.assert_called_once() | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.origin_get([origin_url]), | call.origin_get([origin_url]), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 10, 9, 13, 38, 7, 394544, tzinfo=datetime.timezone.utc | 2020, 10, 9, 13, 38, 7, 394544, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=HAL_AUTHORITY, | authority=HAL_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json", | format="sword-v2-atom-codemeta-v2-in-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" | "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2020, 10, 9, 13, 38, 25, 888646, tzinfo=datetime.timezone.utc | 2020, 10, 9, 13, 38, 25, 888646, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(original_artifacts).encode(), | metadata=json.dumps(original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" | "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 111 Lines • ▼ Show 20 Lines | deposit_rows = [ | ||||
), | ), | ||||
"deposit_client.provider_url": "https://software.intel.com", | "deposit_client.provider_url": "https://software.intel.com", | ||||
"deposit_collection.name": "intel", | "deposit_collection.name": "intel", | ||||
"auth_user.username": "intel", | "auth_user.username": "intel", | ||||
}, | }, | ||||
] | ] | ||||
origin_url = "https://software.intel.com/f80482de-90a8-4c32-bce4-6f6918d492ff" | origin_url = "https://software.intel.com/f80482de-90a8-4c32-bce4-6f6918d492ff" | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = Mock() | storage = Mock() | ||||
def origin_get(urls): | def origin_get(urls): | ||||
assert urls == [origin_url] | assert urls == [origin_url] | ||||
return [Origin(url=origin_url)] | return [Origin(url=origin_url)] | ||||
storage.origin_get.side_effect = origin_get | storage.origin_get.side_effect = origin_get | ||||
deposit_cur = get_mock_deposit_cur(deposit_rows) | deposit_cur = get_mock_deposit_cur(deposit_rows) | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
deposit_cur.execute.assert_called_once() | deposit_cur.execute.assert_called_once() | ||||
deposit_cur.__iter__.assert_called_once() | deposit_cur.__iter__.assert_called_once() | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.origin_get([origin_url]), | call.origin_get([origin_url]), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc | 2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=INTEL_AUTHORITY, | authority=INTEL_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json", | format="sword-v2-atom-codemeta-v2-in-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" | "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 5, 14, 7, 28, 33, 210100, tzinfo=datetime.timezone.utc | 2019, 5, 14, 7, 28, 33, 210100, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=INTEL_AUTHORITY, | authority=INTEL_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json", | format="sword-v2-atom-codemeta-v2-in-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" | "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc | 2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" | "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 136 Lines • ▼ Show 20 Lines | deposit_rows = [ | ||||
), | ), | ||||
"deposit_client.provider_url": "https://hal.archives-ouvertes.fr/", | "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/", | ||||
"deposit_collection.name": "hal", | "deposit_collection.name": "hal", | ||||
"auth_user.username": "hal", | "auth_user.username": "hal", | ||||
}, | }, | ||||
] | ] | ||||
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" | origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = Mock() | storage = Mock() | ||||
def origin_get(urls): | def origin_get(urls): | ||||
assert urls == [origin_url] | assert urls == [origin_url] | ||||
return [Origin(url=origin_url)] | return [Origin(url=origin_url)] | ||||
storage.origin_get.side_effect = origin_get | storage.origin_get.side_effect = origin_get | ||||
deposit_cur = get_mock_deposit_cur(deposit_rows) | deposit_cur = get_mock_deposit_cur(deposit_rows) | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
deposit_cur.execute.assert_called_once() | deposit_cur.execute.assert_called_once() | ||||
deposit_cur.__iter__.assert_called_once() | deposit_cur.__iter__.assert_called_once() | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.origin_get([origin_url]), | call.origin_get([origin_url]), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2018, 1, 17, 12, 49, 30, 645576, tzinfo=datetime.timezone.utc | 2018, 1, 17, 12, 49, 30, 645576, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=HAL_AUTHORITY, | authority=HAL_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", | format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8" | "swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
# note: no original artifacts | # note: no original artifacts | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | deposit_rows = [ | ||||
), | ), | ||||
"deposit_client.provider_url": "https://hal.archives-ouvertes.fr/", | "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/", | ||||
"deposit_collection.name": "hal", | "deposit_collection.name": "hal", | ||||
"auth_user.username": "hal", | "auth_user.username": "hal", | ||||
}, | }, | ||||
] | ] | ||||
origin_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781" | origin_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781" | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = Mock() | storage = Mock() | ||||
def origin_get(urls): | def origin_get(urls): | ||||
assert urls == [origin_url] | assert urls == [origin_url] | ||||
return [Origin(url=origin_url)] | return [Origin(url=origin_url)] | ||||
storage.origin_get.side_effect = origin_get | storage.origin_get.side_effect = origin_get | ||||
deposit_cur = get_mock_deposit_cur(deposit_rows) | deposit_cur = get_mock_deposit_cur(deposit_rows) | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
deposit_cur.execute.assert_called_once() | deposit_cur.execute.assert_called_once() | ||||
deposit_cur.__iter__.assert_called_once() | deposit_cur.__iter__.assert_called_once() | ||||
assert storage.method_calls == [ | assert storage.method_calls == [ | ||||
call.origin_get([origin_url]), | call.origin_get([origin_url]), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2018, 1, 10, 13, 14, 50, 555143, tzinfo=datetime.timezone.utc | 2018, 1, 10, 13, 14, 50, 555143, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=HAL_AUTHORITY, | authority=HAL_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", | format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4" | "swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
# note: no original artifacts | # note: no original artifacts | ||||
] | ] | ||||
def test_deposit_missing_metadata_in_revision(): | def test_deposit_ignore_origin_in_metadata(): | ||||
extrinsic_metadata = { | extrinsic_metadata = { | ||||
"id": "hal-01243573", | "id": "hal-01243573", | ||||
"@xmlns": "http://www.w3.org/2005/Atom", | "@xmlns": "http://www.w3.org/2005/Atom", | ||||
"author": {"name": "HAL", "email": "hal@ccsd.cnrs.fr"}, | "author": {"name": "HAL", "email": "hal@ccsd.cnrs.fr"}, | ||||
"client": "hal", | "client": "hal", | ||||
"committer": "Administrateur Du Ccsd", | "committer": "Administrateur Du Ccsd", | ||||
"codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243573", | "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243573", | ||||
"codemeta:name": "The assignment problem", | "codemeta:name": "The assignment problem", | ||||
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines | deposit_rows = [ | ||||
"deposit_collection.name": "hal", | "deposit_collection.name": "hal", | ||||
"auth_user.username": "hal", | "auth_user.username": "hal", | ||||
}, | }, | ||||
] | ] | ||||
origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" | origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" | ||||
# /!\ not https://hal-test.archives-ouvertes.fr/hal-01243573 | # /!\ not https://hal-test.archives-ouvertes.fr/hal-01243573 | ||||
# do not trust the metadata! | # do not trust the metadata! | ||||
origin_swhid = SWHID( | |||||
object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() | |||||
) | |||||
storage = Mock() | storage = Mock() | ||||
def origin_get(urls): | def origin_get(urls): | ||||
assert urls == [origin_url] | assert urls == [origin_url] | ||||
return [Origin(url=origin_url)] | return [Origin(url=origin_url)] | ||||
storage.origin_get.side_effect = origin_get | storage.origin_get.side_effect = origin_get | ||||
deposit_cur = get_mock_deposit_cur(deposit_rows) | deposit_cur = get_mock_deposit_cur(deposit_rows) | ||||
handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) | ||||
deposit_cur.execute.assert_called_once() | deposit_cur.execute.assert_called_once() | ||||
deposit_cur.__iter__.assert_called_once() | deposit_cur.__iter__.assert_called_once() | ||||
assert storage.method_calls == [ | expected = [ | ||||
call.origin_get([origin_url]), | call.origin_get([origin_url]), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 2, 25, 15, 49, 12, 302745, tzinfo=datetime.timezone.utc | 2019, 2, 25, 15, 49, 12, 302745, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=HAL_AUTHORITY, | authority=HAL_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="sword-v2-atom-codemeta-v2-in-json", | format="sword-v2-atom-codemeta-v2-in-json", | ||||
metadata=json.dumps(extrinsic_metadata).encode(), | metadata=json.dumps(extrinsic_metadata).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" | "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
call.raw_extrinsic_metadata_add( | call.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.DIRECTORY, | |||||
target=DIRECTORY_SWHID, | target=DIRECTORY_SWHID, | ||||
discovery_date=datetime.datetime( | discovery_date=datetime.datetime( | ||||
2019, 2, 25, 15, 54, 30, 102072, tzinfo=datetime.timezone.utc | 2019, 2, 25, 15, 54, 30, 102072, tzinfo=datetime.timezone.utc | ||||
), | ), | ||||
authority=SWH_AUTHORITY, | authority=SWH_AUTHORITY, | ||||
fetcher=FETCHER, | fetcher=FETCHER, | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(dest_original_artifacts).encode(), | metadata=json.dumps(dest_original_artifacts).encode(), | ||||
origin=origin_url, | origin=origin_swhid, | ||||
revision=parse_swhid( | revision=parse_swhid( | ||||
"swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" | "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" | ||||
), | ), | ||||
), | ), | ||||
] | ] | ||||
), | ), | ||||
] | ] | ||||
assert storage.method_calls == expected |