Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9125920
test_deposit.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
50 KB
Subscribers
None
test_deposit.py
View Options
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# flake8: noqa
# because of long lines
import
copy
import
datetime
import
json
from
unittest.mock
import
MagicMock
,
Mock
,
call
from
swh.model.model
import
(
MetadataAuthority
,
MetadataAuthorityType
,
MetadataFetcher
,
Origin
,
RawExtrinsicMetadata
,
)
from
swh.model.swhids
import
CoreSWHID
,
ExtendedObjectType
,
ExtendedSWHID
from
swh.storage.migrate_extrinsic_metadata
import
(
DEPOSIT_COLS
,
cran_package_from_url
,
handle_row
,
)
def _deposit_client_authority(url):
    """Return a ``DEPOSIT_CLIENT`` metadata authority located at ``url``."""
    return MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT, url=url, metadata={},
    )


# Fetcher expected on every RawExtrinsicMetadata object asserted below.
FETCHER = MetadataFetcher(
    name="migrate-extrinsic-metadata-from-revisions", version="0.0.1",
)

# Registry authority; the tests below expect it on "original-artifacts-json"
# metadata objects.
SWH_AUTHORITY = MetadataAuthority(
    type=MetadataAuthorityType.REGISTRY,
    url="https://softwareheritage.org/",
    metadata={},
)

# Deposit-client authorities, one per deposit client appearing in the tests.
SWH_DEPOSIT_AUTHORITY = _deposit_client_authority("https://www.softwareheritage.org")
HAL_AUTHORITY = _deposit_client_authority("https://hal.archives-ouvertes.fr/")
INTEL_AUTHORITY = _deposit_client_authority("https://software.intel.com")

# Dummy 20-byte directory id shared by every revision row in this module,
# and the extended SWHID built from it (the expected metadata target).
DIRECTORY_ID = b"a" * 20
DIRECTORY_SWHID = ExtendedSWHID(
    object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID
)
def get_mock_deposit_cur(row_dicts):
    """Build a mock DB cursor that yields ``row_dicts`` as tuples.

    Each dict is projected onto ``DEPOSIT_COLS`` (in that order) to form one
    tuple.  The returned ``MagicMock`` can be iterated exactly once: its
    ``__iter__`` has a one-element ``side_effect`` list, so a second iteration
    finds the side effects exhausted.
    """
    projected = []
    for row_dict in row_dicts:
        projected.append(tuple(row_dict[column] for column in DEPOSIT_COLS))

    cursor = MagicMock()
    cursor.__iter__.side_effect = [iter(projected)]
    return cursor
def test_deposit_1():
    """Deposit revision with a provider and ``@xmlns`` keys, whose metadata is
    present twice in the revision: at the root of the metadata dict and under
    metadata->extrinsic->raw->origin_metadata.

    Expects one ``origin_get`` call followed by two
    ``raw_extrinsic_metadata_add`` calls (deposit metadata, then original
    artifacts)."""
    utc = datetime.timezone.utc
    origin_url = (
        "https://www.softwareheritage.org/check-deposit-2020-03-11T11:07:18.424476"
    )
    revision_swhid = "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d"
    revision_date = datetime.datetime(2018, 1, 5, 0, 0, tzinfo=utc)
    # Date of the only deposit request that carries metadata.
    metadata_request_date = datetime.datetime(
        2020, 3, 11, 11, 7, 18, 688410, tzinfo=utc
    )

    extrinsic_metadata = {
        "title": "Je suis GPL",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "client": "swh",
        "codemeta:url": "https://forge.softwareheritage.org/source/jesuisgpl/",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {
            "codemeta:name": "Stefano Zacchiroli",
            "codemeta:jobTitle": "Maintainer",
        },
        "codemeta:license": {
            "codemeta:url": "https://spdx.org/licenses/GPL-3.0-or-later.html",
            "codemeta:name": "GNU General Public License v3.0 or later",
        },
        # ...
    }
    original_artifacts = [
        {
            "length": 80880,
            "filename": "archive.zip",
            "checksums": {
                "sha1": "bad32a47a359e0e16ebdca2ad2dc6a771dac8f71",
                "sha256": "182b7ee3b7b5b550e83d3bcfed029bb2f625ee760ebfe9557d5fd072bd4e22e4",
            },
        }
    ]

    row = {
        "id": b"\x02#\x10\xdf\x16\xfd\x9eMO\x81\xfe6\xa1B\xe8-\xb9w\xc0\x1d",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b"swh: Deposit 467 in collection swh",
        "metadata": {
            "client": "swh",
            "extrinsic": {
                "raw": {
                    "origin": {"url": origin_url, "type": "deposit"},
                    "branch_name": "master",
                    "origin_metadata": {
                        "tool": {
                            "name": "swh-deposit",
                            "version": "0.0.1",
                            "configuration": {"sword_version": 2},
                        },
                        "metadata": extrinsic_metadata,
                    },
                },
                "when": "2020-03-11T11:11:36.336283+00:00",
                "provider": "https://deposit.softwareheritage.org/1/private/467/meta/",
            },
            "original_artifact": original_artifacts,
            **extrinsic_metadata,
        },
    }

    swhid = (
        "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
        f";origin={origin_url}"
        ";visit=swh:1:snp:14433c19dbb03ad57c86b58b53a800d6a0e32dd3"
        f";anchor={revision_swhid}"
        ";path=/"
    )

    # Columns that are identical in both deposit request rows.
    shared_cols = {
        "deposit.id": 467,
        "deposit.external_id": "check-deposit-2020-03-11T11:07:18.424476",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://www.softwareheritage.org",
        "deposit_collection.name": "swh",
        "auth_user.username": "swh",
    }
    deposit_rows = [
        {
            **shared_cols,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
        {
            **shared_cols,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2020, 3, 11, 11, 7, 18, 669428, tzinfo=utc
            ),
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    revision = CoreSWHID.from_string(revision_swhid)
    expected_calls = [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=metadata_request_date,
                    authority=SWH_DEPOSIT_AUTHORITY,
                    fetcher=FETCHER,
                    format="sword-v2-atom-codemeta-v2-in-json",
                    metadata=json.dumps(extrinsic_metadata).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=datetime.datetime(
                        2020, 3, 11, 11, 11, 36, 336283, tzinfo=utc
                    ),
                    authority=SWH_AUTHORITY,
                    fetcher=FETCHER,
                    format="original-artifacts-json",
                    metadata=json.dumps(original_artifacts).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
    ]
    assert storage.method_calls == expected_calls
def test_deposit_2_without_xmlns():
    """Deposit revision with a provider and no ``@xmlns`` keys (namespaces are
    expanded in the key names); the metadata is only under
    metadata->extrinsic->raw->origin_metadata.

    Expects one ``origin_get`` call followed by two
    ``raw_extrinsic_metadata_add`` calls (deposit metadata, then original
    artifacts)."""
    utc = datetime.timezone.utc
    origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
    revision_swhid = "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46"
    revision_date = datetime.datetime(2018, 1, 17, 12, 54, 0, 723882, tzinfo=utc)
    # Date of the only deposit request that carries metadata.
    metadata_request_date = datetime.datetime(
        2018, 1, 17, 12, 54, 0, 413748, tzinfo=utc
    )

    extrinsic_metadata = {
        "{http://www.w3.org/2005/Atom}id": "hal-01243573",
        "{http://www.w3.org/2005/Atom}author": {
            "{http://www.w3.org/2005/Atom}name": "HAL",
            "{http://www.w3.org/2005/Atom}email": "hal@ccsd.cnrs.fr",
        },
        "{http://www.w3.org/2005/Atom}client": "hal",
        "{http://www.w3.org/2005/Atom}external_identifier": "hal-01243573",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}url": "https://hal-test.archives-ouvertes.fr/hal-01243573",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "The assignment problem",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author": {
            "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "Morane Gruenpeter"
        },
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}version": 1,
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}identifier": "10.5281/zenodo.438684",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}dateCreated": "2017-11-16T14:54:23+01:00",
    }
    original_artifacts = [
        {
            "length": 208357,
            "filename": "archive.zip",
            "checksums": {
                "sha1": "fa0aec08e8a44ea144dba7ce366c8b5d66c14453",
                "sha256": "f53c05fe947e88ce83751a93bd522b1f88478ea2e7b984c07fc7a7c68128bf87",
            },
        }
    ]

    row = {
        "id": b"\x01\x16\xca\xb7\x19d\xd5\x9c\x85p\xb4\xc5r\x9b(\xbd\xd6<\x9bF",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b"hal: Deposit 82 in collection hal",
        "metadata": {
            "extrinsic": {
                "raw": {
                    "origin": {"url": origin_url, "type": "deposit"},
                    "origin_metadata": {
                        "tool": {
                            "name": "swh-deposit",
                            "version": "0.0.1",
                            "configuration": {"sword_version": 2},
                        },
                        "metadata": extrinsic_metadata,
                        "provider": {
                            "metadata": {},
                            "provider_url": "https://hal.archives-ouvertes.fr/",
                            "provider_name": "hal",
                            "provider_type": "deposit_client",
                        },
                    },
                },
                "when": "2020-05-15T14:27:21.462270+00:00",
                "provider": "https://deposit.softwareheritage.org/1/private/82/meta/",
            },
            "original_artifact": original_artifacts,
        },
    }

    swhid = (
        "swh:1:dir:e04b2a7b8a8838da0693e9fd992a10d6fd211b50"
        f";origin={origin_url}"
        ";visit=swh:1:snp:abc9ae594245a740235b6c039f044352a5f723ec"
        f";anchor={revision_swhid}"
        ";path=/"
    )

    # Columns that are identical in both deposit request rows.
    shared_cols = {
        "deposit.id": 82,
        "deposit.external_id": "hal-01243573",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
        "deposit_collection.name": "hal",
        "auth_user.username": "hal",
    }
    deposit_rows = [
        {
            **shared_cols,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2018, 1, 17, 12, 54, 1, 533972, tzinfo=utc
            ),
        },
        {
            **shared_cols,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    revision = CoreSWHID.from_string(revision_swhid)
    expected_calls = [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=metadata_request_date,
                    authority=HAL_AUTHORITY,
                    fetcher=FETCHER,
                    format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
                    metadata=json.dumps(extrinsic_metadata).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=datetime.datetime(
                        2020, 5, 15, 14, 27, 21, 462270, tzinfo=utc
                    ),
                    authority=SWH_AUTHORITY,
                    fetcher=FETCHER,
                    format="original-artifacts-json",
                    metadata=json.dumps(original_artifacts).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
    ]
    assert storage.method_calls == expected_calls
def test_deposit_2_with_xmlns():
    """Deposit revision with a provider and ``@xmlns`` keys; the metadata is
    only under metadata->extrinsic->raw->origin_metadata.

    Expects one ``origin_get`` call followed by two
    ``raw_extrinsic_metadata_add`` calls (deposit metadata, then original
    artifacts)."""
    utc = datetime.timezone.utc
    origin_url = (
        "https://www.softwareheritage.org/check-deposit-2020-06-26T13:50:07.564420"
    )
    revision_swhid = "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f"
    revision_date = datetime.datetime(2018, 1, 5, 0, 0, tzinfo=utc)
    # Date of the only deposit request that carries metadata.
    metadata_request_date = datetime.datetime(
        2020, 6, 26, 13, 50, 8, 216113, tzinfo=utc
    )

    extrinsic_metadata = {
        "title": "Je suis GPL",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "client": "swh",
        "codemeta:url": "https://forge.softwareheritage.org/source/jesuisgpl/",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {
            "codemeta:name": "Stefano Zacchiroli",
            "codemeta:jobTitle": "Maintainer",
        },
        "codemeta:license": {
            "codemeta:url": "https://spdx.org/licenses/GPL-3.0-or-later.html",
            "codemeta:name": "GNU General Public License v3.0 or later",
        },
        "external_identifier": "je-suis-gpl",
        "codemeta:dateCreated": "2018-01-05",
    }
    original_artifacts = [
        {
            "length": 80880,
            "filename": "archive.zip",
            "checksums": {
                "sha1": "bad32a47a359e0e16ebdca2ad2dc6a771dac8f71",
                "sha256": "182b7ee3b7b5b550e83d3bcfed029bb2f625ee760ebfe9557d5fd072bd4e22e4",
            },
        }
    ]

    row = {
        "id": b'\x01"\x96nP\x93\x17\xae\xcejA\xd0\xf0\x88\xdas<\xc0\x9d\x0f',
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b"swh: Deposit 687 in collection swh",
        "metadata": {
            "extrinsic": {
                "raw": {
                    "origin": {"url": origin_url, "type": "deposit"},
                    "origin_metadata": {
                        "tool": {
                            "name": "swh-deposit",
                            "version": "0.0.1",
                            "configuration": {"sword_version": 2},
                        },
                        "metadata": extrinsic_metadata,
                        "provider": {
                            "metadata": {},
                            "provider_url": "https://www.softwareheritage.org",
                            "provider_name": "swh",
                            "provider_type": "deposit_client",
                        },
                    },
                },
                "when": "2020-06-26T13:50:22.640625+00:00",
                "provider": "https://deposit.softwareheritage.org/1/private/687/meta/",
            },
            "original_artifact": original_artifacts,
        },
    }

    swhid = (
        "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
        f";origin={origin_url}"
        ";visit=swh:1:snp:8fd469e280fb0724175c64906627f619143d5bdb"
        f";anchor={revision_swhid}"
        ";path=/"
    )

    # Columns that are identical in both deposit request rows.
    shared_cols = {
        "deposit.id": 687,
        "deposit.external_id": "check-deposit-2020-06-26T13:50:07.564420",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://www.softwareheritage.org",
        "deposit_collection.name": "swh",
        "auth_user.username": "swh",
    }
    deposit_rows = [
        {
            **shared_cols,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
        {
            **shared_cols,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2020, 6, 26, 13, 50, 8, 150498, tzinfo=utc
            ),
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    revision = CoreSWHID.from_string(revision_swhid)
    expected_calls = [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=metadata_request_date,
                    authority=SWH_DEPOSIT_AUTHORITY,
                    fetcher=FETCHER,
                    format="sword-v2-atom-codemeta-v2-in-json",
                    metadata=json.dumps(extrinsic_metadata).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=datetime.datetime(
                        2020, 6, 26, 13, 50, 22, 640625, tzinfo=utc
                    ),
                    authority=SWH_AUTHORITY,
                    fetcher=FETCHER,
                    format="original-artifacts-json",
                    metadata=json.dumps(original_artifacts).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
    ]
    assert storage.method_calls == expected_calls
def test_deposit_2_with_json_in_json_and_no_xmlns():
    """Formats introduced in https://forge.softwareheritage.org/D4105 , where
    the raw metadata is itself serialized as JSON inside the metadata JSON
    tree, and in https://forge.softwareheritage.org/D4065 , where the @xmlns
    declarations are stripped before being sent to the deposit DB.

    Expects one ``origin_get`` call followed by two
    ``raw_extrinsic_metadata_add`` calls (deposit metadata, then original
    artifacts)."""
    utc = datetime.timezone.utc
    origin_url = "https://hal.archives-ouvertes.fr/hal-02960679"
    revision_swhid = "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0"
    # Date of the only deposit request that carries metadata.
    metadata_request_date = datetime.datetime(
        2020, 10, 9, 13, 38, 7, 394544, tzinfo=utc
    )

    extrinsic_metadata = {
        "id": "hal-02960679",
        "author": {"name": "HAL", "email": "hal@ccsd.cnrs.fr"},
        "client": "hal",
        "codemeta:url": "https://hal.archives-ouvertes.fr/hal-02960679",
        "codemeta:name": "Compressive Spectral Clustering Toolbox",
        "codemeta:author": [
            {"codemeta:name": "Nicolas Tremblay", "codemeta:affiliation": "PANAMA"},
            {"codemeta:name": "Gilles Puy", "codemeta:affiliation": "PANAMA"},
            {"codemeta:name": "R{\\'e}mi Gribonval", "codemeta:affiliation": "PANAMA"},
            {"codemeta:name": "Pierre Vandergheynst"},
        ],
        # ...
    }
    original_artifacts = [
        {
            "url": "https://deposit.softwareheritage.org/1/private/1037/raw/",
            "length": 4546913,
            "filename": "archive.zip",
            "checksums": {
                "sha1": "01a0069c626a383de9a17ace40ecfd588e5c4f26",
                "sha256": "c780a6de91286c70ceecc69fe0c6d201d3fe944aa89e193f3a89ae85dc25c3b1",
            },
        }
    ]

    row = {
        "id": b"J\x9dc{\xa5\x07\xa2\xb93e%\x04(\xe6\xe3\xf0!\xf1\x94\xd0",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2016, 1, 29, 0, 0, tzinfo=utc),
        "committer_date": datetime.datetime(2020, 10, 8, 0, 0, tzinfo=utc),
        "type": "tar",
        "message": b"hal: Deposit 1037 in collection hal",
        "metadata": {
            "extrinsic": {
                "raw": {
                    "origin": {"url": origin_url, "type": "deposit"},
                    "origin_metadata": {
                        "tool": {
                            "name": "swh-deposit",
                            "version": "0.2.0",
                            "configuration": {"sword_version": "2"},
                        },
                        # JSON document nested as a string in the JSON tree.
                        "metadata": json.dumps(extrinsic_metadata),
                        "provider": {
                            "metadata": {},
                            "provider_url": "https://hal.archives-ouvertes.fr/",
                            "provider_name": "hal",
                            "provider_type": "deposit_client",
                        },
                    },
                },
                "when": "2020-10-09T13:38:25.888646+00:00",
                "provider": "https://deposit.softwareheritage.org/1/private/1037/meta/",
            },
            "original_artifact": original_artifacts,
        },
    }

    swhid = (
        "swh:1:dir:8bfdf74037ae1c51335995891c6226e0f85e46e2"
        f";origin={origin_url}"
        ";visit=swh:1:snp:bc4a2ddf84dd0cc13d74e1970a1471c2574ed6aa"
        f";anchor={revision_swhid}"
        ";path=/"
    )

    # Columns that are identical in both deposit request rows.
    shared_cols = {
        "deposit.id": 1037,
        "deposit.external_id": "hal-02960679",
        "deposit.swhid_context": swhid,
        "deposit.status": "done",
        "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
        "deposit_collection.name": "hal",
        "auth_user.username": "hal",
    }
    deposit_rows = [
        {
            **shared_cols,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2020, 10, 9, 13, 38, 8, 269611, tzinfo=utc
            ),
        },
        {
            **shared_cols,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    revision = CoreSWHID.from_string(revision_swhid)
    expected_calls = [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=metadata_request_date,
                    authority=HAL_AUTHORITY,
                    fetcher=FETCHER,
                    format="sword-v2-atom-codemeta-v2-in-json",
                    metadata=json.dumps(extrinsic_metadata).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=datetime.datetime(
                        2020, 10, 9, 13, 38, 25, 888646, tzinfo=utc
                    ),
                    authority=SWH_AUTHORITY,
                    fetcher=FETCHER,
                    format="original-artifacts-json",
                    metadata=json.dumps(original_artifacts).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
    ]
    assert storage.method_calls == expected_calls
def test_deposit_3_and_wrong_external_id_in_metadata():
    """Deposit revision whose metadata sits at the root of the metadata dict,
    with an ``external_identifier`` that differs from the deposit's
    ``external_id`` column.

    Four deposit requests are returned by the cursor, two of which carry
    metadata.  Expects one ``origin_get`` call followed by three
    ``raw_extrinsic_metadata_add`` calls: one per request with metadata, then
    the original artifacts converted to the ``checksums`` layout."""
    utc = datetime.timezone.utc
    origin_url = "https://software.intel.com/f80482de-90a8-4c32-bce4-6f6918d492ff"
    revision_swhid = "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df"
    revision_date = datetime.datetime(2019, 5, 14, 0, 0, tzinfo=utc)
    # Dates of the two deposit requests that carry metadata.
    later_request_date = datetime.datetime(2019, 5, 14, 7, 49, 36, 775072, tzinfo=utc)
    earlier_request_date = datetime.datetime(
        2019, 5, 14, 7, 28, 33, 210100, tzinfo=utc
    )

    extrinsic_metadata = {
        "title": "VTune Perf tool",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "client": "swh",
        "codemeta:url": "https://software.intel.com/en-us/vtune",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {
            "codemeta:name": "VTune developer",
            "codemeta:jobTitle": "Software Engineer",
        },
        "external_identifier": "vtune-perf-tool",
        "codemeta:dateCreated": "2019-05-14",
        "codemeta:description": "Modified version of Linux Perf tool which is used by Intel VTune Amplifier",
    }
    # Artifact description as stored in the revision ...
    source_original_artifacts = [
        {
            "name": "archive.zip",
            "sha1": "07251dbb1d904d143fd7da9935701f17670d4d9b",
            "length": 4350528,
            "sha256": "1f7d111ac79e468002f3edf4b7b2487538d41f6bea362d49b2eb08a537efafb6",
            "sha1_git": "e2d894efcaad4ff36f09eda3b3c0096416b03429",
            "blake2s256": "e2c08b82efbc361fbb2d28aa8352668cd71217f165f63de16b61ed61ace7509d",
            "archive_type": "zip",
        }
    ]
    # ... and as expected in the metadata written to storage.
    dest_original_artifacts = [
        {
            "length": 4350528,
            "archive_type": "zip",
            "filename": "archive.zip",
            "checksums": {
                "sha1": "07251dbb1d904d143fd7da9935701f17670d4d9b",
                "sha256": "1f7d111ac79e468002f3edf4b7b2487538d41f6bea362d49b2eb08a537efafb6",
                "sha1_git": "e2d894efcaad4ff36f09eda3b3c0096416b03429",
                "blake2s256": "e2c08b82efbc361fbb2d28aa8352668cd71217f165f63de16b61ed61ace7509d",
            },
        }
    ]

    row = {
        "id": b"\t5`S\xc4\x9a\xd0\xf9\xe6.Q\xc2\x9d>a|y\x11@\xdf",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b"intel: Deposit 268 in collection intel",
        "metadata": {
            **extrinsic_metadata,
            "original_artifact": source_original_artifacts,
        },
    }

    swhid = (
        "swh:1:dir:527c8e4a67d391f2bf1bbc86dd94af5d5cfc8ef7"
        f";origin={origin_url}"
        ";visit=swh:1:snp:49d60943d9c061da1aba6266a811412f9db8de2e"
        f";anchor={revision_swhid}"
        ";path=/"
    )

    # Columns that are identical in all four deposit request rows.
    shared_cols = {
        "deposit.id": 268,
        "deposit.external_id": "f80482de-90a8-4c32-bce4-6f6918d492ff",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://software.intel.com",
        "deposit_collection.name": "intel",
        "auth_user.username": "intel",
    }
    deposit_rows = [
        {
            **shared_cols,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": later_request_date,
        },
        {
            **shared_cols,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2019, 5, 14, 7, 49, 36, 477061, tzinfo=utc
            ),
        },
        {
            **shared_cols,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": earlier_request_date,
        },
        {
            **shared_cols,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2019, 5, 14, 7, 28, 33, 41454, tzinfo=utc
            ),
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    revision = CoreSWHID.from_string(revision_swhid)
    expected_calls = [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=later_request_date,
                    authority=INTEL_AUTHORITY,
                    fetcher=FETCHER,
                    format="sword-v2-atom-codemeta-v2-in-json",
                    metadata=json.dumps(extrinsic_metadata).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=earlier_request_date,
                    authority=INTEL_AUTHORITY,
                    fetcher=FETCHER,
                    format="sword-v2-atom-codemeta-v2-in-json",
                    metadata=json.dumps(extrinsic_metadata).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=later_request_date,
                    authority=SWH_AUTHORITY,
                    fetcher=FETCHER,
                    format="original-artifacts-json",
                    metadata=json.dumps(dest_original_artifacts).encode(),
                    origin=origin_url,
                    revision=revision,
                ),
            ]
        ),
    ]
    assert storage.method_calls == expected_calls
def test_deposit_3_and_no_swhid():
    """Deposit revision with root-level metadata, handled without any deposit
    cursor (``None`` is passed): no storage method may be called."""
    utc = datetime.timezone.utc

    extrinsic_metadata = {
        "id": "hal-02337300",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "author": {"name": "HAL", "email": "hal@ccsd.cnrs.fr"},
        "client": "hal",
        "codemeta:url": "https://hal.archives-ouvertes.fr/hal-02337300",
        "codemeta:name": "R package SMM, Simulation and Estimation of Multi-State Discrete-Time Semi-Markov and Markov Models",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": [
            # ...
        ],
        # ...
    }
    original_artifacts = [
        # ...
    ]

    revision_metadata = dict(extrinsic_metadata)
    revision_metadata["original_artifact"] = original_artifacts
    row = {
        "id": b"\x91\xe5\xca\x8b'K\xf1\xa8cFd2\xd7Q\xf7A\xbc\x94\xba&",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2017, 1, 1, 0, 0, tzinfo=utc),
        "committer_date": datetime.datetime(2019, 11, 6, 14, 47, 30, tzinfo=utc),
        "type": "tar",
        "message": b"hal: Deposit 342 in collection hal",
        "metadata": revision_metadata,
    }

    storage = Mock()
    no_cursor = None

    handle_row(copy.deepcopy(row), storage, no_cursor, dry_run=False)

    assert storage.method_calls == []
def test_deposit_3_and_unknown_deposit():
    """Deposit revision whose metadata dict is the deposit metadata itself
    (``@xmlns`` keys at the root, no ``extrinsic`` or ``original_artifact``
    entry), handled with no deposit cursor (``None`` is passed).

    The only expectation is that no storage method is called."""
    extrinsic_metadata = {
        "title": "Je suis GPL",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "client": "swh",
        "codemeta:url": "https://forge.softwareheritage.org/source/jesuisgpl/",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {
            "codemeta:name": "Stefano Zacchiroli",
            "codemeta:jobTitle": "Maintainer",
        },
        # ...
    }
    row = {
        "id": b"\x8e\x9c\xee\x14\xa6\xad9\xbc\xa44pw\xb8\x7f\xb5\xbb\xd8\x95;\xb1",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(
            2018, 7, 23, 12, 25, 45, 907132, tzinfo=datetime.timezone.utc
        ),
        "committer_date": datetime.datetime(
            2018, 7, 23, 12, 25, 45, 907132, tzinfo=datetime.timezone.utc
        ),
        "type": "tar",
        "message": b"swh: Deposit 159 in collection swh",
        "metadata": extrinsic_metadata,
    }

    # The former ``origin_url`` local was never read and has been removed.
    storage = Mock()
    deposit_cur = None

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    assert storage.method_calls == []
def test_deposit_4_without_xmlns():
    """Deposit revision whose metadata dict is the deposit metadata itself,
    with expanded namespaces in the key names and no ``original_artifact``
    entry.

    Expects one ``origin_get`` call followed by a single
    ``raw_extrinsic_metadata_add`` call for the deposit metadata."""
    utc = datetime.timezone.utc
    origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
    revision_date = datetime.datetime(2018, 1, 17, 12, 49, 30, 902891, tzinfo=utc)
    # Date of the only deposit request that carries metadata.
    metadata_request_date = datetime.datetime(
        2018, 1, 17, 12, 49, 30, 645576, tzinfo=utc
    )

    extrinsic_metadata = {
        "{http://www.w3.org/2005/Atom}id": "hal-01243573",
        "{http://www.w3.org/2005/Atom}author": {
            "{http://www.w3.org/2005/Atom}name": "HAL",
            "{http://www.w3.org/2005/Atom}email": "hal@ccsd.cnrs.fr",
        },
        "{http://www.w3.org/2005/Atom}client": "hal",
        "{http://www.w3.org/2005/Atom}external_identifier": "hal-01243573",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}url": "https://hal-test.archives-ouvertes.fr/hal-01243573",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "The assignment problem",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author": {
            "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "Morane Gruenpeter"
        },
        # ...
    }

    row = {
        "id": b"\x03\x98\x7f\x05n\xafE\x96\xcd \xd7\xb2\xee\x01\xc9\xb8L\xed\xdf\xa8",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b": Deposit 79 in collection hal",
        "metadata": extrinsic_metadata,
    }

    # The anchor revision in this context differs from the row's own id.
    swhid = (
        "swh:1:dir:e04b2a7b8a8838da0693e9fd992a10d6fd211b50"
        f";origin={origin_url}"
        ";visit=swh:1:snp:c31851534c86676a040fb10f438728c90f1c9d55"
        ";anchor=swh:1:rev:43549ebbe70c9cdf0be1647e6319392eaa06f3a3"
        ";path=/"
    )

    # Columns that are identical in both deposit request rows.
    shared_cols = {
        "deposit.id": 79,
        "deposit.external_id": "hal-01243573",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
        "deposit_collection.name": "hal",
        "auth_user.username": "hal",
    }
    deposit_rows = [
        {
            **shared_cols,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2018, 1, 17, 12, 49, 31, 208347, tzinfo=utc
            ),
        },
        {
            **shared_cols,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    expected_calls = [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=metadata_request_date,
                    authority=HAL_AUTHORITY,
                    fetcher=FETCHER,
                    format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
                    metadata=json.dumps(extrinsic_metadata).encode(),
                    origin=origin_url,
                    revision=CoreSWHID.from_string(
                        "swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8"
                    ),
                ),
            ]
        ),
        # note: no original artifacts
    ]
    assert storage.method_calls == expected_calls
def test_deposit_4_wrong_origin():
    """Run ``handle_row`` on the revision of deposit 75 and check the storage calls.

    The deposit client's provider URL is ``https://hal.archives-ouvertes.fr/``,
    while the deposit's SWHID context and the CodeMeta ``url`` field both point
    at ``inria.halpreprod.archives-ouvertes.fr``; the test expects the latter to
    be the origin that gets looked up and recorded.

    Expected storage traffic: one ``origin_get`` followed by a single
    ``raw_extrinsic_metadata_add`` carrying the deposit metadata, and nothing
    for original artifacts.
    """
    utc = datetime.timezone.utc
    revision_date = datetime.datetime(2018, 1, 10, 13, 14, 51, 77033, tzinfo=utc)
    # Date of the deposit request that carries the metadata document.
    metadata_request_date = datetime.datetime(
        2018, 1, 10, 13, 14, 50, 555143, tzinfo=utc
    )
    # Date of the deposit request that has no metadata attached.
    empty_request_date = datetime.datetime(
        2018, 1, 10, 13, 14, 51, 523963, tzinfo=utc
    )
    origin_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"

    extrinsic_metadata = {
        "{http://www.w3.org/2005/Atom}id": "hal-01588781",
        "{http://www.w3.org/2005/Atom}author": {
            "{http://www.w3.org/2005/Atom}name": "HAL",
            "{http://www.w3.org/2005/Atom}email": "hal@ccsd.cnrs.fr",
        },
        "{http://www.w3.org/2005/Atom}client": "hal",
        "{http://www.w3.org/2005/Atom}external_identifier": "hal-01588781",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}url": "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "The assignment problem ",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author": {
            "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "Morane Gruenpeter",
            "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}affiliation": "Initiative pour la Recherche et l'Innovation sur le Logiciel Libre",
        },
        # ...
    }

    # Revision row as handed to handle_row; its id is the binary form of the
    # swh:1:rev:2d7bce63... SWHID asserted at the end of the test.
    revision_row = {
        "id": b"-{\xcec\x1f\xc7\x91\x08\x03\x11\xeb\x83\\GB\x8eXjn\xa4",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b": Deposit 75 in collection hal",
        "metadata": extrinsic_metadata,
    }

    swhid_context = (
        "swh:1:dir:d8971c651fe256942aa4499a3ccdbaa305d3bade"
        ";origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"
        ";visit=swh:1:snp:7c70cc8ea5b79e376605fd6e9b3b04d98861ffc0"
        ";anchor=swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4"
        ";path=/"
    )

    # Columns shared by both deposit requests of deposit 75.
    shared_columns = {
        "deposit.id": 75,
        "deposit.external_id": "hal-01588781",
        "deposit.swhid_context": swhid_context,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
        "deposit_collection.name": "hal",
        "auth_user.username": "hal",
    }
    deposit_rows = [
        {
            **shared_columns,
            "deposit_request.metadata": None,
            "deposit_request.date": empty_request_date,
        },
        {
            **shared_columns,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
    ]

    def fake_origin_get(urls):
        # Only the expected origin may be looked up; report it as known.
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(revision_row), storage, deposit_cur, dry_run=False)

    # The deposit database must be queried and iterated exactly once.
    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    # Built after handle_row ran: deposit_rows hold `extrinsic_metadata` by
    # reference, so the expectation is serialized from its post-call state.
    expected_deposit_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=metadata_request_date,
        authority=HAL_AUTHORITY,
        fetcher=FETCHER,
        format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
        metadata=json.dumps(extrinsic_metadata).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4"
        ),
    )

    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_deposit_metadata]),
        # note: no original artifacts
    ]
def test_deposit_missing_metadata_in_revision():
    """Run ``handle_row`` on a revision whose own metadata only lists original artifacts.

    The revision row of deposit 229 has no deposit metadata document; that
    document is only present on one of the deposit requests.  The test expects
    one ``origin_get`` and then two ``raw_extrinsic_metadata_add`` calls: first
    the deposit metadata (HAL authority, dated by the request that carries it),
    then the converted original artifacts (SWH authority, dated by the other,
    metadata-less request).

    The expected origin is on ``hal.archives-ouvertes.fr`` even though the
    SWHID context and the CodeMeta ``url`` mention ``hal-test``.
    """
    utc = datetime.timezone.utc
    revision_date = datetime.datetime(2019, 2, 25, 15, 49, 16, 594536, tzinfo=utc)
    # Date of the deposit request that carries the metadata document.
    metadata_request_date = datetime.datetime(
        2019, 2, 25, 15, 49, 12, 302745, tzinfo=utc
    )
    # Date of the deposit request that has no metadata attached.
    empty_request_date = datetime.datetime(
        2019, 2, 25, 15, 54, 30, 102072, tzinfo=utc
    )

    origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
    # /!\ not https://hal-test.archives-ouvertes.fr/hal-01243573
    # do not trust the metadata!

    extrinsic_metadata = {
        "id": "hal-01243573",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "author": {"name": "HAL", "email": "hal@ccsd.cnrs.fr"},
        "client": "hal",
        "committer": "Administrateur Du Ccsd",
        "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243573",
        "codemeta:name": "The assignment problem",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
        "codemeta:version": "1",
        "codemeta:identifier": {
            "#text": "10.5281/zenodo.438684",
            "@name": "doi",
        },
        "external_identifier": "hal-01243573",
        "codemeta:dateCreated": "2017-11-16T14:54:23+01:00",
    }

    # Artifact description as stored on the revision (flat checksum keys) ...
    source_original_artifacts = [
        {
            "name": "archive.zip",
            "sha1": "e8e46324970cd5af7f98c5a86f33f47fa4a41b4a",
            "length": 118650,
            "sha256": "fec81b63d666c43524f966bbd3263da5bee55051d2b48c1659cca5f56fd953e5",
            "sha1_git": "9da2bbd08bec590b36ede2ed43d74cd510b10a79",
            "blake2s256": "5d0973ba3644cc2bcfdb41ff1891744337d6aa9547a7e59fe466f684b027f295",
            "archive_type": "zip",
        }
    ]
    # ... and the shape expected in storage ("filename" + nested "checksums").
    dest_original_artifacts = [
        {
            "length": 118650,
            "archive_type": "zip",
            "filename": "archive.zip",
            "checksums": {
                "sha1": "e8e46324970cd5af7f98c5a86f33f47fa4a41b4a",
                "sha256": "fec81b63d666c43524f966bbd3263da5bee55051d2b48c1659cca5f56fd953e5",
                "sha1_git": "9da2bbd08bec590b36ede2ed43d74cd510b10a79",
                "blake2s256": "5d0973ba3644cc2bcfdb41ff1891744337d6aa9547a7e59fe466f684b027f295",
            },
        }
    ]

    # Revision row as handed to handle_row; its 20-byte id is the binary form
    # of the swh:1:rev:034076f3... SWHID asserted at the end of the test.
    revision_row = {
        "id": b"\x03@v\xf3\xf4\x1e\xe1 N\xb9\xf6@\x82\xcb\xe6\xe9P\xd7\xbb\x8a",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b"hal: Deposit 229 in collection hal",
        "metadata": {"original_artifact": source_original_artifacts},
    }

    swhid_context = (
        "swh:1:dir:3d65b6f065118cb856272829b459f0dfa55549aa"
        ";origin=https://hal-test.archives-ouvertes.fr/hal-01243573"
        ";visit=swh:1:snp:322c54ff4023d3216a994bc9ff9ee524ed80ee1f"
        ";anchor=swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a"
        ";path=/"
    )

    # Columns shared by both deposit requests of deposit 229.
    shared_columns = {
        "deposit.id": 229,
        "deposit.external_id": "hal-01243573",
        "deposit.swhid_context": swhid_context,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
        "deposit_collection.name": "hal",
        "auth_user.username": "hal",
    }
    deposit_rows = [
        {
            **shared_columns,
            "deposit_request.metadata": None,
            "deposit_request.date": empty_request_date,
        },
        {
            **shared_columns,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
    ]

    def fake_origin_get(urls):
        # Only the expected origin may be looked up; report it as known.
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(revision_row), storage, deposit_cur, dry_run=False)

    # The deposit database must be queried and iterated exactly once.
    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    # Expectations are built after handle_row ran: deposit_rows hold
    # `extrinsic_metadata` by reference, so it is serialized post-call.
    revision_swhid = CoreSWHID.from_string(
        "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a"
    )
    expected_deposit_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=metadata_request_date,
        authority=HAL_AUTHORITY,
        fetcher=FETCHER,
        format="sword-v2-atom-codemeta-v2-in-json",
        metadata=json.dumps(extrinsic_metadata).encode(),
        origin=origin_url,
        revision=revision_swhid,
    )
    expected_original_artifacts = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=empty_request_date,
        authority=SWH_AUTHORITY,
        fetcher=FETCHER,
        format="original-artifacts-json",
        metadata=json.dumps(dest_original_artifacts).encode(),
        origin=origin_url,
        revision=revision_swhid,
    )

    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_deposit_metadata]),
        call.raw_extrinsic_metadata_add([expected_original_artifacts]),
    ]
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Sat, Jun 21, 9:29 PM (4 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3237309
Attached To
rDSTO Storage manager
Event Timeline
Log In to Comment