Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9341394
test_debian.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
19 KB
Subscribers
None
test_debian.py
View Options
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# flake8: noqa
# because of long lines
import
copy
import
datetime
import
json
from
unittest.mock
import
Mock
,
call
from
unittest.mock
import
patch
as
_patch
import
attr
import
pytest
from
swh.model.identifiers
import
CoreSWHID
,
ExtendedObjectType
,
ExtendedSWHID
from
swh.model.model
import
(
MetadataAuthority
,
MetadataAuthorityType
,
MetadataFetcher
,
Origin
,
OriginVisit
,
OriginVisitStatus
,
Person
,
RawExtrinsicMetadata
,
Revision
,
RevisionType
,
Snapshot
,
SnapshotBranch
,
TargetType
,
Timestamp
,
TimestampWithTimezone
,
)
from
swh.storage
import
get_storage
from
swh.storage.interface
import
ListOrder
,
PagedResult
from
swh.storage.migrate_extrinsic_metadata
import
debian_origins_from_row
,
handle_row
# Fetcher expected on every RawExtrinsicMetadata object asserted in the
# handle_row tests below.
FETCHER = MetadataFetcher(
    name="migrate-extrinsic-metadata-from-revisions",
    version="0.0.1",
)

# Authority expected on every RawExtrinsicMetadata object asserted in the
# handle_row tests below.
SWH_AUTHORITY = MetadataAuthority(
    type=MetadataAuthorityType.REGISTRY,
    url="https://softwareheritage.org/",
    metadata={},
)

# Dummy 20-byte directory id shared by all the revision rows in this module,
# and the extended SWHID built from it.
DIRECTORY_ID = b"a" * 20
DIRECTORY_SWHID = ExtendedSWHID(
    object_type=ExtendedObjectType.DIRECTORY,
    object_id=DIRECTORY_ID,
)
def now():
    """Return the current time as a timezone-aware datetime in UTC."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
def patch(function_name, *args, **kwargs):
    """Patch an attribute of ``swh.storage.migrate_extrinsic_metadata``.

    Thin wrapper around ``unittest.mock.patch`` that prepends the (long)
    module path to ``function_name``, which spares some line breaks in
    'with' statements.  Extra positional and keyword arguments are forwarded
    unchanged.
    """
    target = "swh.storage.migrate_extrinsic_metadata." + function_name
    return _patch(target, *args, **kwargs)
def test_debian_origins_from_row():
    """Checks debian_origins_from_row against data taken from a real example
    (some parts of the snapshot and of the revision metadata are left out
    to keep the test short)."""
    kalgebra_url = "deb://Debian/packages/kalgebra"
    visit_date = datetime.datetime(
        2020, 1, 27, 19, 32, 3, 925498, tzinfo=datetime.timezone.utc,
    )
    snapshot_id = b"\xafD\x15\x98){\xd4$\xdeI\x1f\xbe\x95lh`x\x14\xce\xc4"
    revision_id = b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee"

    storage = get_storage("memory")

    # Two origins are registered; only the deb:// one gets a visit.
    storage.origin_add(
        [
            Origin(url=kalgebra_url),
            Origin(url="http://snapshot.debian.org/package/kalgebra/"),
        ]
    )
    storage.origin_visit_add(
        [OriginVisit(origin=kalgebra_url, date=visit_date, type="deb", visit=280)]
    )
    storage.origin_visit_status_add(
        [
            OriginVisitStatus(
                origin=kalgebra_url,
                visit=280,
                date=visit_date,
                status="full",
                snapshot=snapshot_id,
                metadata=None,
            )
        ],
    )

    # The snapshot has a single branch, targeting the revision of the row.
    storage.snapshot_add(
        [
            Snapshot(
                id=snapshot_id,
                branches={
                    # ...
                    b"releases/unstable/main/4:19.12.1-1": SnapshotBranch(
                        target=revision_id,
                        target_type=TargetType.REVISION,
                    ),
                },
            )
        ]
    )

    row = {
        "id": revision_id,
        "directory": DIRECTORY_ID,
        "metadata": {
            # ...
            "original_artifact": [
                {
                    "filename": "kalgebra_19.12.1-1.dsc",
                    # ...
                },
            ]
        },
    }

    assert debian_origins_from_row(row, storage) == [kalgebra_url]
def test_debian_origins_from_row__no_result():
    """Checks that debian_origins_from_row returns an empty list while the
    storage is filled step by step: no visit, no status, no snapshot,
    no branch, dangling branch, branch to an unknown revision, and finally
    no matching branch.
    """
    storage = get_storage("memory")

    kalgebra_url = "deb://Debian/packages/kalgebra"
    snapshot_id = b"42424242424242424242"
    revision_id = b"21212121212121212121"

    storage.origin_add([Origin(url=kalgebra_url)])

    row = {
        "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
        "directory": DIRECTORY_ID,
        "metadata": {
            "original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc"}],
        },
    }

    # no visit
    assert debian_origins_from_row(row, storage) == []

    visit = OriginVisit(origin=kalgebra_url, date=now(), type="deb", visit=280)
    storage.origin_visit_add([visit])

    # no status
    assert debian_origins_from_row(row, storage) == []

    status_without_snapshot = OriginVisitStatus(
        origin=kalgebra_url,
        visit=280,
        date=now(),
        status="full",
        snapshot=None,
        metadata=None,
    )
    storage.origin_visit_status_add([status_without_snapshot])

    # no snapshot
    assert debian_origins_from_row(row, storage) == []

    status_with_snapshot = attr.evolve(
        status_without_snapshot, snapshot=snapshot_id, date=now()
    )
    storage.origin_visit_status_add([status_with_snapshot])

    # Keep a copy of the storage as it is before any snapshot is added, so
    # that several snapshots with the same id can be tried in turn.
    storage_before_snapshot = copy.deepcopy(storage)

    empty_snapshot = Snapshot(id=snapshot_id, branches={})
    storage.snapshot_add([empty_snapshot])

    # no branch
    assert debian_origins_from_row(row, storage) == []

    # "remove" the snapshot, so we can add a new one with the same id
    storage = copy.deepcopy(storage_before_snapshot)

    dangling_snapshot = attr.evolve(empty_snapshot, branches={b"foo": None})
    storage.snapshot_add([dangling_snapshot])

    # dangling branch
    assert debian_origins_from_row(row, storage) == []

    # "remove" the snapshot again
    storage = copy.deepcopy(storage_before_snapshot)

    revision_snapshot = attr.evolve(
        dangling_snapshot,
        branches={
            b"foo": SnapshotBranch(
                target_type=TargetType.REVISION, target=revision_id,
            )
        },
    )
    storage.snapshot_add([revision_snapshot])

    # branch points to unknown revision
    assert debian_origins_from_row(row, storage) == []

    foo = Person.from_fullname(b"foo")
    revision_date = TimestampWithTimezone(
        timestamp=Timestamp(seconds=1580076204, microseconds=0),
        offset=60,
        negative_utc=False,
    )
    storage.revision_add(
        [
            Revision(
                id=revision_id,
                message=b"foo",
                author=foo,
                committer=foo,
                date=revision_date,
                committer_date=revision_date,
                type=RevisionType.DSC,
                directory=b"\xd5\x9a\x1f\x9c\x80\x9d\x8c}19P\xf6\xc8\xa2\x0f^%H\xcd\xdb",
                synthetic=True,
                metadata=None,
                parents=(),
                extra_headers=(),
            )
        ]
    )

    # no matching branch
    assert debian_origins_from_row(row, storage) == []
def test_debian_origins_from_row__check_revisions():
    """Tests debian_origins_from_row raises an AssertionError when the
    revision at the head of a branch is a DSC revision that has parents.
    """
    storage = get_storage("memory")

    origin_url = "deb://Debian/packages/kalgebra"
    revision_id = b"21" * 10

    storage.origin_add([Origin(url=origin_url)])

    revision_row = {
        "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
        "directory": DIRECTORY_ID,
        "metadata": {"original_artifact": [{"filename": "kalgebra_19.12.1-1.dsc",},]},
    }

    storage.origin_visit_add(
        [
            OriginVisit(
                origin=origin_url,
                date=datetime.datetime.now(tz=datetime.timezone.utc),
                type="deb",
                visit=280,
            )
        ]
    )

    storage.origin_visit_status_add(
        [
            OriginVisitStatus(
                origin=origin_url,
                visit=280,
                date=datetime.datetime.now(tz=datetime.timezone.utc),
                status="full",
                snapshot=b"42" * 10,
                metadata=None,
            )
        ]
    )

    # The only branch of the snapshot targets the revision added below.
    storage.snapshot_add(
        [
            Snapshot(
                id=b"42" * 10,
                branches={
                    b"foo": SnapshotBranch(
                        target_type=TargetType.REVISION, target=revision_id
                    )
                },
            )
        ]
    )

    revision = Revision(
        id=revision_id,
        message=b"foo",
        author=Person.from_fullname(b"foo"),
        committer=Person.from_fullname(b"foo"),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1580076204, microseconds=0),
            offset=60,
            negative_utc=False,
        ),
        committer_date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1580076204, microseconds=0),
            offset=60,
            negative_utc=False,
        ),
        type=RevisionType.DSC,
        directory=b"\xd5\x9a\x1f\x9c\x80\x9d\x8c}19P\xf6\xc8\xa2\x0f^%H\xcd\xdb",
        synthetic=True,
        metadata=None,
        # A non-empty parents tuple is what must trigger the error.
        parents=(b"parent " * 2,),
        extra_headers=(),
    )
    storage.revision_add([revision])

    with pytest.raises(AssertionError, match="revision with parents"):
        debian_origins_from_row(revision_row, storage)
def test_debian_with_extrinsic():
    """Checks handle_row on a Debian revision row whose metadata has an
    'extrinsic' section: with debian_origins_from_row patched to return a
    single origin, exactly one raw_extrinsic_metadata_add call is expected
    on the (mocked) storage, carrying the original artifacts with their URL.
    """
    dest_original_artifacts = [
        {
            "length": 2936,
            "filename": "kalgebra_19.12.1-1.dsc",
            "checksums": {
                "sha1": "f869e9f1155b1ee6d28ae3b40060570152a358cd",
                "sha256": "75f77150aefdaa4bcf8bc5b1e9b8b90b5cb1651b76a068c5e58e5b83658d5d11",
            },
            "url": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.dsc",
        },
        {
            "length": 1156408,
            "filename": "kalgebra_19.12.1.orig.tar.xz",
            "checksums": {
                "sha1": "e496032962212983a5359aebadfe13c4026fd45c",
                "sha256": "49d623186800eb8f6fbb91eb43fb14dff78e112624c9cda6b331d494d610b16a",
            },
            "url": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1.orig.tar.xz",
        },
        {
            "length": 10044,
            "filename": "kalgebra_19.12.1-1.debian.tar.xz",
            "checksums": {
                "sha1": "b518bfc2ac708b40577c595bd539faa8b84572db",
                "sha256": "1a30acd2699c3769da302f7a0c63a7d7b060f80925b38c8c43ce3bec92744d67",
            },
            "url": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.debian.tar.xz",
        },
        {
            "length": 488,
            "filename": "kalgebra_19.12.1.orig.tar.xz.asc",
            "checksums": {
                "sha1": "ff53a5c21c1aef2b9caa38a02fa3488f43df4c20",
                "sha256": "a37e0b95bb1f16b19b0587bc5d3b99ba63a195d7f6335c4a359122ad96d682dd",
            },
            "url": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1.orig.tar.xz.asc",
        },
    ]

    # The source artifacts are the expected ones minus their "url" key.
    source_original_artifacts = [
        {key: value for (key, value) in artifact.items() if key != "url"}
        for artifact in dest_original_artifacts
    ]

    revision_date = datetime.datetime(
        2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc,
    )

    row = {
        "id": b"\x00\x00\x03l1\x1e\xf3:(\x1b\x05h\x8fn\xad\xcf\xc0\x94:\xee",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "date_offset": 60,
        "type": "dsc",
        "message": b"Synthetic revision for Debian source package kalgebra version 4:19.12.1-1",
        "metadata": {
            "extrinsic": {
                "raw": {
                    "id": 2718802,
                    "name": "kalgebra",
                    "files": {
                        "kalgebra_19.12.1-1.dsc": {
                            "uri": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.dsc",
                            "name": "kalgebra_19.12.1-1.dsc",
                            "size": 2936,
                            "md5sum": "fd28f604d4cc31a0a305543230f1622a",
                            "sha256": "75f77150aefdaa4bcf8bc5b1e9b8b90b5cb1651b76a068c5e58e5b83658d5d11",
                        },
                        "kalgebra_19.12.1.orig.tar.xz": {
                            "uri": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1.orig.tar.xz",
                            "name": "kalgebra_19.12.1.orig.tar.xz",
                            "size": 1156408,
                            "md5sum": "34e09ed152da762d53101ea33634712b",
                            "sha256": "49d623186800eb8f6fbb91eb43fb14dff78e112624c9cda6b331d494d610b16a",
                        },
                        "kalgebra_19.12.1-1.debian.tar.xz": {
                            "uri": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.debian.tar.xz",
                            "name": "kalgebra_19.12.1-1.debian.tar.xz",
                            "size": 10044,
                            "md5sum": "4f639f36143898d97d044f273f038e58",
                            "sha256": "1a30acd2699c3769da302f7a0c63a7d7b060f80925b38c8c43ce3bec92744d67",
                        },
                        "kalgebra_19.12.1.orig.tar.xz.asc": {
                            "uri": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1.orig.tar.xz.asc",
                            "name": "kalgebra_19.12.1.orig.tar.xz.asc",
                            "size": 488,
                            "md5sum": "3c29291e4e6f0c294de80feb8e9fce4c",
                            "sha256": "a37e0b95bb1f16b19b0587bc5d3b99ba63a195d7f6335c4a359122ad96d682dd",
                        },
                    },
                    "version": "4:19.12.1-1",
                    "revision_id": None,
                },
                "when": "2020-01-27T19:32:03.925498+00:00",
                "provider": "http://deb.debian.org/debian//pool/main/k/kalgebra/kalgebra_19.12.1-1.dsc",
            },
            "intrinsic": {
                "raw": {
                    "name": "kalgebra",
                    "version": "4:19.12.1-1",
                    # ...
                },
                "tool": "dsc",
            },
            "original_artifact": source_original_artifacts,
        },
    }

    origin_url = "deb://Debian/packages/kalgebra"

    storage = Mock()
    deposit_cur = None

    # handle_row gets a deep copy, so the fixtures above stay untouched
    # whatever it does with its argument.
    with patch("debian_origins_from_row", return_value=[origin_url]):
        handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    expected_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=revision_date,
        authority=SWH_AUTHORITY,
        fetcher=FETCHER,
        format="original-artifacts-json",
        metadata=json.dumps(dest_original_artifacts).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee"
        ),
    )

    assert storage.method_calls == [
        call.raw_extrinsic_metadata_add([expected_metadata]),
    ]
def test_debian_without_extrinsic():
    """Checks handle_row on a Debian revision row whose metadata has a
    'package_info' section and no 'extrinsic' one: with
    debian_origins_from_row patched to return a single origin, exactly one
    raw_extrinsic_metadata_add call is expected on the (mocked) storage,
    carrying the original artifacts with their checksums grouped under
    'checksums' and 'name' renamed to 'filename'.
    """
    source_original_artifacts = [
        {
            "name": "pymongo_1.10-1.dsc",
            "sha1": "81877c1ae4406c2519b9cc9c4557cf6b0775a241",
            "length": 99,
            "sha256": "40269a73f38ee4c2f9cc021f1d5d091cc59ca6e778c339684b7be030e29e282f",
            "sha1_git": "0ac7bdb8e4d10926c5d3e51baa2be7bb29a3966b",
        },
        {
            "name": "pymongo_1.10.orig.tar.gz",
            "sha1": "4f4c97641b86ac8f21396281bd1a7369236693c3",
            "length": 99,
            "sha256": "0b6bffb310782ffaeb3916c75790742ec5830c63a758fc711cd1f557eb5a4b5f",
            "sha1_git": "19ef0adda8868520d1ef9d4164b3ace4df1d62ad",
        },
        {
            "name": "pymongo_1.10-1.debian.tar.gz",
            "sha1": "fbf378296613c8d55e043aec98896b3e50a94971",
            "length": 99,
            "sha256": "3970cc70fe3ba6499a9c56ba4b4c6c3782f56433d0d17d72b7a0e2ceae31b513",
            "sha1_git": "2eea9904806050a8fda95edd5d4fa60d29c1fdec",
        },
    ]

    dest_original_artifacts = [
        {
            "length": 99,
            "filename": "pymongo_1.10-1.dsc",
            "checksums": {
                "sha1": "81877c1ae4406c2519b9cc9c4557cf6b0775a241",
                "sha256": "40269a73f38ee4c2f9cc021f1d5d091cc59ca6e778c339684b7be030e29e282f",
                "sha1_git": "0ac7bdb8e4d10926c5d3e51baa2be7bb29a3966b",
            },
        },
        {
            "length": 99,
            "filename": "pymongo_1.10.orig.tar.gz",
            "checksums": {
                "sha1": "4f4c97641b86ac8f21396281bd1a7369236693c3",
                "sha256": "0b6bffb310782ffaeb3916c75790742ec5830c63a758fc711cd1f557eb5a4b5f",
                "sha1_git": "19ef0adda8868520d1ef9d4164b3ace4df1d62ad",
            },
        },
        {
            "length": 99,
            "filename": "pymongo_1.10-1.debian.tar.gz",
            "checksums": {
                "sha1": "fbf378296613c8d55e043aec98896b3e50a94971",
                "sha256": "3970cc70fe3ba6499a9c56ba4b4c6c3782f56433d0d17d72b7a0e2ceae31b513",
                "sha1_git": "2eea9904806050a8fda95edd5d4fa60d29c1fdec",
            },
        },
    ]

    revision_date = datetime.datetime(
        2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc
    )

    row = {
        "id": b"\x00\x00\x01\xc2\x8c\x8f\xca\x01\xb9\x04\xde\x92\xa2d\n\x86l\xe0<\xb7",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "date_offset": 0,
        "type": "dsc",
        "message": b"Synthetic revision for Debian source package pymongo version 1.10-1",
        "metadata": {
            "package_info": {
                "name": "pymongo",
                "version": "1.10-1",
                "changelog": {
                    # ...
                },
                "maintainers": [
                    {"name": "Federico Ceratto", "email": "federico.ceratto@gmail.com"},
                    {"name": "Janos Guljas", "email": "janos@resenje.org"},
                ],
                "pgp_signature": {
                    "date": "2011-03-31T21:02:44+00:00",
                    "keyid": "2BABC6254E66E7B8450AC3E1E6AA90171392B174",
                    "person": {"name": "David Paleino", "email": "d.paleino@gmail.com"},
                },
                "lister_metadata": {"id": 244296, "lister": "snapshot.debian.org"},
            },
            "original_artifact": source_original_artifacts,
        },
    }

    storage = Mock()

    origin_url = "http://snapshot.debian.org/package/pymongo"
    deposit_cur = None

    # handle_row gets a deep copy, so the fixtures above stay untouched
    # whatever it does with its argument.
    with patch("debian_origins_from_row", return_value=[origin_url]):
        handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    expected_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=revision_date,
        authority=SWH_AUTHORITY,
        fetcher=FETCHER,
        format="original-artifacts-json",
        metadata=json.dumps(dest_original_artifacts).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7"
        ),
    )

    assert storage.method_calls == [
        call.raw_extrinsic_metadata_add([expected_metadata]),
    ]
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Fri, Jul 4, 12:00 PM (3 w, 14 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3297660
Attached To
rDSTO Storage manager
Event Timeline
Log In to Comment