Page Menu · Home · Software Heritage

D3583.diff
Visible to: No One · Temporary

D3583.diff

diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py
--- a/swh/storage/pytest_plugin.py
+++ b/swh/storage/pytest_plugin.py
@@ -211,9 +211,8 @@
"""
return {
- "content": [data.cont, data.cont2],
- "content_no_data": [data.cont3],
- "skipped_content": [data.skipped_cont, data.skipped_cont2],
+ "content": [data.content, data.content2, data.content3],
+ "skipped_content": [data.skipped_content, data.skipped_content2],
"person": [data.person],
"directory": [data.dir2, data.dir, data.dir3, data.dir4],
"revision": [data.revision, data.revision2, data.revision3, data.revision4],
@@ -240,7 +239,6 @@
# changed but not the endpoints yet)
OBJECT_FACTORY = {
"content": Content.from_dict,
- "content_no_data": Content.from_dict,
"skipped_content": SkippedContent.from_dict,
"person": Person.from_dict,
"directory": Directory.from_dict,
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2019 The Software Heritage developers
+# Copyright (C) 2015-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,11 +11,13 @@
from swh.model import from_disk
from swh.model.identifiers import parse_swhid
from swh.model.model import (
+ Content,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- RawExtrinsicMetadata,
MetadataTargetType,
+ RawExtrinsicMetadata,
+ SkippedContent,
)
@@ -33,96 +35,92 @@
data = StorageData()
-cont = {
- "data": b"42\n",
- "length": 3,
- "sha1": hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
- "sha1_git": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
- "sha256": hash_to_bytes(
+content = Content(
+ data=b"42\n",
+ length=3,
+ sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
+ sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
+ sha256=hash_to_bytes(
"673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a"
),
- "blake2s256": hash_to_bytes(
+ blake2s256=hash_to_bytes(
"d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d"
),
- "status": "visible",
-}
+ status="visible",
+)
-cont2 = {
- "data": b"4242\n",
- "length": 5,
- "sha1": hash_to_bytes("61c2b3a30496d329e21af70dd2d7e097046d07b7"),
- "sha1_git": hash_to_bytes("36fade77193cb6d2bd826161a0979d64c28ab4fa"),
- "sha256": hash_to_bytes(
+content2 = Content(
+ data=b"4242\n",
+ length=5,
+ sha1=hash_to_bytes("61c2b3a30496d329e21af70dd2d7e097046d07b7"),
+ sha1_git=hash_to_bytes("36fade77193cb6d2bd826161a0979d64c28ab4fa"),
+ sha256=hash_to_bytes(
"859f0b154fdb2d630f45e1ecae4a862915435e663248bb8461d914696fc047cd"
),
- "blake2s256": hash_to_bytes(
+ blake2s256=hash_to_bytes(
"849c20fad132b7c2d62c15de310adfe87be94a379941bed295e8141c6219810d"
),
- "status": "visible",
-}
+ status="visible",
+)
-cont3 = {
- "data": b"424242\n",
- "length": 7,
- "sha1": hash_to_bytes("3e21cc4942a4234c9e5edd8a9cacd1670fe59f13"),
- "sha1_git": hash_to_bytes("c932c7649c6dfa4b82327d121215116909eb3bea"),
- "sha256": hash_to_bytes(
+content3 = Content(
+ data=b"424242\n",
+ length=7,
+ sha1=hash_to_bytes("3e21cc4942a4234c9e5edd8a9cacd1670fe59f13"),
+ sha1_git=hash_to_bytes("c932c7649c6dfa4b82327d121215116909eb3bea"),
+ sha256=hash_to_bytes(
"92fb72daf8c6818288a35137b72155f507e5de8d892712ab96277aaed8cf8a36"
),
- "blake2s256": hash_to_bytes(
+ blake2s256=hash_to_bytes(
"76d0346f44e5a27f6bafdd9c2befd304aff83780f93121d801ab6a1d4769db11"
),
- "status": "visible",
- "ctime": "2019-12-01 00:00:00Z",
-}
-
-contents = (cont, cont2, cont3)
+ status="visible",
+ ctime=datetime.datetime(2019, 12, 1, tzinfo=datetime.timezone.utc),
+)
-missing_cont = {
- "length": 8,
- "sha1": hash_to_bytes("f9c24e2abb82063a3ba2c44efd2d3c797f28ac90"),
- "sha1_git": hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8919"),
- "sha256": hash_to_bytes(
+missing_content = Content(
+ data=b"something missing",
+ length=8,
+ sha1=hash_to_bytes("f9c24e2abb82063a3ba2c44efd2d3c797f28ac90"),
+ sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8919"),
+ sha256=hash_to_bytes(
"6bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a"
),
- "blake2s256": hash_to_bytes(
+ blake2s256=hash_to_bytes(
"306856b8fd879edb7b6f1aeaaf8db9bbecc993cd7f776c333ac3a782fa5c6eba"
),
- "reason": "Content too long",
- "status": "absent",
-}
+ status="visible",
+)
-skipped_cont = {
- "length": 1024 * 1024 * 200,
- "sha1_git": hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8920"),
- "sha1": hash_to_bytes("43e45d56f88993aae6a0198013efa80716fd8920"),
- "sha256": hash_to_bytes(
+skipped_content = SkippedContent(
+ length=1024 * 1024 * 200,
+ sha1_git=hash_to_bytes("33e45d56f88993aae6a0198013efa80716fd8920"),
+ sha1=hash_to_bytes("43e45d56f88993aae6a0198013efa80716fd8920"),
+ sha256=hash_to_bytes(
"7bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a"
),
- "blake2s256": hash_to_bytes(
+ blake2s256=hash_to_bytes(
"ade18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b"
),
- "reason": "Content too long",
- "status": "absent",
- "origin": "file:///dev/zero",
-}
+ reason="Content too long",
+ status="absent",
+ origin="file:///dev/zero",
+)
-skipped_cont2 = {
- "length": 1024 * 1024 * 300,
- "sha1_git": hash_to_bytes("44e45d56f88993aae6a0198013efa80716fd8921"),
- "sha1": hash_to_bytes("54e45d56f88993aae6a0198013efa80716fd8920"),
- "sha256": hash_to_bytes(
+skipped_content2 = SkippedContent(
+ length=1024 * 1024 * 300,
+ sha1_git=hash_to_bytes("44e45d56f88993aae6a0198013efa80716fd8921"),
+ sha1=hash_to_bytes("54e45d56f88993aae6a0198013efa80716fd8920"),
+ sha256=hash_to_bytes(
"8cbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a"
),
- "blake2s256": hash_to_bytes(
+ blake2s256=hash_to_bytes(
"9ce18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b"
),
- "reason": "Content too long",
- "status": "absent",
-}
-
-skipped_contents = (skipped_cont, skipped_cont2)
+ reason="Content too long",
+ status="absent",
+)
dir = {
@@ -131,7 +129,7 @@
{
"name": b"foo",
"type": "file",
- "target": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), # cont
+ "target": content.sha1_git,
"perms": from_disk.DentryPerms.content,
},
{
@@ -149,9 +147,7 @@
{
"name": b"oof",
"type": "file",
- "target": hash_to_bytes( # cont2
- "36fade77193cb6d2bd826161a0979d64c28ab4fa"
- ),
+ "target": content2.sha1_git,
"perms": from_disk.DentryPerms.content,
},
),
@@ -163,7 +159,7 @@
{
"name": b"foo",
"type": "file",
- "target": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), # cont
+ "target": content.sha1_git,
"perms": from_disk.DentryPerms.content,
},
{
@@ -505,7 +501,7 @@
content_metadata = RawExtrinsicMetadata(
type=MetadataTargetType.CONTENT,
- id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
origin=origin["url"],
discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
@@ -517,7 +513,7 @@
)
content_metadata2 = RawExtrinsicMetadata(
type=MetadataTargetType.CONTENT,
- id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
origin=origin2["url"],
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
@@ -529,7 +525,7 @@
)
content_metadata3 = RawExtrinsicMetadata(
type=MetadataTargetType.CONTENT,
- id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
@@ -585,13 +581,3 @@
"email": b"john.doe@institute.org",
"fullname": b"John Doe <john.doe@institute.org>",
}
-
-objects = {
- "content": contents,
- "skipped_content": skipped_contents,
- "directory": directories,
- "revision": revisions,
- "origin": origins,
- "release": releases,
- "snapshot": snapshots,
-}
diff --git a/swh/storage/tests/test_buffer.py b/swh/storage/tests/test_buffer.py
--- a/swh/storage/tests/test_buffer.py
+++ b/swh/storage/tests/test_buffer.py
@@ -16,11 +16,11 @@
def test_buffering_proxy_storage_content_threshold_not_hit(sample_data_model):
- contents = sample_data_model["content"]
+ contents = sample_data_model["content"][:2]
contents_dict = [c.to_dict() for c in contents]
storage = get_storage_with_buffer_config(min_batch_size={"content": 10,})
- s = storage.content_add([contents[0], contents[1]])
+ s = storage.content_add(contents)
assert s == {}
# contents have not been written to storage
@@ -57,7 +57,7 @@
def test_buffering_proxy_storage_content_deduplicate(sample_data_model):
- contents = sample_data_model["content"]
+ contents = sample_data_model["content"][:2]
storage = get_storage_with_buffer_config(min_batch_size={"content": 2,})
s = storage.content_add([contents[0], contents[0]])
@@ -80,7 +80,7 @@
def test_buffering_proxy_storage_content_threshold_bytes_hit(sample_data_model):
- contents = sample_data_model["content"]
+ contents = sample_data_model["content"][:2]
content_bytes_min_batch_size = 2
storage = get_storage_with_buffer_config(
min_batch_size={"content": 10, "content_bytes": content_bytes_min_batch_size,}
diff --git a/swh/storage/tests/test_pytest_plugin.py b/swh/storage/tests/test_pytest_plugin.py
--- a/swh/storage/tests/test_pytest_plugin.py
+++ b/swh/storage/tests/test_pytest_plugin.py
@@ -15,7 +15,6 @@
assert set(sample_data.keys()) == set(
[
"content",
- "content_no_data",
"skipped_content",
"person",
"directory",
@@ -32,6 +31,8 @@
)
for object_type, objs in sample_data.items():
if object_type in [
+ "content",
+ "skipped_content",
"fetcher",
"authority",
"origin_metadata",
@@ -49,7 +50,6 @@
assert set(sample_data_model.keys()) == set(
[
"content",
- "content_no_data",
"skipped_content",
"person",
"directory",
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -3,6 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import attr
+
from unittest.mock import call
import psycopg2
@@ -117,13 +119,14 @@
"""Standard content_add_metadata works as before
"""
- sample_content = sample_data_model["content_no_data"][0]
+ sample_content = sample_data_model["content"][0]
+ content = attr.evolve(sample_content, data=None)
- pk = sample_content.sha1
+ pk = content.sha1
content_metadata = swh_storage.content_get_metadata([pk])
assert not content_metadata[pk]
- s = swh_storage.content_add_metadata([sample_content])
+ s = swh_storage.content_add_metadata([content])
assert s == {
"content:add": 1,
}
@@ -151,13 +154,14 @@
{"content:add": 1},
]
- sample_content = sample_data_model["content_no_data"][0]
+ sample_content = sample_data_model["content"][0]
+ content = attr.evolve(sample_content, data=None)
- s = swh_storage.content_add_metadata([sample_content])
+ s = swh_storage.content_add_metadata([content])
assert s == {"content:add": 1}
mock_memory.assert_has_calls(
- [call([sample_content]), call([sample_content]), call([sample_content]),]
+ [call([content]), call([content]), call([content]),]
)
@@ -174,14 +178,15 @@
"Refuse to add content_metadata!"
)
- sample_content = sample_data_model["content_no_data"][0]
- pk = sample_content.sha1
+ sample_content = sample_data_model["content"][0]
+ content = attr.evolve(sample_content, data=None)
+ pk = content.sha1
content_metadata = swh_storage.content_get_metadata([pk])
assert not content_metadata[pk]
with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.content_add_metadata([sample_content])
+ swh_storage.content_add_metadata([content])
assert mock_memory.call_count == 1
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -36,7 +36,6 @@
Person,
Release,
Revision,
- SkippedContent,
Snapshot,
)
from swh.model.hypothesis_strategies import objects
@@ -107,7 +106,8 @@
class LazyContent(Content):
def with_data(self):
- return Content.from_dict({**self.to_dict(), "data": data.cont["data"]})
+ raw_data = data.content.data
+ return Content.from_dict({**self.to_dict(), "data": raw_data})
class TestStorage:
@@ -470,14 +470,13 @@
)
def test_content_missing(self, swh_storage, algos):
algos |= {"sha1"}
- cont = Content.from_dict(data.cont2)
- missing_cont = SkippedContent.from_dict(data.missing_cont)
- swh_storage.content_add([cont])
+ content, missing_content = [data.content2, data.missing_content]
+ swh_storage.content_add([content])
- test_contents = [cont.to_dict()]
+ test_contents = [content.to_dict()]
missing_per_hash = defaultdict(list)
for i in range(256):
- test_content = missing_cont.to_dict()
+ test_content = missing_content.to_dict()
for hash in algos:
test_content[hash] = bytes([i]) + test_content[hash][1:]
missing_per_hash[hash].append(test_content[hash])
@@ -501,14 +500,13 @@
)
def test_content_missing_unknown_algo(self, swh_storage, algos):
algos |= {"sha1"}
- cont = Content.from_dict(data.cont2)
- missing_cont = SkippedContent.from_dict(data.missing_cont)
- swh_storage.content_add([cont])
+ content, missing_content = [data.content2, data.missing_content]
+ swh_storage.content_add([content])
- test_contents = [cont.to_dict()]
+ test_contents = [content.to_dict()]
missing_per_hash = defaultdict(list)
for i in range(16):
- test_content = missing_cont.to_dict()
+ test_content = missing_content.to_dict()
for hash in algos:
test_content[hash] = bytes([i]) + test_content[hash][1:]
missing_per_hash[hash].append(test_content[hash])
@@ -651,8 +649,7 @@
assert tuple(actual_contents[missing_cont.sha1]) == ()
def test_content_get_random(self, swh_storage, sample_data_model):
- cont, cont2 = sample_data_model["content"][:2]
- cont3 = sample_data_model["content_no_data"][0]
+ cont, cont2, cont3 = sample_data_model["content"][:3]
swh_storage.content_add([cont, cont2, cont3])
assert swh_storage.content_get_random() in {
@@ -2915,7 +2912,7 @@
def test_content_find_with_non_present_content(
self, swh_storage, sample_data_model
):
- missing_content = sample_data_model["content_no_data"][0]
+ missing_content = sample_data_model["skipped_content"][0]
# 1. with something that does not exist
actually_present = swh_storage.content_find({"sha1": missing_content.sha1})

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 5:11 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220194

Event Timeline