Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123094
D5414.id19365.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Subscribers
None
D5414.id19365.diff
View Options
diff --git a/swh/storage/replay.py b/swh/storage/replay.py
--- a/swh/storage/replay.py
+++ b/swh/storage/replay.py
@@ -4,7 +4,7 @@
# See top-level LICENSE file for more information
import logging
-from typing import Any, Callable, Dict, Iterable, List
+from typing import Any, Callable, Container, Dict, Iterable, List
try:
from systemd.daemon import notify
@@ -103,6 +103,11 @@
logger.error("Collision detected: %(collision)s", {"collision": collision})
+def dict_key_dropper(d: Dict, keys_to_drop: Container) -> Dict:
+ """Returns a copy of the dict d without any key listed in keys_to_drop"""
+ return {k: v for (k, v) in d.items() if k not in keys_to_drop}
+
+
def _insert_objects(object_type: str, objects: List[Dict], storage) -> None:
"""Insert objects of type object_type in the storage.
@@ -146,6 +151,18 @@
storage.metadata_authority_add(authorities)
storage.metadata_fetcher_add(fetchers)
storage.raw_extrinsic_metadata_add(converted)
+ elif object_type == "revision":
+ # drop the metadata field from the revision (if any); this field is
+ # about to be dropped from the data model (in favor of
+ # raw_extrinsic_metadata) and there can be bogus values in the existing
+ # journal (metadata with \0000 in it)
+ method = getattr(storage, object_type + "_add")
+ method(
+ [
+ object_converter_fn[object_type](dict_key_dropper(o, ("metadata",)))
+ for o in objects
+ ]
+ )
elif object_type in (
"directory",
"extid",
diff --git a/swh/storage/tests/test_replay.py b/swh/storage/tests/test_replay.py
--- a/swh/storage/tests/test_replay.py
+++ b/swh/storage/tests/test_replay.py
@@ -14,8 +14,15 @@
from swh.journal.client import JournalClient
from swh.journal.serializers import key_to_kafka, value_to_kafka
-from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_hex
-from swh.model.tests.swh_model_data import DUPLICATE_CONTENTS, TEST_OBJECTS
+from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes, hash_to_hex
+from swh.model.model import Revision, RevisionType
+from swh.model.tests.swh_model_data import (
+ COMMITTERS,
+ DATES,
+ DUPLICATE_CONTENTS,
+ REVISIONS,
+)
+from swh.model.tests.swh_model_data import TEST_OBJECTS as _TEST_OBJECTS
from swh.storage import get_storage
from swh.storage.cassandra.model import ContentRow, SkippedContentRow
from swh.storage.in_memory import InMemoryStorage
@@ -23,6 +30,23 @@
UTC = datetime.timezone.utc
+TEST_OBJECTS = _TEST_OBJECTS.copy()
+TEST_OBJECTS["revision"] = list(_TEST_OBJECTS["revision"]) + [
+ Revision(
+ id=hash_to_bytes("a569b03ebe6e5f9f2f6077355c40d89bd6986d0c"),
+ message=b"hello again",
+ date=DATES[1],
+ committer=COMMITTERS[1],
+ author=COMMITTERS[0],
+ committer_date=DATES[0],
+ type=RevisionType.GIT,
+ directory=b"\x03" * 20,
+ synthetic=False,
+ metadata={"something": "interesting"},
+ parents=(REVISIONS[0].id,),
+ ),
+]
+
def nullify_ctime(obj):
if isinstance(obj, (ContentRow, SkippedContentRow)):
@@ -212,6 +236,10 @@
author=row.author.anonymize(),
committer=row.committer.anonymize(),
)
+ if attr == "revisions":
+ # the replayer should now drop the metadata attribute; see
+ # swh/storage/replay.py:_insert_objects()
+ row.metadata = "null"
return row
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 9:06 PM (2 d, 16 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221868
Attached To
D5414: Make the replayer drop the Revision.metadata
Event Timeline
Log In to Comment