Page Menu | Home | Software Heritage

D5413.id19364.diff
No One | Temporary

D5413.id19364.diff

diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -675,6 +675,8 @@
]
db.mktemp("extid", cur)
+ self.journal_writer.extid_add(ids)
+
db.copy_to(extid, "tmp_extid", db.extid_cols, cur)
# move metadata in place
diff --git a/swh/storage/tests/test_backfill.py b/swh/storage/tests/test_backfill.py
--- a/swh/storage/tests/test_backfill.py
+++ b/swh/storage/tests/test_backfill.py
@@ -19,6 +19,7 @@
compute_query,
raw_extrinsic_metadata_target_ranges,
)
+from swh.storage.in_memory import InMemoryStorage
from swh.storage.replay import process_replay_objects
from swh.storage.tests.test_replay import check_replayed
@@ -287,6 +288,8 @@
replayer2.process(worker_fn2)
# Compare storages
+ assert isinstance(sto1, InMemoryStorage) # needed to help mypy
+ assert isinstance(sto2, InMemoryStorage)
check_replayed(sto1, sto2)
for record in caplog.records:
diff --git a/swh/storage/tests/test_replay.py b/swh/storage/tests/test_replay.py
--- a/swh/storage/tests/test_replay.py
+++ b/swh/storage/tests/test_replay.py
@@ -85,7 +85,9 @@
nb_inserted = replayer.process(worker_fn)
assert nb_sent == nb_inserted
- _check_replayed(src, dst)
+ assert isinstance(src, InMemoryStorage) # needed to help mypy
+ assert isinstance(dst, InMemoryStorage)
+ check_replayed(src, dst)
collision = 0
for record in caplog.records:
@@ -165,7 +167,9 @@
assert expected_content_hashes in actual_colliding_hashes
# all objects from the src should exists in the dst storage
- _check_replayed(src, dst, exclude=["contents"])
+ assert isinstance(src, InMemoryStorage) # needed to help mypy
+ assert isinstance(dst, InMemoryStorage) # needed to help mypy
+ check_replayed(src, dst, exclude=["contents"])
# but the dst has one content more (one of the 2 colliding ones)
assert (
len(list(src._cql_runner._contents.iter_all()))
@@ -188,12 +192,29 @@
# utility functions
-def _check_replayed(
- src: InMemoryStorage, dst: InMemoryStorage, exclude: Optional[Container] = None
+def check_replayed(
+ src: InMemoryStorage,
+ dst: InMemoryStorage,
+ exclude: Optional[Container] = None,
+ expected_anonymized=False,
):
- """Simple utility function to compare the content of 2 in_memory storages
+ """Simple utility function to compare the content of 2 in_memory storages"""
+
+ def fix_expected(attr, row):
+ if expected_anonymized:
+ if attr == "releases":
+ row = dataclasses.replace(
+ row, author=row.author and row.author.anonymize()
+ )
+ elif attr == "revisions":
+ row = dataclasses.replace(
+ row,
+ author=row.author.anonymize(),
+ committer=row.committer.anonymize(),
+ )
+
+ return row
- """
for attr_ in (
"contents",
"skipped_contents",
@@ -210,7 +231,7 @@
if exclude and attr_ in exclude:
continue
expected_objects = [
- (id, nullify_ctime(obj))
+ (id, nullify_ctime(fix_expected(attr_, obj)))
for id, obj in sorted(getattr(src._cql_runner, f"_{attr_}").iter_all())
]
got_objects = [
@@ -321,46 +342,6 @@
assert nb_sent == nb_inserted
# Check the contents of the destination storage, and whether the anonymization was
# properly used
+ assert isinstance(storage, InMemoryStorage) # needed to help mypy
+ assert isinstance(dst_storage, InMemoryStorage)
check_replayed(storage, dst_storage, expected_anonymized=not privileged)
-
-
-def check_replayed(src, dst, expected_anonymized=False):
- """Simple utility function to compare the content of 2 in_memory storages
-
- If expected_anonymized is True, objects from the source storage are anonymized
- before comparing with the destination storage.
-
- """
-
- def maybe_anonymize(attr_, row):
- if expected_anonymized:
- if attr_ == "releases":
- row = dataclasses.replace(row, author=row.author.anonymize())
- elif attr_ == "revisions":
- row = dataclasses.replace(
- row,
- author=row.author.anonymize(),
- committer=row.committer.anonymize(),
- )
- return row
-
- for attr_ in (
- "contents",
- "skipped_contents",
- "directories",
- "revisions",
- "releases",
- "snapshots",
- "origins",
- "origin_visit_statuses",
- "raw_extrinsic_metadata",
- ):
- expected_objects = [
- (id, nullify_ctime(maybe_anonymize(attr_, obj)))
- for id, obj in sorted(getattr(src._cql_runner, f"_{attr_}").iter_all())
- ]
- got_objects = [
- (id, nullify_ctime(obj))
- for id, obj in sorted(getattr(dst._cql_runner, f"_{attr_}").iter_all())
- ]
- assert got_objects == expected_objects, f"Mismatch object list for {attr_}"

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 1:42 PM (1 w, 4 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219225

Event Timeline