diff --git a/swh/journal/replay.py b/swh/journal/replay.py --- a/swh/journal/replay.py +++ b/swh/journal/replay.py @@ -234,7 +234,10 @@ # TODO: insert 'content' in batches for object_ in objects: try: - storage.content_add_metadata([object_]) + if object_.get('status') == 'absent': + storage.skipped_content_add([object_]) + else: + storage.content_add_metadata([object_]) except HashCollision as e: logger.error('Hash collision: %s', e.args) elif object_type in ('directory', 'revision', 'release', diff --git a/swh/journal/tests/test_write_replay.py b/swh/journal/tests/test_write_replay.py --- a/swh/journal/tests/test_write_replay.py +++ b/swh/journal/tests/test_write_replay.py @@ -60,6 +60,8 @@ storage1.origin_add_one({'url': obj['origin']}) storage1.origin_visit_upsert([obj]) else: + if obj_type == 'content' and obj.get('status') == 'absent': + obj_type = 'skipped_content' method = getattr(storage1, obj_type + '_add') try: method([obj]) @@ -117,7 +119,8 @@ if obj_type == 'content': # avoid hash collision if not storage1.content_find(obj): - storage1.content_add([obj]) + if obj.get('status') != 'absent': + storage1.content_add([obj]) contents.append(obj) queue_size = len(queue)