diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -75,13 +75,15 @@ def _content_add(self, contents, with_data): if self.journal_writer: - for content in contents: - content = attr.evolve(content, data=None) + for (content, origin) in contents: + content = attr.evolve(content, data=None).to_dict() + if origin is not None: + content['origin'] = origin self.journal_writer.write_addition('content', content) content_with_data = [] content_without_data = [] - for content in contents: + for (content, origin) in contents: if content.status is None: content.status = 'visible' if content.length is None: @@ -89,7 +91,7 @@ if content.status == 'visible': content_with_data.append(content) elif content.status == 'absent': - content_without_data.append(content) + content_without_data.append((content, origin)) count_content_added, count_content_bytes_added = \ self._content_add_present(content_with_data, with_data) @@ -139,17 +141,28 @@ def _content_add_absent(self, contents): count = 0 + if not contents: + # Shortcut, needed for the zip() below to not return [] + return count + (contents, origins) = zip(*contents) skipped_content_missing = self.skipped_content_missing(contents) - for content in skipped_content_missing: + for (content, origin) in zip(skipped_content_missing, origins): key = self._content_key(content) for algo in DEFAULT_ALGORITHMS: self._skipped_content_indexes[algo][content.get_hash(algo)] \ .add(key) - self._skipped_contents[key] = content + self._skipped_contents[key] = (content, origin) count += 1 return count + def _content_to_model(self, contents): + """Takes a list of content dicts, optionally with an extra 'origin' + key, and returns a list of tuples (model.Content, origin).""" + contents = (cont.copy() for cont in contents) + contents = ((cont, cont.pop('origin', None)) for cont in contents) + return [(Content.from_dict(cont), orig) for (cont, orig) in contents] + def content_add(self, content): """Add content blobs to the storage @@ -179,9 +192,9 @@ skipped_content:add: New skipped contents (no data) added """ - content = [Content.from_dict(c) for c in content] + content = self._content_to_model(content) now = datetime.datetime.now(tz=datetime.timezone.utc) - for item in content: + for (item, _) in content: item.ctime = now return self._content_add(content, with_data=True) @@ -214,7 +227,7 @@ skipped_content:add: New skipped contents (no data) added """ - content = [Content.from_dict(c) for c in content] + content = self._content_to_model(content) return self._content_add(content, with_data=False) def content_get(self, content): diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -198,6 +198,15 @@ raise if content_without_data: + content_without_data = \ + [cont.copy() for cont in content_without_data] + origins = db.origin_get_by_url( + [cont.get('origin') for cont in content_without_data], + cur=cur) + for (cont, origin) in zip(content_without_data, origins): + origin = dict(zip(db.origin_cols, origin)) + if 'origin' in cont: + cont['origin'] = origin['id'] db.mktemp('skipped_content', cur) db.copy_to(content_without_data, 'tmp_skipped_content', db.skipped_content_keys, cur) diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -119,6 +119,7 @@ '2c772cc193778aac9a137b8dc5834b9b'), 'reason': 'Content too long', 'status': 'absent', + 'origin': 'file:///dev/zero', } self.skipped_cont2 = {