diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -74,6 +74,7 @@ for content in contents: key = self._content_key(content) if key in self._contents: + self._objects[key].append(('skipped_content', content['sha1'])) continue for algorithm in DEFAULT_ALGORITHMS: if content[algorithm] in self._content_indexes[algorithm]: @@ -169,7 +170,7 @@ """ return self._content_add(contents, with_data=False) - def content_get(self, ids): + def content_get(self, ids, start=0, end=-1): """Retrieve in bulk contents and their data. This function may yield more blobs than provided sha1 identifiers, @@ -1170,7 +1171,8 @@ 'origin': origin_id, 'visit': visit_id, } - + key = (origin_id, visit_id) + self._objects[key].append(('origin_visit', origin_id)) if self.journal_writer: origin = self.origin_get([{'id': origin_id}])[0] del origin['id'] @@ -1514,12 +1516,12 @@ if key not in self._objects: person_id = len(self._persons) + 1 self._persons.append(dict(person)) - self._objects[key].append(('person', person_id)) else: person_id = self._objects[key][0][1] p = next(self.person_get([person_id])) person.update(p.items()) person['id'] = person_id + self._objects[key].append(('person', person_id)) @staticmethod def _content_key(content): diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -334,7 +334,7 @@ return summary - def content_get(self, content): + def content_get(self, content, start=0, end=-1): """Retrieve in bulk contents and their data. This generator yields exactly as many items than provided sha1 @@ -369,7 +369,10 @@ yield None continue - yield {'sha1': obj_id, 'data': data} + if end != -1: + yield {'sha1': obj_id, 'data': data[start:end]} + else: + yield {'sha1': obj_id, 'data': data} @db_transaction() def content_get_range(self, start, end, limit=1000, db=None, cur=None):