diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -38,6 +38,7 @@ self._snapshots = {} self._origins = [] self._origin_visits = [] + self._persons = [] self._origin_metadata = defaultdict(list) self._tools = {} self._metadata_providers = {} @@ -415,6 +416,8 @@ for revision in revisions: if revision['id'] not in self._revisions: self._revisions[revision['id']] = rev = copy.deepcopy(revision) + self._person_add(rev['committer']) + self._person_add(rev['author']) rev['date'] = normalize_timestamp(rev.get('date')) rev['committer_date'] = normalize_timestamp( rev.get('committer_date')) @@ -499,10 +502,12 @@ the date dictionary has the form defined in :mod:`swh.model`. """ for rel in releases: + rel = copy.deepcopy(rel) rel['date'] = normalize_timestamp(rel['date']) + self._person_add(rel['author']) self._objects[rel['id']].append( ('release', rel['id'])) - self._releases.update((rel['id'], rel) for rel in releases) + self._releases[rel['id']] = rel def release_missing(self, releases): """List releases missing from storage @@ -1016,6 +1021,22 @@ origin_visit = self._origin_visits[origin-1][visit-1] return origin_visit + def person_get(self, person): + """Return the persons identified by their ids. + + Args: + person: array of ids. + + Returns: + The array of persons corresponding to the ids. + + """ + for p in person: + if 0 <= (p - 1) < len(self._persons): + yield dict(self._persons[p - 1], id=p) + else: + yield None + def stat_counters(self): """compute statistics about the number of tuples in various tables @@ -1206,6 +1227,26 @@ break return origin_id + def _person_add(self, person): + """Add a person in storage. + + Note: Private method, do not use outside of this class. + + Args: + person: dictionary with keys fullname, name and email. 
+ + """ + key = ('person', person['fullname']) + if key not in self._objects: + person_id = len(self._persons) + 1 + self._persons.append(dict(person)) + self._objects[key].append(('person', person_id)) + else: + person_id = self._objects[key][0][1] + p = next(self.person_get([person_id])) + person.update(p.items()) + person['id'] = person_id + @staticmethod def _content_key(content): """A stable key for a content""" diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1205,6 +1205,68 @@ self.assertIsNone(actual_origin_visit) + def test_person_get(self): + # given (person injection through revision for example) + self.storage.revision_add([self.revision]) + rev = list(self.storage.revision_get([self.revision['id']]))[0] + + id0 = rev['committer']['id'] + person0 = self.revision['committer'] + + id1 = rev['author']['id'] + person1 = self.revision['author'] + + # when + actual_persons = self.storage.person_get([id0, id1]) + + # then + self.assertEqual( + list(actual_persons), [ + { + 'id': id0, + 'fullname': person0['fullname'], + 'name': person0['name'], + 'email': person0['email'], + }, + { + 'id': id1, + 'fullname': person1['fullname'], + 'name': person1['name'], + 'email': person1['email'], + } + ]) + + def test_person_get_fullname_unicity(self): + # given (person injection through revisions for example) + revision = self.revision + + # create a revision with same committer fullname but wo name and email + revision2 = copy.deepcopy(self.revision2) + revision2['committer'] = dict(revision['committer']) + revision2['committer']['email'] = None + revision2['committer']['name'] = None + + self.storage.revision_add([revision]) + self.storage.revision_add([revision2]) + + # when getting added revisions + revisions = list( + self.storage.revision_get([revision['id'], revision2['id']])) + + # then + # check committers are the same + 
self.assertEqual(revisions[0]['committer'], + revisions[1]['committer']) + + # check person_get returns the same result + person0 = list( + self.storage.person_get([revisions[0]['committer']['id']]))[0] + + person1 = list( + self.storage.person_get([revisions[1]['committer']['id']]))[0] + + self.assertEqual(person0, person1) + def test_snapshot_add_get_empty(self): origin_id = self.storage.origin_add_one(self.origin) origin_visit1 = self.storage.origin_visit_add(origin_id, @@ -1507,6 +1569,7 @@ self.assertEqual(counters['snapshot'], 1) self.assertEqual(counters['origin'], 1) self.assertEqual(counters['revision'], 1) + self.assertEqual(counters['person'], 2) def test_content_find_with_present_content(self): # 1. with something to find @@ -2104,43 +2167,6 @@ self.assertEqual(expected_fetch_history, fetch_history) - # The remote API doesn't expose _person_add - def test_person_get(self): - # given - person0 = { - 'fullname': b'bob <alice@bob>', - 'name': b'bob', - 'email': b'alice@bob', - } - id0 = self.storage._person_add(person0) - - person1 = { - 'fullname': b'tony <tony@bob>', - 'name': b'tony', - 'email': b'tony@bob', - } - id1 = self.storage._person_add(person1) - - # when - actual_persons = self.storage.person_get([id0, id1]) - - # given (person injection through release for example) - self.assertEqual( - list(actual_persons), [ - { - 'id': id0, - 'fullname': person0['fullname'], - 'name': person0['name'], - 'email': person0['email'], - }, - { - 'id': id1, - 'fullname': person1['fullname'], - 'name': person1['name'], - 'email': person1['email'], - }, - ]) - # This test is only relevant on the local storage, with an actual # objstorage raising an exception def test_content_add_objstorage_exception(self):