diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -63,6 +63,8 @@ content_get_metadata_keys = [ 'sha1', 'sha1_git', 'sha256', 'blake2s256', 'length', 'status'] + content_add_keys = content_get_metadata_keys + ['ctime'] + skipped_content_keys = [ 'sha1', 'sha1_git', 'sha256', 'blake2s256', 'length', 'reason', 'status', 'origin'] diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -84,7 +84,6 @@ self._objects[content['sha1_git']].append( ('content', content['sha1'])) self._contents[key] = copy.deepcopy(content) - self._contents[key]['ctime'] = now() bisect.insort(self._sorted_sha1s, content['sha1']) count_contents += 1 if self._contents[key]['status'] == 'visible': @@ -133,6 +132,10 @@ skipped_content:add: New skipped contents (no data) added """ + contents = [dict(c.items()) for c in contents] # semi-shallow copy + now = datetime.datetime.now(tz=datetime.timezone.utc) + for item in contents: + item['ctime'] = now return self._content_add(contents, with_data=True) def content_add_metadata(self, contents): @@ -152,6 +155,7 @@ - reason (str): if status = absent, the reason why - origin (int): if status = absent, the origin we saw the content in + - ctime (datetime): time of insertion in the archive Raises: HashCollision in case of collision diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql --- a/swh/storage/sql/40-swh-func.sql +++ b/swh/storage/sql/40-swh-func.sql @@ -203,8 +203,8 @@ language plpgsql as $$ begin - insert into content (sha1, sha1_git, sha256, blake2s256, length, status) - select distinct sha1, sha1_git, sha256, blake2s256, length, status from tmp_content; + insert into content (sha1, sha1_git, sha256, blake2s256, length, status, ctime) + select distinct sha1, sha1_git, sha256, blake2s256, length, status, ctime from tmp_content; return; end $$; diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -141,7 +141,7 @@ db.mktemp('content', cur) db.copy_to(content_with_data, 'tmp_content', - db.content_get_metadata_keys, cur) + db.content_add_keys, cur) # move metadata in place try: @@ -207,6 +207,10 @@ content:bytes:add: Sum of the contents' length data skipped_content:add: New skipped contents (no data) added """ + content = [dict(c.items()) for c in content] # semi-shallow copy + now = datetime.datetime.now(tz=datetime.timezone.utc) + for item in content: + item['ctime'] = now if self.journal_writer: for item in content: @@ -306,6 +310,7 @@ - reason (str): if status = absent, the reason why - origin (int): if status = absent, the origin we saw the content in + - ctime (datetime): time of insertion in the archive Returns: Summary dict with the following key and associated values: diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -566,7 +566,11 @@ expected_cont = cont.copy() del expected_cont['data'] - self.assertEqual(list(self.journal_writer.objects), + journal_objects = list(self.journal_writer.objects) + for (obj_type, obj) in journal_objects: + if 'ctime' in obj: + del obj['ctime'] + self.assertEqual(journal_objects, [('content', expected_cont)]) def test_content_add_same_input(self): @@ -615,7 +619,11 @@ expected_cont = cont.copy() del expected_cont['data'] - self.assertEqual(list(self.journal_writer.objects), + journal_objects = list(self.journal_writer.objects) + for (obj_type, obj) in journal_objects: + if 'ctime' in obj: + del obj['ctime'] + self.assertEqual(journal_objects, [('content', expected_cont)]) def test_content_add_collision(self): @@ -635,6 +643,7 @@ def test_content_add_metadata(self): cont = self.cont.copy() del cont['data'] + cont['ctime'] = datetime.datetime.now() actual_result = self.storage.content_add_metadata([cont]) self.assertEqual(actual_result, { @@ -642,9 +651,11 @@ 'skipped_content:add': 0 }) + expected_cont = cont.copy() + del expected_cont['ctime'] self.assertEqual( list(self.storage.content_get_metadata([cont['sha1']])), - [cont]) + [expected_cont]) self.assertEqual(list(self.journal_writer.objects), [('content', cont)]) @@ -652,6 +663,7 @@ def test_content_add_metadata_same_input(self): cont = self.cont.copy() del cont['data'] + cont['ctime'] = datetime.datetime.now() actual_result = self.storage.content_add_metadata([cont, cont]) self.assertEqual(actual_result, { @@ -662,8 +674,10 @@ def test_content_add_metadata_different_input(self): cont = self.cont.copy() del cont['data'] + cont['ctime'] = datetime.datetime.now() cont2 = self.cont2.copy() del cont2['data'] + cont2['ctime'] = datetime.datetime.now() actual_result = self.storage.content_add_metadata([cont, cont2]) self.assertEqual(actual_result, { @@ -674,6 +688,7 @@ def test_content_add_metadata_db(self): cont = self.cont.copy() del cont['data'] + cont['ctime'] = datetime.datetime.now() actual_result = self.storage.content_add_metadata([cont]) @@ -700,6 +715,7 @@ def test_content_add_metadata_collision(self): cont1 = self.cont.copy() del cont1['data'] + cont1['ctime'] = datetime.datetime.now() # create (corrupted) content with same sha1{,_git} but != sha256 cont1b = cont1.copy() @@ -2209,6 +2225,25 @@ self.assertEqual(counters['revision'], 1) self.assertEqual(counters['person'], 2) + def test_content_find_ctime(self): + cont = self.cont.copy() + del cont['data'] + now = datetime.datetime.now(tz=datetime.timezone.utc) + cont['ctime'] = now + self.storage.content_add_metadata([cont]) + + actually_present = self.storage.content_find({'sha1': cont['sha1']}) + + self.assertEqual(actually_present, { + 'ctime': now, + 'sha1': cont['sha1'], + 'sha256': cont['sha256'], + 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], + 'length': cont['length'], + 'status': 'visible' + }) + def test_content_find_with_present_content(self): # 1. with something to find cont = self.cont