diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -126,6 +126,16 @@
                 SELECT 1 FROM content c WHERE c.sha1 = t.sha1
             )""", ((sha1,) for sha1 in sha1s))
 
+    def content_missing_per_sha1_git(self, contents, cur=None):
+        """Yield a row for each sha1_git in contents missing from content."""
+        cur = self._cursor(cur)
+
+        yield from execute_values_generator(cur, """
+        SELECT t.sha1_git FROM (VALUES %s) AS t(sha1_git)
+        WHERE NOT EXISTS (
+            SELECT 1 FROM content c WHERE c.sha1_git = t.sha1_git
+        )""", ((sha1_git,) for sha1_git in contents))
+
     def skipped_content_missing(self, contents, cur=None):
         if not contents:
             return []
@@ -153,6 +163,17 @@
 
         return bool(cur.fetchone())
 
+    def snapshot_missing_from_list(self, snapshots, cur=None):
+        """Yield a row for each id of snapshots not in the snapshot table."""
+        cur = self._cursor(cur)
+        yield from execute_values_generator(
+            cur, """
+            SELECT id FROM (VALUES %s) as t(id)
+            WHERE NOT EXISTS (
+                SELECT 1 FROM snapshot d WHERE d.id = t.id
+            )
+            """, ((snapshot_id,) for snapshot_id in snapshots))
+
     def snapshot_add(self, snapshot_id, cur=None):
         """Add a snapshot from the temporary table"""
         cur = self._cursor(cur)
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -434,6 +434,19 @@
             if content not in self._content_indexes['sha1']:
                 yield content
 
+    def content_missing_per_sha1_git(self, contents):
+        """List content missing from storage based only on sha1_git.
+
+        Args:
+            contents: An iterable of content id (sha1_git)
+
+        Yields:
+            missing contents sha1_git
+        """
+        for content in contents:
+            if content not in self._content_indexes['sha1_git']:
+                yield content
+
     def skipped_content_missing(self, contents):
         """List all skipped_content missing from storage
 
@@ -872,6 +885,19 @@
 
         return {'snapshot:add': count}
 
+    def snapshot_missing(self, snapshots):
+        """List snapshots missing from storage
+
+        Args:
+            snapshots (iterable): an iterable of snapshot ids
+
+        Yields:
+            missing snapshot ids
+        """
+        for snapshot_id in snapshots:
+            if snapshot_id not in self._snapshots:
+                yield snapshot_id
+
     def snapshot_get(self, snapshot_id):
         """Get the content, possibly partial, of a snapshot with the given id
 
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -614,6 +614,21 @@
         for obj in db.content_missing_per_sha1(contents, cur):
             yield obj[0]
 
+    @remote_api_endpoint('content/missing/sha1_git')
+    @timed
+    @db_transaction_generator()
+    def content_missing_per_sha1_git(self, contents, db=None, cur=None):
+        """List content missing from storage based only on sha1_git.
+
+        Args:
+            contents (Iterable): An iterable of content id (sha1_git)
+
+        Yields:
+            missing contents sha1_git
+        """
+        for obj in db.content_missing_per_sha1_git(contents, cur):
+            yield obj[0]
+
     @remote_api_endpoint('content/skipped/missing')
     @timed
     @db_transaction_generator()
@@ -1180,6 +1195,22 @@
 
         return {'snapshot:add': count}
 
+    @remote_api_endpoint('snapshot/missing')
+    @timed
+    @db_transaction_generator()
+    def snapshot_missing(self, snapshots, db=None, cur=None):
+        """List snapshots missing from storage
+
+        Args:
+            snapshots (iterable): an iterable of snapshot ids
+
+        Yields:
+            missing snapshot ids
+
+        """
+        for obj in db.snapshot_missing_from_list(snapshots, cur):
+            yield obj[0]
+
     @remote_api_endpoint('snapshot')
     @timed
     @db_transaction(statement_timeout=2000)
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -367,6 +367,20 @@
         # then
         assert list(gen) == [missing_cont['sha1']]
 
+    def test_content_missing_per_sha1_git(self, swh_storage):
+        """content_missing_per_sha1_git yields only the absent sha1_git"""
+        cont = data.cont
+        cont2 = data.cont2
+        missing_cont = data.missing_cont
+
+        swh_storage.content_add([cont, cont2])
+
+        contents = [cont['sha1_git'], cont2['sha1_git'],
+                    missing_cont['sha1_git']]
+
+        missing_contents = swh_storage.content_missing_per_sha1_git(contents)
+        assert list(missing_contents) == [missing_cont['sha1_git']]
+
     def test_content_get_partition(self, swh_storage, swh_contents):
         """content_get_partition paginates results if limit exceeded"""
         expected_contents = [c for c in swh_contents
@@ -2574,6 +2588,17 @@
             data.snapshot['id'], data.empty_snapshot['id'],
             data.complete_snapshot['id']}
 
+    def test_snapshot_missing(self, swh_storage):
+        """snapshot_missing yields only the ids absent from storage"""
+        snap = data.snapshot
+        missing_snap = data.empty_snapshot
+        snapshots = [snap['id'], missing_snap['id']]
+        swh_storage.snapshot_add([snap])
+
+        missing_snapshots = swh_storage.snapshot_missing(snapshots)
+
+        assert list(missing_snapshots) == [missing_snap['id']]
+
     def test_stat_counters(self, swh_storage):
         expected_keys = ['content', 'directory', 'origin', 'revision']
 