diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -126,6 +126,15 @@
             SELECT 1 FROM content c WHERE c.sha1 = t.sha1
         )""", ((sha1,) for sha1 in sha1s))
 
+    def content_missing_per_sha1_git(self, contents, cur=None):
+        """Yield a row for each given sha1_git absent from ``content``."""
+        cur = self._cursor(cur)
+        yield from execute_values_generator(cur, """
+        SELECT t.sha1_git FROM (VALUES %s) AS t(sha1_git)
+        WHERE NOT EXISTS (
+            SELECT 1 FROM content c WHERE c.sha1_git = t.sha1_git
+        )""", ((sha1_git,) for sha1_git in contents))
+
     def skipped_content_missing(self, contents, cur=None):
         if not contents:
             return []
@@ -153,6 +162,16 @@
 
         return bool(cur.fetchone())
 
+    def snapshot_missing_from_list(self, snapshots, cur=None):
+        """Yield a row for each given snapshot id absent from ``snapshot``."""
+        cur = self._cursor(cur)
+        yield from execute_values_generator(cur, """
+            SELECT id FROM (VALUES %s) as t(id)
+            WHERE NOT EXISTS (
+                SELECT 1 FROM snapshot d WHERE d.id = t.id
+            )
+            """, ((snapshot_id,) for snapshot_id in snapshots))
+
     def snapshot_add(self, snapshot_id, cur=None):
         """Add a snapshot from the temporary table"""
         cur = self._cursor(cur)
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -614,6 +614,21 @@
         for obj in db.content_missing_per_sha1(contents, cur):
             yield obj[0]
 
+    @remote_api_endpoint('content/missing/sha1_git')
+    @timed
+    @db_transaction_generator()
+    def content_missing_per_sha1_git(self, contents, db=None, cur=None):
+        """List content missing from storage based only on sha1_git.
+
+        Args:
+            contents (Iterable): content identifiers, given as sha1_git
+
+        Yields:
+            the sha1_git of each content in ``contents`` not found
+        """
+        for obj in db.content_missing_per_sha1_git(contents, cur):
+            yield obj[0]
+
     @remote_api_endpoint('content/skipped/missing')
     @timed
     @db_transaction_generator()
@@ -1180,6 +1195,22 @@
 
         return {'snapshot:add': count}
 
+    @remote_api_endpoint('snapshot/missing')
+    @timed
+    @db_transaction_generator()
+    def snapshot_missing(self, snapshots, db=None, cur=None):
+        """List snapshots missing from storage.
+
+        Args:
+            snapshots (Iterable): the snapshot ids to look up
+
+        Yields:
+            the id of each snapshot in ``snapshots`` not found
+
+        """
+        for obj in db.snapshot_missing_from_list(snapshots, cur):
+            yield obj[0]
+
     @remote_api_endpoint('snapshot')
     @timed
     @db_transaction(statement_timeout=2000)