@statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def _directory_ls(self, id: Sha1Git, minsize: int = 0) -> List[Dict[str, Any]]:
    """List the sub-directory and file entries of directory ``id``.

    Sub-directory entries are always returned. File entries depend on
    ``minsize``:

    * ``minsize > 0``: a file entry is returned only when its target has a
      row in ``content`` with ``length >= minsize``, or, failing that, a row
      in ``skipped_content`` with ``length >= minsize``.
    * otherwise: every file entry is returned and neither ``content`` nor
      ``skipped_content`` is consulted.

    Args:
        id: identifier of the directory to list.
        minsize: minimum content length for file entries (see above).

    Returns:
        One dict per entry, with keys ``"type"``, ``"target"`` and
        ``"name"``, taken from the three columns of each result row.
    """
    # Size-filtered listing: needs joins against the content tables.
    # Placeholders, in order: directory id, minsize, minsize.
    filtered_listing_sql = """
        WITH
        dir AS (SELECT id AS dir_id, dir_entries, file_entries
                FROM directory WHERE id=%s),
        ls_d AS (SELECT dir_id, UNNEST(dir_entries) AS entry_id FROM dir),
        ls_f AS (SELECT dir_id, UNNEST(file_entries) AS entry_id FROM dir)
        (SELECT 'dir'::directory_entry_type AS type, e.target, e.name
         FROM ls_d
         LEFT JOIN directory_entry_dir e ON ls_d.entry_id=e.id)
        UNION ALL
        (WITH known_contents AS
            (SELECT 'file'::directory_entry_type AS type, e.target, e.name
             FROM ls_f
             LEFT JOIN directory_entry_file e ON ls_f.entry_id=e.id
             INNER JOIN content c ON e.target=c.sha1_git
             WHERE c.length >= %s
            )
         SELECT * FROM known_contents
         UNION ALL
         (SELECT 'file'::directory_entry_type AS type, e.target, e.name
          FROM ls_f
          LEFT JOIN directory_entry_file e ON ls_f.entry_id=e.id
          LEFT JOIN skipped_content c ON e.target=c.sha1_git
          WHERE NOT EXISTS (
            SELECT 1 FROM known_contents
            WHERE known_contents.target=e.target
          )
          AND c.length >= %s
         )
        )
        """

    # Unfiltered listing: no size constraint, so the content tables are
    # not joined at all. Single placeholder: directory id.
    plain_listing_sql = """
        WITH
        dir AS (SELECT id AS dir_id, dir_entries, file_entries
                FROM directory WHERE id=%s),
        ls_d AS (SELECT dir_id, UNNEST(dir_entries) AS entry_id FROM dir),
        ls_f AS (SELECT dir_id, UNNEST(file_entries) AS entry_id FROM dir)
        (SELECT 'dir'::directory_entry_type AS type, e.target, e.name
         FROM ls_d
         LEFT JOIN directory_entry_dir e ON ls_d.entry_id=e.id)
        UNION ALL
        (SELECT 'file'::directory_entry_type AS type, e.target, e.name
         FROM ls_f
         LEFT JOIN directory_entry_file e ON ls_f.entry_id=e.id)
        """

    column_names = ("type", "target", "name")
    with self.conn.cursor() as cursor:
        if minsize > 0:
            cursor.execute(filtered_listing_sql, (id, minsize, minsize))
        else:
            cursor.execute(plain_listing_sql, (id,))
        # Both statements select exactly three columns, in this order.
        return [dict(zip(column_names, record)) for record in cursor]