diff --git a/swh/provenance/postgresql/provenancedb_with_path.py b/swh/provenance/postgresql/provenancedb_with_path.py --- a/swh/provenance/postgresql/provenancedb_with_path.py +++ b/swh/provenance/postgresql/provenancedb_with_path.py @@ -34,27 +34,16 @@ ) -> Optional[Tuple[bytes, bytes, datetime, bytes]]: self.cursor.execute( """ - SELECT content_location.sha1 AS blob, - revision.sha1 AS rev, - revision.date AS date, - content_location.path AS path - FROM (SELECT content_hex.sha1, - content_hex.rev, - location.path - FROM (SELECT content.sha1, - content_early_in_rev.rev, - content_early_in_rev.loc - FROM content_early_in_rev - JOIN content - ON content.id=content_early_in_rev.blob - WHERE content.sha1=%s - ) AS content_hex - JOIN location - ON location.id=content_hex.loc - ) AS content_location - JOIN revision - ON revision.id=content_location.rev - ORDER BY date, rev, path ASC LIMIT 1 + SELECT C.sha1 AS blob, + R.sha1 AS rev, + R.date AS date, + L.path AS path + FROM content AS C + INNER JOIN content_early_in_rev AS CR ON (CR.blob = C.id) + INNER JOIN location as L ON (CR.loc = L.id) + INNER JOIN revision as R ON (CR.rev = R.id) + WHERE C.sha1=%s + ORDER BY date, rev, path ASC LIMIT 1 """, (blobid,), ) @@ -66,67 +55,31 @@ early_cut = f"LIMIT {limit}" if limit is not None else "" self.cursor.execute( f""" - (SELECT content_location.sha1 AS blob, - revision.sha1 AS rev, - revision.date AS date, - content_location.path AS path - FROM (SELECT content_hex.sha1, - content_hex.rev, - location.path - FROM (SELECT content.sha1, - content_early_in_rev.rev, - content_early_in_rev.loc - FROM content_early_in_rev - JOIN content - ON content.id=content_early_in_rev.blob - WHERE content.sha1=%s - ) AS content_hex - JOIN location - ON location.id=content_hex.loc - ) AS content_location - JOIN revision - ON revision.id=content_location.rev - ) + (SELECT C.sha1 AS blob, + R.sha1 AS rev, + R.date AS date, + L.path AS path + FROM content AS C + INNER JOIN content_early_in_rev AS CR ON (CR.blob = C.id) + INNER JOIN location AS L ON (CR.loc = L.id) + INNER JOIN revision AS R ON (CR.rev = R.id) + WHERE C.sha1=%s) UNION - (SELECT content_prefix.sha1 AS blob, - revision.sha1 AS rev, - revision.date AS date, - content_prefix.path AS path - FROM (SELECT content_in_rev.sha1, - content_in_rev.rev, - CASE location.path - WHEN '' THEN content_in_rev.suffix - WHEN '.' THEN content_in_rev.suffix - ELSE (location.path || '/' || - content_in_rev.suffix)::unix_path - END AS path - FROM (SELECT content_suffix.sha1, - directory_in_rev.rev, - directory_in_rev.loc, - content_suffix.path AS suffix - FROM (SELECT content_hex.sha1, - content_hex.dir, - location.path - FROM (SELECT content.sha1, - content_in_dir.dir, - content_in_dir.loc - FROM content_in_dir - JOIN content - ON content_in_dir.blob=content.id - WHERE content.sha1=%s - ) AS content_hex - JOIN location - ON location.id=content_hex.loc - ) AS content_suffix - JOIN directory_in_rev - ON directory_in_rev.dir=content_suffix.dir - ) AS content_in_rev - JOIN location - ON location.id=content_in_rev.loc - ) AS content_prefix - JOIN revision - ON revision.id=content_prefix.rev - ) + (SELECT C.sha1 AS blob, + R.sha1 AS rev, + R.date AS date, + CASE DL.path + WHEN '' THEN CL.path + WHEN '.' THEN CL.path + ELSE (DL.path || '/' || CL.path)::unix_path + END AS path + FROM content AS C + INNER JOIN content_in_dir AS CD ON (C.id = CD.blob) + INNER JOIN directory_in_rev AS DR ON (CD.dir = DR.dir) + INNER JOIN revision AS R ON (DR.rev = R.id) + INNER JOIN location AS CL ON (CD.loc = CL.id) + INNER JOIN location AS DL ON (DR.loc = DL.id) + WHERE C.sha1=%s) ORDER BY date, rev, path {early_cut} """, (blobid, blobid),