diff --git a/sql/upgrades/135.sql b/sql/upgrades/135.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/135.sql
@@ -0,0 +1,84 @@
+-- SWH DB schema upgrade
+-- from_version: 134
+-- to_version: 135
+-- description: Make directory_ls return the blake2s256 hash
+
+insert into dbversion(version, release, description)
+      values(135, now(), 'Work In Progress');
+
+-- a directory listing entry with all the metadata
+--
+-- can be used to list a directory, and retrieve all the data in one go.
+--
+-- directory_entry already exists in a version-134 database, so "create type"
+-- would fail with "type already exists". Alter it in place instead: length is
+-- dropped and re-added so that blake2s256 lands before it, which matches the
+-- attribute order declared in swh/storage/sql/40-swh-func.sql.
+-- NOTE(review): objects depending on directory_entry that this script does
+-- not redefine are left as they are -- confirm they still behave.
+alter type directory_entry drop attribute length;
+alter type directory_entry add attribute blake2s256 blake2s256;
+alter type directory_entry add attribute length bigint;
+
+
+-- List a single level of directory walked_dir_id
+-- FIXME: order by name is not correct. For git, we need to order by
+-- lexicographic order but as if a trailing / is present in directory
+-- name
+create or replace function swh_directory_walk_one(walked_dir_id sha1_git)
+    returns setof directory_entry
+    language sql
+    stable
+as $$
+    with dir as (
+        select id as dir_id, dir_entries, file_entries, rev_entries
+        from directory
+        where id = walked_dir_id),
+    ls_d as (select dir_id, unnest(dir_entries) as entry_id from dir),
+    ls_f as (select dir_id, unnest(file_entries) as entry_id from dir),
+    ls_r as (select dir_id, unnest(rev_entries) as entry_id from dir)
+    (select dir_id, 'dir'::directory_entry_type as type,
+            e.target, e.name, e.perms, NULL::content_status,
+            NULL::sha1, NULL::sha1_git, NULL::sha256, NULL::blake2s256,
+            NULL::bigint
+     from ls_d
+     left join directory_entry_dir e on ls_d.entry_id = e.id)
+    union
+    (select dir_id, 'file'::directory_entry_type as type,
+            e.target, e.name, e.perms, c.status,
+            c.sha1, c.sha1_git, c.sha256, c.blake2s256, c.length
+     from ls_f
+     left join directory_entry_file e on ls_f.entry_id = e.id
+     left join content c on e.target = c.sha1_git)
+    union
+    (select dir_id, 'rev'::directory_entry_type as type,
+            e.target, e.name, e.perms, NULL::content_status,
+            NULL::sha1, NULL::sha1_git, NULL::sha256, NULL::blake2s256,
+            NULL::bigint
+     from ls_r
+     left join directory_entry_rev e on ls_r.entry_id = e.id)
+    order by name;
+$$;
+
+-- List recursively the revision directory arborescence
+create or replace function swh_directory_walk(walked_dir_id sha1_git)
+    returns setof directory_entry
+    language sql
+    stable
+as $$
+    with recursive entries as (
+        select dir_id, type, target, name, perms, status, sha1, sha1_git,
+               sha256, blake2s256, length
+        from swh_directory_walk_one(walked_dir_id)
+        union all
+        select dir_id, type, target, (dirname || '/' || name)::unix_path as name,
+               perms, status, sha1, sha1_git, sha256, blake2s256, length
+        from (select (swh_directory_walk_one(dirs.target)).*, dirs.name as dirname
+              from (select target, name from entries where type = 'dir') as dirs) as with_parent
+    )
+    select dir_id, type, target, name, perms, status, sha1, sha1_git, sha256, blake2s256, length
+    from entries
+$$;
+
+-- swh_revision_walk is removed from the schema in this version
+drop function swh_revision_walk(revision_id sha1_git);
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -247,7 +247,8 @@
                 """, ((id,) for id in directories))
 
     directory_ls_cols = ['dir_id', 'type', 'target', 'name', 'perms',
-                         'status', 'sha1', 'sha1_git', 'sha256', 'length']
+                         'status', 'sha1', 'sha1_git', 'sha256', 'blake2s256',
+                         'length']
 
     def directory_walk_one(self, directory, cur=None):
         cur = self._cursor(cur)
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -399,6 +399,7 @@
             'sha1',
             'sha1_git',
             'sha256',
+            'blake2s256',
             'length',
         )
         ret = dict.fromkeys(keys)
diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql
--- a/swh/storage/sql/40-swh-func.sql
+++ b/swh/storage/sql/40-swh-func.sql
@@ -358,16 +358,17 @@
 -- can be used to list a directory, and retrieve all the data in one go.
 create type directory_entry as
 (
-  dir_id   sha1_git,              -- id of the parent directory
-  type     directory_entry_type,  -- type of entry
-  target   sha1_git,              -- id of target
-  name     unix_path,             -- path name, relative to containing dir
-  perms    file_perms,            -- unix-like permissions
-  status   content_status,        -- visible or absent
-  sha1     sha1,                  -- content if sha1 if type is not dir
-  sha1_git sha1_git,              -- content's sha1 git if type is not dir
-  sha256   sha256,                -- content's sha256 if type is not dir
-  length   bigint                 -- content length if type is not dir
+  dir_id     sha1_git,              -- id of the parent directory
+  type       directory_entry_type,  -- type of entry
+  target     sha1_git,              -- id of target
+  name       unix_path,             -- path name, relative to containing dir
+  perms      file_perms,            -- unix-like permissions
+  status     content_status,        -- visible or absent
+  sha1       sha1,                  -- content's sha1 if type is not dir
+  sha1_git   sha1_git,              -- content's sha1 git if type is not dir
+  sha256     sha256,                -- content's sha256 if type is not dir
+  blake2s256 blake2s256,            -- content's blake2s256 if type is not dir
+  length     bigint                 -- content length if type is not dir
 );
 
 
@@ -389,20 +390,22 @@
     ls_r as (select dir_id, unnest(rev_entries) as entry_id from dir)
     (select dir_id, 'dir'::directory_entry_type as type,
             e.target, e.name, e.perms, NULL::content_status,
-            NULL::sha1, NULL::sha1_git, NULL::sha256, NULL::bigint
+            NULL::sha1, NULL::sha1_git, NULL::sha256, NULL::blake2s256,
+            NULL::bigint
      from ls_d
      left join directory_entry_dir e on ls_d.entry_id = e.id)
     union
     (select dir_id, 'file'::directory_entry_type as type,
             e.target, e.name, e.perms, c.status,
-            c.sha1, c.sha1_git, c.sha256, c.length
+            c.sha1, c.sha1_git, c.sha256, c.blake2s256, c.length
      from ls_f
      left join directory_entry_file e on ls_f.entry_id = e.id
      left join content c on e.target = c.sha1_git)
     union
     (select dir_id, 'rev'::directory_entry_type as type,
             e.target, e.name, e.perms, NULL::content_status,
-            NULL::sha1, NULL::sha1_git, NULL::sha256, NULL::bigint
+            NULL::sha1, NULL::sha1_git, NULL::sha256, NULL::blake2s256,
+            NULL::bigint
      from ls_r
      left join directory_entry_rev e on ls_r.entry_id = e.id)
     order by name;
@@ -416,30 +419,18 @@
 as $$
     with recursive entries as (
         select dir_id, type, target, name, perms, status, sha1, sha1_git,
-               sha256, length
+               sha256, blake2s256, length
         from swh_directory_walk_one(walked_dir_id)
         union all
         select dir_id, type, target, (dirname || '/' || name)::unix_path as name,
-               perms, status, sha1, sha1_git, sha256, length
+               perms, status, sha1, sha1_git, sha256, blake2s256, length
         from (select (swh_directory_walk_one(dirs.target)).*, dirs.name as dirname
               from (select target, name from entries where type = 'dir') as dirs) as with_parent
     )
-    select dir_id, type, target, name, perms, status, sha1, sha1_git, sha256, length
+    select dir_id, type, target, name, perms, status, sha1, sha1_git, sha256, blake2s256, length
     from entries
 $$;
 
-create or replace function swh_revision_walk(revision_id sha1_git)
-    returns setof directory_entry
-    language sql
-    stable
-as $$
-    select dir_id, type, target, name, perms, status, sha1, sha1_git, sha256, length
-    from swh_directory_walk((select directory from revision where id=revision_id))
-$$;
-
-COMMENT ON FUNCTION swh_revision_walk(sha1_git) IS 'Recursively list the revision targeted directory arborescence';
-
-
 -- Find a directory entry by its path
 create or replace function swh_find_directory_entry_by_path(
     walked_dir_id sha1_git,
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -875,6 +875,7 @@
             'sha1': None,
             'sha1_git': None,
             'sha256': None,
+            'blake2s256': None,
             'length': None,
         }
 
@@ -925,6 +926,25 @@
                 self._transform_entries(self.dir, prefix=b'subdir/')))
         self.assertCountEqual(expected_data, actual_data)
 
+    def test_directory_get_known_content(self):
+        init_missing = list(self.storage.directory_missing([self.dir['id']]))
+        self.assertEqual([self.dir['id']], init_missing)
+
+        self.storage.content_add([self.cont])
+        self.storage.directory_add([self.dir])
+
+        actual_data = list(self.storage.directory_ls(self.dir['id']))
+        expected_data = list(self._transform_entries(self.dir))
+        # guard: the fixture must hold at least one file entry
+        assert any(entry['type'] == 'file' for entry in expected_data)
+        # File entries are expected to carry the fields of the content that
+        # was added above (everything in self.cont except 'data').
+        for entry in expected_data:
+            if entry['type'] == 'file':
+                entry.update(self.cont)
+                del entry['data']
+        self.assertCountEqual(expected_data, actual_data)
+
     def test_directory_entry_get_by_path(self):
         # given
         init_missing = list(self.storage.directory_missing([self.dir3['id']]))
@@ -942,6 +962,7 @@
                 'sha1': None,
                 'sha1_git': None,
                 'sha256': None,
+                'blake2s256': None,
                 'status': None,
                 'perms': from_disk.DentryPerms.content,
                 'length': None,
@@ -954,6 +975,7 @@
                 'sha1': None,
                 'sha1_git': None,
                 'sha256': None,
+                'blake2s256': None,
                 'status': None,
                 'perms': from_disk.DentryPerms.directory,
                 'length': None,
@@ -966,6 +988,7 @@
                 'sha1': None,
                 'sha1_git': None,
                 'sha256': None,
+                'blake2s256': None,
                 'status': None,
                 'perms': from_disk.DentryPerms.content,
                 'length': None,