diff --git a/sql/clusters.dot b/sql/clusters.dot index d22b50f4..dec8dc4f 100644 --- a/sql/clusters.dot +++ b/sql/clusters.dot @@ -1,91 +1,84 @@ subgraph "logical_grouping" { style = rounded; bgcolor = gray95; color = gray; subgraph cluster_meta { label = <meta>; dbversion; } subgraph cluster_content { label = <content>; content; } subgraph cluster_directory { label = <directories>; directory; directory_entry_dir; directory_entry_file; directory_entry_rev; - directory_list_dir; - directory_list_file; - directory_list_rev; - {rank=same; - directory_list_dir - directory_list_file - directory_list_rev} {rank=same; directory_entry_dir directory_entry_file directory_entry_rev} } subgraph cluster_revision { label = <revisions>; revision; revision_history; person; } subgraph cluster_release { label = <releases>; release; } subgraph cluster_occurrences { label = <occurrences>; occurrence; occurrence_history; } subgraph cluster_origins { label = <origins>; origin; fetch_history; } subgraph cluster_projects { label = <projects>; project; project_history; } subgraph cluster_organization { label = <organizations>; organization; list_history; } { edge [style = dashed]; # "rtcolN" identifies the N-th row in a table, as a source # "ltcolN" identifies the N-th row in a table, as a destination "directory_entry_dir":rtcol2 -> "directory":ltcol1; "directory_entry_file":rtcol2 -> "content":ltcol2; "directory_entry_rev":rtcol2 -> "revision":ltcol1; - "directory_list_dir":rtcol2 -> "directory_entry_dir":ltcol1; - "directory_list_file":rtcol2 -> "directory_entry_file":ltcol1; - "directory_list_rev":rtcol2 -> "directory_entry_rev":ltcol1; + "directory":rtcol2 -> "directory_entry_dir":ltcol1; + "directory":rtcol3 -> "directory_entry_file":ltcol1; + "directory":rtcol4 -> "directory_entry_rev":ltcol1; "occurrence":rtcol3 -> "revision":ltcol1; "occurrence_history":rtcol3 -> "revision":ltcol1; "release":rtcol2 -> "revision":ltcol1; "revision":rtcol7 -> "directory":ltcol1; "revision_history":rtcol2 -> "revision":ltcol1; } } diff --git a/sql/swh-func.sql b/sql/swh-func.sql index d615b7db..9335ac41 100644 --- a/sql/swh-func.sql +++ b/sql/swh-func.sql @@ -1,698 +1,724 @@ -- create a temporary table called tmp_TBLNAME, mimicking existing table -- TBLNAME -- -- Args: -- tblname: name of the table to mimick create or replace function swh_mktemp(tblname regclass) returns void language plpgsql as $$ begin execute format(' create temporary table tmp_%I (like %I including defaults) on commit drop ', tblname, tblname); return; end $$; -- create a temporary table for directory entries called tmp_TBLNAME, -- mimicking existing table TBLNAME with an extra dir_id (sha1_git) -- column, and dropping the id column. -- -- This is used to create the tmp_directory_entry_ tables. 
-- -- Args: -- tblname: name of the table to mimick create or replace function swh_mktemp_dir_entry(tblname regclass) returns void language plpgsql as $$ begin execute format(' create temporary table tmp_%I (like %I including defaults, dir_id sha1_git) on commit drop; alter table tmp_%I drop column id; ', tblname, tblname, tblname, tblname); return; end $$; -- create a temporary table for revisions called tmp_revisions, -- mimicking existing table revision, replacing the foreign keys to -- people with an email and name field -- create or replace function swh_mktemp_revision() returns void language plpgsql as $$ begin create temporary table tmp_revision ( like revision including defaults, author_name text not null default '', author_email text not null default '', committer_name text not null default '', committer_email text not null default '' ) on commit drop; alter table tmp_revision drop column author; alter table tmp_revision drop column committer; return; end $$; -- create a temporary table for releases called tmp_release, -- mimicking existing table release, replacing the foreign keys to -- people with an email and name field -- create or replace function swh_mktemp_release() returns void language plpgsql as $$ begin create temporary table tmp_release ( like release including defaults, author_name text not null default '', author_email text not null default '' ) on commit drop; alter table tmp_release drop column author; return; end $$; -- a content signature is a set of cryptographic checksums that we use to -- uniquely identify content, for the purpose of verifying if we already have -- some content or not during content injection create type content_signature as ( sha1 sha1, sha1_git sha1_git, sha256 sha256 ); -- check which entries of tmp_content are missing from content -- -- operates in bulk: 0. swh_mktemp(content), 1. COPY to tmp_content, -- 2. call this function create or replace function swh_content_missing() returns setof content_signature language plpgsql as $$ begin return query select sha1, sha1_git, sha256 from tmp_content except select sha1, sha1_git, sha256 from content; return; end $$; -- check which entries of tmp_skipped_content are missing from skipped_content -- -- operates in bulk: 0. swh_mktemp(skipped_content), 1. COPY to tmp_skipped_content, -- 2. call this function create or replace function swh_skipped_content_missing() returns setof content_signature language plpgsql as $$ begin return query select sha1, sha1_git, sha256 from tmp_skipped_content where not exists (select 1 from skipped_content s where sha1 is not distinct from s.sha1 and sha1_git is not distinct from s.sha1_git and sha256 is not distinct from s.sha256); return; end $$; -- Look up content based on one or several different checksums. Return all -- content information if the content is found; a NULL row otherwise. -- -- At least one checksum should be not NULL. If several are not NULL, they will -- be AND-ed together in the lookup query. 
-- -- Note: this function is meant to be used to look up individual contents -- (e.g., for the web app), for batch lookup of missing content (e.g., to be -- added) see swh_content_missing create or replace function swh_content_find( sha1 sha1 default NULL, sha1_git sha1_git default NULL, sha256 sha256 default NULL ) returns content language plpgsql as $$ declare con content; filters text[] := array[] :: text[]; -- AND-clauses used to filter content q text; begin if sha1 is not null then filters := filters || format('sha1 = %L', sha1); end if; if sha1_git is not null then filters := filters || format('sha1_git = %L', sha1_git); end if; if sha256 is not null then filters := filters || format('sha256 = %L', sha256); end if; if cardinality(filters) = 0 then return null; else q = format('select * from content where %s', array_to_string(filters, ' and ')); execute q into con; return con; end if; end $$; -- add tmp_content entries to content, skipping duplicates -- -- operates in bulk: 0. swh_mktemp(content), 1. COPY to tmp_content, -- 2. call this function create or replace function swh_content_add() returns void language plpgsql as $$ begin insert into content (sha1, sha1_git, sha256, length, status) select distinct sha1, sha1_git, sha256, length, status from tmp_content where (sha1, sha1_git, sha256) in (select * from swh_content_missing()); -- TODO XXX use postgres 9.5 "UPSERT" support here, when available. -- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid -- the extra swh_content_missing() query here. return; end $$; -- add tmp_skipped_content entries to skipped_content, skipping duplicates -- -- operates in bulk: 0. swh_mktemp(skipped_content), 1. COPY to tmp_skipped_content, -- 2. call this function create or replace function swh_skipped_content_add() returns void language plpgsql as $$ begin insert into skipped_content (sha1, sha1_git, sha256, length, status, reason, origin) select distinct sha1, sha1_git, sha256, length, status, reason, origin from tmp_skipped_content where (coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, '')) in (select coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, '') from swh_skipped_content_missing()); -- TODO XXX use postgres 9.5 "UPSERT" support here, when available. -- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid -- the extra swh_content_missing() query here. return; end $$; -- check which entries of tmp_directory are missing from directory -- -- operates in bulk: 0. swh_mktemp(directory), 1. COPY to tmp_directory, -- 2. call this function create or replace function swh_directory_missing() returns setof directory language plpgsql as $$ begin return query select id from tmp_directory except select id from directory; return; end $$; -- Add tmp_directory_entry_dir entries to directory_entry_dir and -- directory_list_dir, skipping duplicates in directory_entry_dir. -- -- operates in bulk: 0. swh_mktemp_dir_entry('directory_entry_dir'), 1 COPY to -- tmp_directory_entry_dir, 2. call this function +-- +-- Assumption: this function is used in the same transaction that inserts the +-- context directory in table "directory". 
+--
+-- TODO: refactor with other swh_directory_entry_*_add functions
 create or replace function swh_directory_entry_dir_add()
     returns void
     language plpgsql
 as $$
 begin
     insert into directory_entry_dir (target, name, perms, atime, mtime, ctime)
     select distinct t.target, t.name, t.perms, t.atime, t.mtime, t.ctime
     from tmp_directory_entry_dir t
     where not exists (
         select 1
         from directory_entry_dir i
         where t.target = i.target and t.name = i.name and t.perms = i.perms and
               t.atime is not distinct from i.atime and
               t.mtime is not distinct from i.mtime and
               t.ctime is not distinct from i.ctime);

-    insert into directory_list_dir (dir_id, entry_ids)
-    select t.dir_id, array_agg(i.id)
-    from tmp_directory_entry_dir t
-    inner join directory_entry_dir i
-    on t.target = i.target and t.name = i.name and t.perms = i.perms and
-       t.atime is not distinct from i.atime and
-       t.mtime is not distinct from i.mtime and
-       t.ctime is not distinct from i.ctime
-    group by t.dir_id;
+    with new_entries as (
+        select t.dir_id, array_agg(i.id) as entries
+        from tmp_directory_entry_dir t
+        inner join directory_entry_dir i
+        on t.target = i.target and t.name = i.name and t.perms = i.perms and
+           t.atime is not distinct from i.atime and
+           t.mtime is not distinct from i.mtime and
+           t.ctime is not distinct from i.ctime
+        group by t.dir_id
+    )
+    update directory as d
+    set dir_entries = new_entries.entries
+    from new_entries
+    where d.id = new_entries.dir_id;
+
     return;
 end
 $$;

 -- Add tmp_directory_entry_file entries to directory_entry_file and
 -- directory_list_file, skipping duplicates in directory_entry_file.
 --
 -- operates in bulk: 0. swh_mktemp_dir_entry('directory_entry_file'), 1 COPY to
 -- tmp_directory_entry_file, 2. call this function
+--
+-- Assumption: this function is used in the same transaction that inserts the
+-- context directory in table "directory".
+--
+-- TODO: refactor with other swh_directory_entry_*_add functions
 create or replace function swh_directory_entry_file_add()
     returns void
     language plpgsql
 as $$
 begin
     insert into directory_entry_file (target, name, perms, atime, mtime, ctime)
     select distinct t.target, t.name, t.perms, t.atime, t.mtime, t.ctime
     from tmp_directory_entry_file t
     where not exists (
         select 1
         from directory_entry_file i
         where t.target = i.target and t.name = i.name and t.perms = i.perms and
               t.atime is not distinct from i.atime and
               t.mtime is not distinct from i.mtime and
               t.ctime is not distinct from i.ctime);

-    insert into directory_list_file (dir_id, entry_ids)
-    select t.dir_id, array_agg(i.id)
-    from tmp_directory_entry_file t
-    inner join directory_entry_file i
-    on t.target = i.target and t.name = i.name and t.perms = i.perms and
-       t.atime is not distinct from i.atime and
-       t.mtime is not distinct from i.mtime and
-       t.ctime is not distinct from i.ctime
-    group by t.dir_id;
+    with new_entries as (
+        select t.dir_id, array_agg(i.id) as entries
+        from tmp_directory_entry_file t
+        inner join directory_entry_file i
+        on t.target = i.target and t.name = i.name and t.perms = i.perms and
+           t.atime is not distinct from i.atime and
+           t.mtime is not distinct from i.mtime and
+           t.ctime is not distinct from i.ctime
+        group by t.dir_id
+    )
+    update directory as d
+    set file_entries = new_entries.entries
+    from new_entries
+    where d.id = new_entries.dir_id;
+
     return;
 end
 $$;

 -- Add tmp_directory_entry_rev entries to directory_entry_rev and
 -- directory_list_rev, skipping duplicates in directory_entry_rev.
 --
 -- operates in bulk: 0. swh_mktemp_dir_entry('directory_entry_rev'), 1 COPY to
 -- tmp_directory_entry_rev, 2. call this function
+--
+-- Assumption: this function is used in the same transaction that inserts the
+-- context directory in table "directory".
+--
+-- TODO: refactor with other swh_directory_entry_*_add functions
 create or replace function swh_directory_entry_rev_add()
     returns void
     language plpgsql
 as $$
 begin
     insert into directory_entry_rev (target, name, perms, atime, mtime, ctime)
     select distinct t.target, t.name, t.perms, t.atime, t.mtime, t.ctime
     from tmp_directory_entry_rev t
     where not exists (
         select 1
         from directory_entry_rev i
         where t.target = i.target and t.name = i.name and t.perms = i.perms and
               t.atime is not distinct from i.atime and
               t.mtime is not distinct from i.mtime and
               t.ctime is not distinct from i.ctime);

-    insert into directory_list_rev (dir_id, entry_ids)
-    select t.dir_id, array_agg(i.id)
-    from tmp_directory_entry_rev t
-    inner join directory_entry_rev i
-    on t.target = i.target and t.name = i.name and t.perms = i.perms and
-       t.atime is not distinct from i.atime and
-       t.mtime is not distinct from i.mtime and
-       t.ctime is not distinct from i.ctime
-    group by t.dir_id;
+    with new_entries as (
+        select t.dir_id, array_agg(i.id) as entries
+        from tmp_directory_entry_rev t
+        inner join directory_entry_rev i
+        on t.target = i.target and t.name = i.name and t.perms = i.perms and
+           t.atime is not distinct from i.atime and
+           t.mtime is not distinct from i.mtime and
+           t.ctime is not distinct from i.ctime
+        group by t.dir_id
+    )
+    update directory as d
+    set rev_entries = new_entries.entries
+    from new_entries
+    where d.id = new_entries.dir_id;
+
     return;
 end
 $$;

 create type directory_entry_type as enum('file', 'dir', 'rev');

 -- a directory listing entry with all the metadata
 --
 -- can be used to list a directory, and retrieve all the data in one go.
create type directory_entry as ( dir_id sha1_git, -- id of the parent directory type directory_entry_type, -- type of entry target sha1_git, -- id of target name unix_path, -- path name, relative to containing dir perms file_perms, -- unix-like permissions atime timestamptz, -- time of last access mtime timestamptz, -- time of last modification ctime timestamptz -- time of last status change ); -- List a single level of directory walked_dir_id create or replace function swh_directory_walk_one(walked_dir_id sha1_git) returns setof directory_entry language plpgsql as $$ begin - return query ( - (with l as - (select dir_id, unnest(entry_ids) as entry_id - from directory_list_dir - where dir_id = walked_dir_id) - select dir_id, 'dir'::directory_entry_type as type, - target, name, perms, atime, mtime, ctime - from l - left join directory_entry_dir d on l.entry_id = d.id) - union - (with l as - (select dir_id, unnest(entry_ids) as entry_id - from directory_list_file - where dir_id = walked_dir_id) - select dir_id, 'file'::directory_entry_type as type, - target, name, perms, atime, mtime, ctime - from l - left join directory_entry_file d on l.entry_id = d.id) - union - (with l as - (select dir_id, unnest(entry_ids) as entry_id - from directory_list_rev - where dir_id = walked_dir_id) - select dir_id, 'rev'::directory_entry_type as type, - target, name, perms, atime, mtime, ctime - from l - left join directory_entry_rev d on l.entry_id = d.id) - ) order by name; + return query + with dir as ( + select id as dir_id, dir_entries, file_entries, rev_entries + from directory + where id = walked_dir_id), + ls_d as (select dir_id, unnest(dir_entries) as entry_id from dir), + ls_f as (select dir_id, unnest(file_entries) as entry_id from dir), + ls_r as (select dir_id, unnest(rev_entries) as entry_id from dir) + (select dir_id, 'dir'::directory_entry_type as type, + target, name, perms, atime, mtime, ctime + from ls_d + left join directory_entry_dir d on ls_d.entry_id = d.id) + union + (select dir_id, 'file'::directory_entry_type as type, + target, name, perms, atime, mtime, ctime + from ls_f + left join directory_entry_file d on ls_f.entry_id = d.id) + union + (select dir_id, 'rev'::directory_entry_type as type, + target, name, perms, atime, mtime, ctime + from ls_r + left join directory_entry_rev d on ls_r.entry_id = d.id) + order by name; return; end $$; -- List all revision IDs starting from a given revision, going back in time -- -- TODO ordering: should be breadth-first right now (what do we want?) -- TODO ordering: ORDER BY parent_rank somewhere? create or replace function swh_revision_list(root_revision sha1_git) returns setof sha1_git language plpgsql as $$ begin return query with recursive rev_list(id) as ( (select id from revision where id = root_revision) union (select parent_id from revision_history as h join rev_list on h.id = rev_list.id) ) select * from rev_list; return; end $$; -- Detailed entry in a revision log create type revision_log_entry as ( id sha1_git, date timestamptz, date_offset smallint, committer_date timestamptz, committer_date_offset smallint, type revision_type, directory sha1_git, message bytea, author_name text, author_email text, committer_name text, committer_email text ); -- "git style" revision log. 
Similar to swh_revision_list(), but returning all -- information associated to each revision, and expanding authors/committers create or replace function swh_revision_log(root_revision sha1_git) returns setof revision_log_entry language plpgsql as $$ begin return query select revision.id, date, date_offset, committer_date, committer_date_offset, type, directory, message, author.name as author_name, author.email as author_email, committer.name as committer_name, committer.email as committer_email from swh_revision_list(root_revision) as rev_list join revision on revision.id = rev_list join person as author on revision.author = author.id join person as committer on revision.committer = committer.id; return; end $$; -- List missing revisions from tmp_revision create or replace function swh_revision_missing() returns setof sha1_git language plpgsql as $$ begin return query select id from tmp_revision except select id from revision; return; end $$; -- Create entries in person from tmp_revision create or replace function swh_person_add_from_revision() returns void language plpgsql as $$ begin with t as ( select author_name as name, author_email as email from tmp_revision union select committer_name as name, committer_email as email from tmp_revision ) insert into person (name, email) select distinct name, email from t where not exists ( select 1 from person p where t.name = p.name and t.email = p.email ); return; end $$; -- Create entries in revision from tmp_revision create or replace function swh_revision_add() returns void language plpgsql as $$ begin perform swh_person_add_from_revision(); insert into revision (id, date, date_offset, committer_date, committer_date_offset, type, directory, message, author, committer) select t.id, t.date, t.date_offset, t.committer_date, t.committer_date_offset, t.type, t.directory, t.message, a.id, c.id from tmp_revision t left join person a on a.name = t.author_name and a.email = t.author_email left join person c on c.name = t.committer_name and c.email = t.committer_email; return; end $$; -- List missing releases from tmp_release create or replace function swh_release_missing() returns setof sha1_git language plpgsql as $$ begin return query select id from tmp_release except select id from release; return; end $$; -- Create entries in person from tmp_release create or replace function swh_person_add_from_release() returns void language plpgsql as $$ begin with t as ( select distinct author_name as name, author_email as email from tmp_release ) insert into person (name, email) select name, email from t where not exists ( select 1 from person p where t.name = p.name and t.email = p.email ); return; end $$; -- Create entries in release from tmp_release create or replace function swh_release_add() returns void language plpgsql as $$ begin perform swh_person_add_from_release(); insert into release (id, revision, date, date_offset, name, comment, author) select t.id, t.revision, t.date, t.date_offset, t.name, t.comment, a.id from tmp_release t left join person a on a.name = t.author_name and a.email = t.author_email; return; end $$; -- Absolute path: directory reference + complete path relative to it create type content_dir as ( directory sha1_git, path unix_path ); -- Find the containing directory of a given content, specified by sha1 -- (note: *not* sha1_git). -- -- Return a pair (dir_it, path) where path is a UNIX path that, from the -- directory root, reach down to a file with the desired content. 
-- -- In case of multiple paths (i.e., pretty much always), an arbitrary one is -- chosen. create or replace function swh_content_find_directory(content_id sha1) returns content_dir language plpgsql as $$ declare d content_dir; begin with recursive path as ( -- Recursively build a path from the requested content to a root -- directory. Each iteration returns a pair (dir_id, filename) where -- filename is relative to dir_id. Stops when no parent directory can -- be found. (select dir.id as dir_id, dir_entry_f.name as name, 0 as depth from directory_entry_file as dir_entry_f join content on content.sha1_git = dir_entry_f.target - join directory_list_file as ls_f on ls_f.entry_ids @> array[dir_entry_f.id] - join directory as dir on ls_f.dir_id = dir.id + join directory as dir on dir.file_entries @> array[dir_entry_f.id] where content.sha1 = content_id limit 1) union all (select dir.id as dir_id, (dir_entry_d.name || '/' || path.name)::unix_path as name, path.depth + 1 from path join directory_entry_dir as dir_entry_d on dir_entry_d.target = path.dir_id - join directory_list_dir as ls_d on ls_d.entry_ids @> array[dir_entry_d.id] - join directory as dir on ls_d.dir_id = dir.id + join directory as dir on dir.dir_entries @> array[dir_entry_d.id] limit 1) ) select dir_id, name from path order by depth desc limit 1 into strict d; return d; end $$; -- Walk the revision history starting from a given revision, until a matching -- occurrence is found. Return all occurrence information. create or replace function swh_revision_find_occurrence(revision_id sha1_git) returns occurrence language plpgsql as $$ declare occ occurrence%ROWTYPE; rev sha1_git; begin -- first check to see if revision_id is already pointed by an occurrence select origin, branch, revision from occurrence_history as occ_hist where occ_hist.revision = revision_id order by upper(occ_hist.validity) -- TODO filter by authority? limit 1 into occ; -- no occurrence point to revision_id, walk up the history if not found then -- recursively walk the history, stopping immediately before a revision -- pointed to by an occurrence. -- TODO find a nicer way to stop at, but *including*, that revision with recursive revlog as ( (select revision_id as rev_id, 0 as depth) union all (select hist.parent_id as rev_id, revlog.depth + 1 from revlog join revision_history as hist on hist.id = revlog.rev_id and not exists(select 1 from occurrence_history where revision = hist.parent_id) limit 1) ) select rev_id from revlog order by depth desc limit 1 into strict rev; -- as we stopped before a pointed by revision, look it up again and -- return its data select origin, branch, revision from revision_history as rev_hist, occurrence_history as occ_hist where rev_hist.id = rev and occ_hist.revision = rev_hist.parent_id order by upper(occ_hist.validity) -- TODO filter by authority? limit 1 into strict occ; -- will fail if no occurrence is found, and that's OK end if; return occ; end $$; -- Occurrence of some content in a given context create type content_occurrence as ( origin_type text, origin_url text, branch text, revision_id sha1_git, path unix_path ); -- Given the sha1 of some content, look up an occurrence that points to a -- revision, which in turns reference (transitively) a tree containing the -- content. Answer the question: "where/when did SWH see a given content"? -- Return information about an arbitrary occurrence/revision/tree, with no -- ordering guarantee whatsoever. 
create or replace function swh_content_find_occurrence(content_id sha1) returns content_occurrence language plpgsql as $$ declare dir content_dir; rev sha1_git; occ occurrence%ROWTYPE; coc content_occurrence; begin -- each step could fail if no results are found, and that's OK select * from swh_content_find_directory(content_id) -- look up directory into strict dir; select id from revision where directory = dir.directory -- look up revision limit 1 into strict rev; select * from swh_revision_find_occurrence(rev) -- look up occurrence into strict occ; select origin.type, origin.url, occ.branch, rev, dir.path from origin where origin.id = occ.origin into strict coc; return coc; end $$; diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql index 3112656b..9b4ab86d 100644 --- a/sql/swh-schema.sql +++ b/sql/swh-schema.sql @@ -1,364 +1,341 @@ --- --- Software Heritage Data Model --- -- drop schema if exists swh cascade; -- create schema swh; -- set search_path to swh; create table dbversion ( version int primary key, release timestamptz, description text ); insert into dbversion(version, release, description) - values(14, now(), 'Work In Progress'); + values(15, now(), 'Work In Progress'); -- a SHA1 checksum (not necessarily originating from Git) create domain sha1 as bytea check (length(value) = 20); -- a Git object ID, i.e., a SHA1 checksum create domain sha1_git as bytea check (length(value) = 20); -- a SHA256 checksum create domain sha256 as bytea check (length(value) = 32); -- UNIX path (absolute, relative, individual path component, etc.) -- TODO should this be bytea or similar to avoid encoding/decoding issues? create domain unix_path as text; -- a set of UNIX-like access permissions, as manipulated by, e.g., chmod create domain file_perms as int; create type content_status as enum ('absent', 'visible', 'hidden'); -- An origin is a place, identified by an URL, where software can be found. We -- support different kinds of origins, e.g., git and other VCS repositories, -- web pages that list tarballs URLs (e.g., http://www.kernel.org), indirect -- tarball URLs (e.g., http://www.example.org/latest.tar.gz), etc. The key -- feature of an origin is that it can be *fetched* (wget, git clone, svn -- checkout, etc.) to retrieve all the contained software. create table origin ( id bigserial primary key, type text, -- TODO use an enum here (?) url text not null ); -- Checksums about actual file content. Note that the content itself is not -- stored in the DB, but on external (key-value) storage. A single checksum is -- used as key there, but the other can be used to verify that we do not inject -- content collisions not knowingly. create table content ( sha1 sha1 primary key, sha1_git sha1_git not null, sha256 sha256 not null, length bigint not null, ctime timestamptz not null default now(), -- creation time, i.e. time of (first) injection into the storage status content_status not null default 'visible' ); create unique index on content(sha1_git); create unique index on content(sha256); -- Content we have seen but skipped for some reason. This table is -- separate from the content table as we might not have the sha1 -- checksum of that data (for instance when we inject git -- repositories, objects that are too big will be skipped here, and we -- will only know their sha1_git). 'reason' contains the reason the -- content was skipped. origin is a nullable column allowing to find -- out which origin contains that skipped content. 
create table skipped_content ( sha1 sha1, sha1_git sha1_git, sha256 sha256, length bigint not null, ctime timestamptz not null default now(), status content_status not null default 'absent', reason text not null, origin bigint references origin(id), unique (sha1, sha1_git, sha256) ); -- those indexes support multiple NULL values. create unique index on skipped_content(sha1); create unique index on skipped_content(sha1_git); create unique index on skipped_content(sha256); -- An organization (or part thereof) that might be in charge of running -- software projects. Examples: Debian, GNU, GitHub, Apache, The Linux -- Foundation. The data model is hierarchical (via parent_id) and might store -- sub-branches of existing organizations. The key feature of an organization -- is that it can be *listed* to retrieve information about its content, i.e: -- sub-organizations, projects, origins. create table organization ( id bigserial primary key, parent_id bigint references organization(id), name text not null, description text, homepage text, list_engine text, -- crawler to be used to org's content list_url text, -- root URL to start the listing list_params json, -- org-specific listing parameter latest_list timestamptz -- last time the org's content has been listed ); -- Log of all organization listings (i.e., organization crawling) that have -- been done in the past, or are still ongoing. Similar to fetch_history, but -- for organizations. create table list_history ( id bigserial primary key, organization bigint references organization(id), date timestamptz not null, status boolean, -- true if and only if the listing has been successful result json, -- more detailed return value, depending on status stdout text, stderr text, duration interval -- fetch duration of NULL if still ongoing ); -- Log of all origin fetches (i.e., origin crawling) that have been done in the -- past, or are still ongoing. Similar to list_history, but for origins. create table fetch_history ( id bigserial primary key, origin bigint references origin(id), date timestamptz not null, status boolean, -- true if and only if the fetch has been successful result json, -- more detailed returned values, times, etc... stdout text, stderr text, -- null when status is true, filled otherwise duration interval -- fetch duration of NULL if still ongoing ); -- A specific software project, e.g., the Linux kernel, Apache httpd. A -- software project is version-less at this level, but is associated to several -- metadata. Metadata can evolve over time, this table only contains the most -- recent version of them; for old versions of project see table -- project_history. create table project ( id bigserial primary key, organization bigint references organization(id), -- the "owning" organization origin bigint references origin(id), -- where to find project releases name text, description text, homepage text, doap jsonb -- other kinds of metadata/software project description ontologies can be -- added here, in addition to DOAP ); -- History of project metadata. Time-sensitive version of the table project. create table project_history ( id bigserial primary key, project bigint references project(id), validity tstzrange, organization bigint references organization(id), origin bigint references origin(id), name text, description text, homepage text, doap jsonb ); -- A file-system directory. A directory is a list of directory entries (see -- tables: directory_entry_{dir,file}). -- -- To list the contents of a directory: --- 1. 
list the contained directory_entry_dir using table directory_list_dir --- 2. list the contained directory_entry_file using table directory_list_file --- 3. list the contained directory_entry_rev using table directory_list_rev +-- 1. list the contained directory_entry_dir using array dir_entries +-- 2. list the contained directory_entry_file using array file_entries +-- 3. list the contained directory_entry_rev using array rev_entries -- 4. UNION -- -- Synonyms/mappings: -- * git: tree create table directory ( - id sha1_git primary key + id sha1_git primary key, + dir_entries bigint[], -- sub-directories, reference directory_entry_dir + file_entries bigint[], -- contained files, reference directory_entry_file + rev_entries bigint[] -- mounted revisions, reference directory_entry_rev ); +create index on directory using gin (dir_entries); +create index on directory using gin (file_entries); +create index on directory using gin (rev_entries); + -- A directory entry pointing to a sub-directory. create table directory_entry_dir ( id bigserial primary key, target sha1_git, -- references directory(id) deferrable initially deferred, -- id of target directory name unix_path, -- path name, relative to containing dir perms file_perms, -- unix-like permissions atime timestamptz, -- time of last access mtime timestamptz, -- time of last modification ctime timestamptz -- time of last status change ); create unique index on directory_entry_dir(target, name, perms, atime, mtime, ctime); create unique index on directory_entry_dir(target, name, perms) where atime is null and mtime is null and ctime is null; --- Mapping between directories and contained sub-directories. -create table directory_list_dir -( - dir_id sha1_git references directory(id), - entry_ids bigint[], - primary key (dir_id) -); - -create index on directory_list_dir using gin (entry_ids); - -- A directory entry pointing to a file. create table directory_entry_file ( id bigserial primary key, target sha1_git, -- id of target file name unix_path, -- path name, relative to containing dir perms file_perms, -- unix-like permissions atime timestamptz, -- time of last access mtime timestamptz, -- time of last modification ctime timestamptz -- time of last status change ); create unique index on directory_entry_file(target, name, perms, atime, mtime, ctime); create unique index on directory_entry_file(target, name, perms) where atime is null and mtime is null and ctime is null; --- Mapping between directories and contained files. -create table directory_list_file -( - dir_id sha1_git references directory(id), - entry_ids bigint[], - primary key (dir_id) -); - -create index on directory_list_file using gin (entry_ids); - -- A directory entry pointing to a revision. create table directory_entry_rev ( id bigserial primary key, target sha1_git, -- id of target revision name unix_path, -- path name, relative to containing dir perms file_perms, -- unix-like permissions atime timestamptz, -- time of last access mtime timestamptz, -- time of last modification ctime timestamptz -- time of last status change ); create unique index on directory_entry_rev(target, name, perms, atime, mtime, ctime); create unique index on directory_entry_rev(target, name, perms) where atime is null and mtime is null and ctime is null; --- Mapping between directories and contained files. 
-create table directory_list_rev -( - dir_id sha1_git references directory(id), - entry_ids bigint[], - primary key (dir_id) -); - -create index on directory_list_rev using gin (entry_ids); - create table person ( id bigserial primary key, name text not null default '', email text not null default '' ); create unique index on person(name, email); create type revision_type as enum ('git', 'tar', 'dsc'); -- A snapshot of a software project at a specific point in time. -- -- Synonyms/mappings: -- * git / subversion / etc: commit -- * tarball: a specific tarball -- -- Revisions are organized as DAGs. Each revision points to 0, 1, or more (in -- case of merges) parent revisions. Each revision points to a directory, i.e., -- a file-system tree containing files and directories. create table revision ( id sha1_git primary key, date timestamptz, date_offset smallint, committer_date timestamptz, committer_date_offset smallint, type revision_type not null, directory sha1_git, -- file-system tree message bytea, author bigint references person(id), committer bigint references person(id) ); -- either this table or the sha1_git[] column on the revision table create table revision_history ( id sha1_git references revision(id), parent_id sha1_git, parent_rank int not null default 0, -- parent position in merge commits, 0-based primary key (id, parent_rank) ); -- The content of software origins is indexed starting from top-level pointers -- called "branches". Every time we fetch some origin we store in this table -- where the branches pointed to at fetch time. -- -- Synonyms/mappings: -- * git: ref (in the "git update-ref" sense) create table occurrence_history ( origin bigint references origin(id), branch text, -- e.g., "master" (for VCS), or "sid" (for Debian) revision sha1_git, -- ref target, e.g., commit id authority bigint references organization(id) not null, -- who is claiming to have seen the occurrence. -- Note: SWH is such an authority, and has an entry in -- the organization table. validity tstzrange, -- The time validity of this table entry. If the upper -- bound is missing, the entry is still valid. exclude using gist (origin with =, branch with =, revision with =, authority with =, validity with &&), -- unicity exclusion constraint on lines where the same value is found for -- `origin`, `reference`, `revision`, `authority` and overlapping values for -- `validity`. primary key (origin, branch, revision, authority, validity) ); -- Materialized view of occurrence_history, storing the *current* value of each -- branch, as last seen by SWH. create table occurrence ( origin bigint references origin(id), branch text, revision sha1_git, primary key(origin, branch, revision) ); -- A "memorable" point in the development history of a project. 
 --
 -- Synonyms/mappings:
 -- * git: tag (of the annotated kind, otherwise they are just references)
 -- * tarball: the release version number
 create table release
 (
   id          sha1_git primary key,
   revision    sha1_git,
   date        timestamptz,
   date_offset smallint,
   name        text,
   comment     bytea,
   author      bigint references person(id)
 );
diff --git a/sql/upgrades/015.sql b/sql/upgrades/015.sql
new file mode 100644
index 00000000..950695ea
--- /dev/null
+++ b/sql/upgrades/015.sql
@@ -0,0 +1,203 @@
+-- SWH DB schema upgrade
+-- from_version: 14
+-- to_version: 15
+-- description: merge directory_list_* tables into directory
+
+alter table directory
+  add column dir_entries  bigint[],
+  add column file_entries bigint[],
+  add column rev_entries  bigint[];
+
+with ls as (
+    -- we need an explicit sub-query here, because left joins aren't allowed in
+    -- update from_list
+    select id,
+           ls_d.entry_ids as dir_entries,
+           ls_f.entry_ids as file_entries,
+           ls_r.entry_ids as rev_entries
+    from directory as d
+    left join directory_list_dir  as ls_d on ls_d.dir_id = d.id
+    left join directory_list_file as ls_f on ls_f.dir_id = d.id
+    left join directory_list_rev  as ls_r on ls_r.dir_id = d.id
+)
+update directory
+    set dir_entries  = ls.dir_entries,
+        file_entries = ls.file_entries,
+        rev_entries  = ls.rev_entries
+    from ls
+    where ls.id = directory.id;
+
+create index on directory using gin (dir_entries);
+create index on directory using gin (file_entries);
+create index on directory using gin (rev_entries);
+
+drop table directory_list_dir;
+drop table directory_list_file;
+drop table directory_list_rev;
+
+create or replace function swh_directory_entry_dir_add()
+    returns void
+    language plpgsql
+as $$
+begin
+    insert into directory_entry_dir (target, name, perms, atime, mtime, ctime)
+    select distinct t.target, t.name, t.perms, t.atime, t.mtime, t.ctime
+    from tmp_directory_entry_dir t
+    where not exists (
+        select 1
+        from directory_entry_dir i
+        where t.target = i.target and t.name = i.name and t.perms = i.perms and
+              t.atime is not distinct from i.atime and
+              t.mtime is not distinct from i.mtime and
+              t.ctime is not distinct from i.ctime);
+
+    with new_entries as (
+        select t.dir_id, array_agg(i.id) as entries
+        from tmp_directory_entry_dir t
+        inner join directory_entry_dir i
+        on t.target = i.target and t.name = i.name and t.perms = i.perms and
+           t.atime is not distinct from i.atime and
+           t.mtime is not distinct from i.mtime and
+           t.ctime is not distinct from i.ctime
+        group by t.dir_id
+    )
+    update directory as d
+    set dir_entries = new_entries.entries
+    from new_entries
+    where d.id = new_entries.dir_id;
+
+    return;
+end
+$$;
+
+create or replace function swh_directory_entry_file_add()
+    returns void
+    language plpgsql
+as $$
+begin
+    insert into directory_entry_file (target, name, perms, atime, mtime, ctime)
+    select distinct t.target, t.name, t.perms, t.atime, t.mtime, t.ctime
+    from tmp_directory_entry_file t
+    where not exists (
+        select 1
+        from directory_entry_file i
+        where t.target = i.target and t.name = i.name and t.perms = i.perms and
+              t.atime is not distinct from i.atime and
+              t.mtime is not distinct from i.mtime and
+              t.ctime is not distinct from i.ctime);
+
+    with new_entries as (
+        select t.dir_id, array_agg(i.id) as entries
+        from tmp_directory_entry_file t
+        inner join directory_entry_file i
+        on t.target = i.target and t.name = i.name and t.perms = i.perms and
+           t.atime is not distinct from i.atime and
+           t.mtime is not distinct from i.mtime and
+           t.ctime is not distinct from i.ctime
+        group by t.dir_id
+    )
+    update directory as d
+    set file_entries = new_entries.entries
+    from new_entries
+    where d.id = new_entries.dir_id;
+
+    return;
+end
+$$;
+
+create or replace function swh_directory_entry_rev_add()
+    returns void
+    language plpgsql
+as $$
+begin
+    insert into directory_entry_rev (target, name, perms, atime, mtime, ctime)
+    select distinct t.target, t.name, t.perms, t.atime, t.mtime, t.ctime
+    from tmp_directory_entry_rev t
+    where not exists (
+        select 1
+        from directory_entry_rev i
+        where t.target = i.target and t.name = i.name and t.perms = i.perms and
+              t.atime is not distinct from i.atime and
+              t.mtime is not distinct from i.mtime and
+              t.ctime is not distinct from i.ctime);
+
+    with new_entries as (
+        select t.dir_id, array_agg(i.id) as entries
+        from tmp_directory_entry_rev t
+        inner join directory_entry_rev i
+        on t.target = i.target and t.name = i.name and t.perms = i.perms and
+           t.atime is not distinct from i.atime and
+           t.mtime is not distinct from i.mtime and
+           t.ctime is not distinct from i.ctime
+        group by t.dir_id
+    )
+    update directory as d
+    set rev_entries = new_entries.entries
+    from new_entries
+    where d.id = new_entries.dir_id;
+
+    return;
+end
+$$;
+
+create or replace function swh_directory_walk_one(walked_dir_id sha1_git)
+    returns setof directory_entry
+    language plpgsql
+as $$
+begin
+    return query
+        with dir as (
+            select id as dir_id, dir_entries, file_entries, rev_entries
+            from directory
+            where id = walked_dir_id),
+        ls_d as (select dir_id, unnest(dir_entries) as entry_id from dir),
+        ls_f as (select dir_id, unnest(file_entries) as entry_id from dir),
+        ls_r as (select dir_id, unnest(rev_entries) as entry_id from dir)
+        (select dir_id, 'dir'::directory_entry_type as type,
+                target, name, perms, atime, mtime, ctime
+         from ls_d
+         left join directory_entry_dir d on ls_d.entry_id = d.id)
+        union
+        (select dir_id, 'file'::directory_entry_type as type,
+                target, name, perms, atime, mtime, ctime
+         from ls_f
+         left join directory_entry_file d on ls_f.entry_id = d.id)
+        union
+        (select dir_id, 'rev'::directory_entry_type as type,
+                target, name, perms, atime, mtime, ctime
+         from ls_r
+         left join directory_entry_rev d on ls_r.entry_id = d.id)
+        order by name;
+    return;
+end
+$$;
+
+create or replace function swh_content_find_directory(content_id sha1)
+    returns content_dir
+    language plpgsql
+as $$
+declare
+    d content_dir;
+begin
+    with recursive path as (
+        (select dir.id as dir_id, dir_entry_f.name as name, 0 as depth
+         from directory_entry_file as dir_entry_f
+         join content on content.sha1_git = dir_entry_f.target
+         join directory as dir on dir.file_entries @> array[dir_entry_f.id]
+         where content.sha1 = content_id
+         limit 1)
+        union all
+        (select dir.id as dir_id,
+                (dir_entry_d.name || '/' || path.name)::unix_path as name,
+                path.depth + 1
+         from path
+         join directory_entry_dir as dir_entry_d on dir_entry_d.target = path.dir_id
+         join directory as dir on dir.dir_entries @> array[dir_entry_d.id]
+         limit 1)
+    )
+    select dir_id, name from path order by depth desc limit 1
+    into strict d;
+
+    return d;
+end
+$$;
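
The function comments above describe the intended bulk-loading protocol (swh_mktemp_dir_entry, then COPY, then the swh_*_add call, all in one transaction since the context directory row must exist before its entry arrays are set). A minimal sketch of such a client session against the new array-based schema follows; the directory/entry ids, the perms value, and the single-row insert standing in for COPY are illustrative assumptions, not part of this change:

begin;

-- the context directory must be present in the same transaction, since
-- swh_directory_entry_dir_add() writes the entry ids into directory.dir_entries
insert into directory (id)
    values ('\x0000000000000000000000000000000000000000');  -- hypothetical id

-- 0. create the temporary entry table (columns of directory_entry_dir,
--    minus id, plus dir_id)
select swh_mktemp_dir_entry('directory_entry_dir');

-- 1. bulk-load the entries; a plain insert stands in for COPY here
insert into tmp_directory_entry_dir (target, name, perms, dir_id)
    values ('\x1111111111111111111111111111111111111111', 'subdir', 16384,
            '\x0000000000000000000000000000000000000000');

-- 2. deduplicate into directory_entry_dir and attach the resulting ids
--    to directory.dir_entries
select swh_directory_entry_dir_add();

commit;

-- list the directory through the new array columns
select * from swh_directory_walk_one('\x0000000000000000000000000000000000000000');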