diff --git a/contents.py b/contents.py
deleted file mode 100644
index 2cd125c..0000000
--- a/contents.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import os
-
-from swh.model.hashutil import hash_to_hex
-from swh.provenance.provenance import get_provenance
-
-
-if __name__ == "__main__":
-    conninfo = {
-        "host": "localhost",
-        "database": "new_1000",
-        "user": "postgres",
-        "password": "postgres",
-    }
-    provenance = get_provenance(conninfo)
-
-    print("content(id, date): ################################################")
-    provenance.cursor.execute("""SELECT id, date FROM content ORDER BY id""")
-    for row in provenance.cursor.fetchall():
-        print(f"{hash_to_hex(row[0])}, {row[1]}")
-    print("###################################################################")
-
-    print("content_early_in_rev(blob, rev, path): ############################")
-    provenance.cursor.execute(
-        """SELECT blob, rev, path FROM content_early_in_rev ORDER BY blob, rev, path"""
-    )
-    for row in provenance.cursor.fetchall():
-        print(f"{row[0]}, {row[1]}, {row[2]}")
-        print(f"{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}")
-    print("###################################################################")
-
-    print("content_in_dir(blob, dir, path): ##################################")
-    provenance.cursor.execute(
-        """SELECT blob, dir, path FROM content_in_dir ORDER BY blob, dir, path"""
-    )
-    for row in provenance.cursor.fetchall():
-        print(f"{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}")
-    print("###################################################################")
-
-    print("directory(id, date): ##############################################")
-    provenance.cursor.execute("""SELECT id, date FROM directory ORDER BY id""")
-    for row in provenance.cursor.fetchall():
-        print(f"{hash_to_hex(row[0])}, {row[1]}")
-    print("###################################################################")
-
-    print("directory_in_rev(dir, rev, path): #################################")
-    provenance.cursor.execute(
-        """SELECT dir, rev, path FROM directory_in_rev ORDER BY dir, rev, path"""
-    )
-    for row in provenance.cursor.fetchall():
-        print(f"{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}")
-    print("###################################################################")
-
-    print("revision(id, date): ###############################################")
-    provenance.cursor.execute("""SELECT id, date FROM revision ORDER BY id""")
-    for row in provenance.cursor.fetchall():
-        print(f"{hash_to_hex(row[0])}, {row[1]}")
-    print("###################################################################")
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
index c227040..7fe778d 100644
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -1,24 +1,28 @@
 from .archive import ArchiveInterface
 from .provenance import ProvenanceInterface
 from .storage.archive import ArchiveStorage
 from .postgresql.archive import ArchivePostgreSQL
 from .postgresql.db_utils import connect
 from .postgresql.provenance import ProvenancePostgreSQL
+from .postgresql_nopath.provenance import ProvenancePostgreSQLNoPath
 
 
 def get_archive(cls: str, **kwargs) -> ArchiveInterface:
     if cls == "api":
         return ArchiveStorage(**kwargs["storage"])
     elif cls == "ps":
         conn = connect(kwargs["db"])
         return ArchivePostgreSQL(conn)
     else:
         raise NotImplementedError
 
 
 def get_provenance(cls: str, **kwargs) -> ProvenanceInterface:
     if cls == "ps":
         conn = connect(kwargs["db"])
         return ProvenancePostgreSQL(conn)
+    elif cls == "ps_np":
+        conn = connect(kwargs["db"])
+        return ProvenancePostgreSQLNoPath(conn)
     else:
         raise NotImplementedError
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
index a79bd46..4a4824f 100644
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -1,201 +1,209 @@
 # Copyright (C) 2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 # WARNING: do not import unnecessary things here to keep cli startup time under
 # control
 import os
 from typing import Any, Dict, Optional
 
 import click
 import yaml
 
 from swh.core import config
 from swh.core.cli import CONTEXT_SETTINGS
 from swh.core.cli import swh as swh_cli_group
 from swh.model.hashutil import hash_to_bytes, hash_to_hex
 
 # All generic config code should reside in swh.core.config
 CONFIG_ENVVAR = "SWH_CONFIG_FILE"
 DEFAULT_CONFIG_PATH = os.path.join(click.get_app_dir("swh"), "global.yml")
 DEFAULT_PATH = os.environ.get(CONFIG_ENVVAR, DEFAULT_CONFIG_PATH)
 
 DEFAULT_CONFIG: Dict[str, Any] = {
     "archive": {
         "cls": "api",
         "storage": {
             "cls": "remote",
             "url": "http://uffizi.internal.softwareheritage.org:5002",
         }
         # "cls": "ps",
         # "db": {
         #     "host": "db.internal.softwareheritage.org",
         #     "dbname": "softwareheritage",
         #     "user": "guest"
         # }
     },
     "provenance": {"cls": "ps", "db": {"host": "localhost", "dbname": "provenance"}},
 }
 
 
 CONFIG_FILE_HELP = f"""Configuration file:
 
 \b
 The CLI option or the environment variable will fail if invalid.
 CLI option is checked first.
 Then, environment variable {CONFIG_ENVVAR} is checked.
 Then, if cannot load the default path, a set of default values are used.
 Default config path is {DEFAULT_CONFIG_PATH}.
 Default config values are:
 
 \b
 {yaml.dump(DEFAULT_CONFIG)}"""
 PROVENANCE_HELP = f"""Software Heritage Scanner tools.
 
 {CONFIG_FILE_HELP}"""
 
 
 @swh_cli_group.group(
     name="provenance", context_settings=CONTEXT_SETTINGS, help=PROVENANCE_HELP
 )
 @click.option(
     "-C",
     "--config-file",
     default=None,
     type=click.Path(exists=False, dir_okay=False, path_type=str),
     help="""YAML configuration file.""",
 )
 @click.option(
     "-P",
     "--profile",
     default=None,
     type=click.Path(exists=False, dir_okay=False, path_type=str),
     help="""Enable profiling to specified file.""",
 )
 @click.pass_context
 def cli(ctx, config_file: Optional[str], profile: str):
     if config_file is None and config.config_exists(DEFAULT_PATH):
         config_file = DEFAULT_PATH
 
     if config_file is None:
         conf = DEFAULT_CONFIG
     else:
-        # read_raw_config do not fail on ENOENT
+        # read_raw_config does not fail on ENOENT
         if not config.config_exists(config_file):
             raise FileNotFoundError(config_file)
         conf = config.read_raw_config(config.config_basepath(config_file))
         conf = config.merge_configs(DEFAULT_CONFIG, conf)
 
     ctx.ensure_object(dict)
     ctx.obj["config"] = conf
 
     if profile:
         import cProfile
         import atexit
 
         print("Profiling...")
         pr = cProfile.Profile()
         pr.enable()
 
         def exit():
             pr.disable()
             pr.dump_stats(profile)
 
         atexit.register(exit)
 
 
 @cli.command(name="create")
 @click.option("--name", default=None)
 @click.pass_context
 def create(ctx, name):
     """Create new provenance database."""
     from .postgresql.db_utils import connect
-    from .postgresql.provenance import create_database
 
     # Connect to server without selecting a database
     conninfo = ctx.obj["config"]["provenance"]["db"]
     conn = connect(conninfo)
 
-    create_database(conn, conninfo, name)
+    # Resolve the backend first so both schemas share a single creation call.
+    backend = ctx.obj["config"]["provenance"]["cls"]
+    if backend == "ps":
+        from .postgresql.provenance import create_database
+    elif backend == "ps_np":
+        from .postgresql_nopath.provenance import create_database
+    else:
+        raise NotImplementedError(f"unknown provenance cls: {backend!r}")
+
+    create_database(conn, conninfo, name)
 
 
 @cli.command(name="iter-revisions")
 @click.argument("filename")
 @click.option("-l", "--limit", type=int)
 @click.pass_context
 def iter_revisions(ctx, filename, limit):
     """Process a provided list of revisions."""
     from . import get_archive, get_provenance
     from .revision import FileRevisionIterator
     from .provenance import revision_add
 
     archive = get_archive(**ctx.obj["config"]["archive"])
     provenance = get_provenance(**ctx.obj["config"]["provenance"])
     revisions = FileRevisionIterator(filename, archive, limit=limit)
 
     while True:
         revision = revisions.next()
         if revision is None:
             break
         revision_add(provenance, archive, revision)
 
 
 @cli.command(name="iter-origins")
 @click.argument("filename")
 @click.option("-l", "--limit", type=int)
 @click.pass_context
 def iter_origins(ctx, filename, limit):
     """Process a provided list of origins."""
     from . import get_archive, get_provenance
     from .origin import FileOriginIterator
     from .provenance import origin_add
 
     archive = get_archive(**ctx.obj["config"]["archive"])
     provenance = get_provenance(**ctx.obj["config"]["provenance"])
 
     for origin in FileOriginIterator(filename, archive, limit=limit):
         origin_add(provenance, origin)
 
 
 @cli.command(name="find-first")
 @click.argument("swhid")
 @click.pass_context
 def find_first(ctx, swhid):
     """Find first occurrence of the requested blob."""
     from . import get_provenance
 
     provenance = get_provenance(**ctx.obj["config"]["provenance"])
     # TODO: return a dictionary with proper keys for each field
     row = provenance.content_find_first(hash_to_bytes(swhid))
     if row is not None:
         print(
             "{blob}, {rev}, {date}, {path}".format(
                 blob=hash_to_hex(row[0]),
                 rev=hash_to_hex(row[1]),
                 date=row[2],
                 path=os.fsdecode(row[3]),
             )
         )
     else:
         print(f"Cannot find a content with the id {swhid}")
 
 
 @cli.command(name="find-all")
 @click.argument("swhid")
 @click.pass_context
 def find_all(ctx, swhid):
     """Find all occurrences of the requested blob."""
     from swh.provenance import get_provenance
 
     provenance = get_provenance(**ctx.obj["config"]["provenance"])
     # TODO: return a dictionary with proper keys for each field
     for row in provenance.content_find_all(hash_to_bytes(swhid)):
         print(
             "{blob}, {rev}, {date}, {path}".format(
                 blob=hash_to_hex(row[0]),
                 rev=hash_to_hex(row[1]),
                 date=row[2],
                 path=os.fsdecode(row[3]),
             )
         )
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
index 61e9692..f9f227e 100644
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -1,84 +1,84 @@
 import psycopg2
 
 # import threading
 
 from ..archive import ArchiveInterface
 
 # from functools import lru_cache
 from methodtools import lru_cache
 from typing import Any, Dict, List
 
 
 class ArchivePostgreSQL(ArchiveInterface):
     def __init__(self, conn: psycopg2.extensions.connection):
         self.conn = conn
         # self.mutex = threading.Lock()
 
     def directory_ls(self, id: bytes) -> List[Dict[str, Any]]:
         # TODO: only call directory_ls_internal if the id is not being queried by
         # someone else. Otherwise wait until results get properly cached.
         # self.mutex.acquire()
         entries = self.directory_ls_internal(id)
         # self.mutex.release()
         return entries
 
     @lru_cache(maxsize=1000000)
     def directory_ls_internal(self, id: bytes) -> List[Dict[str, Any]]:
         cursor = self.conn.cursor()
         cursor.execute(
             """WITH
             dir  AS (SELECT id AS dir_id, dir_entries, file_entries, rev_entries
                         FROM directory WHERE id=%s),
-            ls_d AS (SELECT dir_id, unnest(dir_entries) AS entry_id from dir),
-            ls_f AS (SELECT dir_id, unnest(file_entries) AS entry_id from dir),
-            ls_r AS (SELECT dir_id, unnest(rev_entries) AS entry_id from dir)
+            ls_d AS (SELECT dir_id, UNNEST(dir_entries)  AS entry_id FROM dir),
+            ls_f AS (SELECT dir_id, UNNEST(file_entries) AS entry_id FROM dir),
+            ls_r AS (SELECT dir_id, UNNEST(rev_entries)  AS entry_id FROM dir)
             (SELECT 'dir'::directory_entry_type AS type, e.target, e.name,
                     NULL::sha1_git
                 FROM ls_d
                 LEFT JOIN directory_entry_dir e ON ls_d.entry_id=e.id)
             UNION
             (WITH known_contents AS
                 (SELECT 'file'::directory_entry_type AS type, e.target, e.name,
                         c.sha1_git
                     FROM ls_f
                     LEFT JOIN directory_entry_file e ON ls_f.entry_id=e.id
                     INNER JOIN content c ON e.target=c.sha1_git)
                 SELECT * FROM known_contents
                 UNION
                 (SELECT 'file'::directory_entry_type AS type, e.target, e.name,
                         c.sha1_git
                     FROM ls_f
                     LEFT JOIN directory_entry_file e ON ls_f.entry_id=e.id
                     LEFT JOIN skipped_content c ON e.target=c.sha1_git
                     WHERE NOT EXISTS (
                         SELECT 1 FROM known_contents
                             WHERE known_contents.sha1_git=e.target
                     )
                 )
             )
             ORDER BY name
             """,
             (id,),
         )
         return [
             {"type": row[0], "target": row[1], "name": row[2]}
             for row in cursor.fetchall()
         ]
 
     def iter_origins(self):
         raise NotImplementedError
 
     def iter_origin_visits(self, origin: str):
         raise NotImplementedError
 
     def iter_origin_visit_statuses(self, origin: str, visit: int):
         raise NotImplementedError
 
     def release_get(self, ids: List[bytes]):
         raise NotImplementedError
 
     def revision_get(self, ids: List[bytes]):
         raise NotImplementedError
 
     def snapshot_get_all_branches(self, snapshot: bytes):
         raise NotImplementedError
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql/provenance.py
index 927ca4a..267ba6c 100644
--- a/swh/provenance/postgresql/provenance.py
+++ b/swh/provenance/postgresql/provenance.py
@@ -1,503 +1,505 @@
 import itertools
 import logging
 import operator
 import os
 import psycopg2
 import psycopg2.extras
 
 from ..model import DirectoryEntry, FileEntry
 from ..origin import OriginEntry
 from .db_utils import connect, execute_sql
 from ..provenance import ProvenanceInterface
 from ..revision import RevisionEntry
 
 from datetime import datetime
 from typing import Any, Dict, Generator, List, Optional, Tuple
 
 
 def normalize(path: bytes) -> bytes:
     return path[2:] if path.startswith(bytes("." + os.path.sep, "utf-8")) else path
 
 
 def create_database(conn: psycopg2.extensions.connection, conninfo: dict, name: str):
+    """Drop and recreate database ``name``, then apply ``provenance.sql`` to it."""
+    if not name:
+        raise ValueError("a database name is required")
     conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-
     # Normalize dbname to avoid issues when reconnecting below
     name = name.casefold()
-
-    # Create new database dropping previous one if exists
+    quoted = psycopg2.extensions.quote_ident(name, conn)  # no raw name in the SQL
     cursor = conn.cursor()
-    cursor.execute(f"""DROP DATABASE IF EXISTS {name}""")
-    cursor.execute(f"""CREATE DATABASE {name}""")
+    cursor.execute(f"""DROP DATABASE IF EXISTS {quoted}""")
+    cursor.execute(f"""CREATE DATABASE {quoted}""")
     conn.close()
 
     # Reconnect to server selecting newly created database to add tables
     conninfo["dbname"] = name
     conn = connect(conninfo)
-
     sqldir = os.path.dirname(os.path.realpath(__file__))
     execute_sql(conn, os.path.join(sqldir, "provenance.sql"))
 
 
 ########################################################################################
 ########################################################################################
 ########################################################################################
 
 
 class ProvenancePostgreSQL(ProvenanceInterface):
     def __init__(self, conn: psycopg2.extensions.connection):
         # TODO: consider adding a mutex for thread safety
         conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
         self.conn = conn
         self.cursor = self.conn.cursor()
         self.insert_cache: Dict[str, Any] = {}
         self.remove_cache: Dict[str, Any] = {}
         self.select_cache: Dict[str, Any] = {}
         self.clear_caches()
 
     def clear_caches(self):
         self.insert_cache = {
             "content": dict(),
             "content_early_in_rev": list(),
             "content_in_dir": list(),
             "directory": dict(),
             "directory_in_rev": list(),
             "revision": dict(),
             "revision_before_rev": list(),
             "revision_in_org": list(),
         }
         self.remove_cache = {"directory": dict()}
         self.select_cache = {"content": dict(), "directory": dict(), "revision": dict()}
 
     def commit(self):
+        """Flush the cached insertions to the database; return True on success."""
-        result = False
         try:
             self.insert_all()
             self.clear_caches()
-            result = True
-
-        except Exception as error:
-            # Unexpected error occurred, rollback all changes and log message
-            logging.error(f"Unexpected error: {error}")
-
-        return result
+        except Exception as error:
+            # No rollback happens here (the connection is in autocommit mode), so
+            # only log the failure, traceback included, and report it to the caller.
+            logging.exception(f"Unexpected error: {error}")
+            return False
+
+        return True
 
     def content_add_to_directory(
         self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes
     ):
         self.insert_cache["content_in_dir"].append(
             (blob.id, directory.id, normalize(os.path.join(prefix, blob.name)))
         )
 
     def content_add_to_revision(
         self, revision: RevisionEntry, blob: FileEntry, prefix: bytes
     ):
         self.insert_cache["content_early_in_rev"].append(
             (blob.id, revision.id, normalize(os.path.join(prefix, blob.name)))
         )
 
-    def content_find_first(self, blobid: bytes) -> Tuple[bytes, bytes, datetime, bytes]:
+    def content_find_first(
+        self, blobid: bytes
+    ) -> Optional[Tuple[bytes, bytes, datetime, bytes]]:
         self.cursor.execute(
             """SELECT content_location.sha1 AS blob,
                       revision.sha1 AS rev,
                       revision.date AS date,
                       content_location.path AS path
                  FROM (SELECT content_hex.sha1,
                               content_hex.rev,
                               location.path
                         FROM (SELECT content.sha1,
                                      content_early_in_rev.rev,
                                      content_early_in_rev.loc
                                FROM content_early_in_rev
                                JOIN content
                                  ON content.id=content_early_in_rev.blob
                                WHERE content.sha1=%s
                              ) AS content_hex
                         JOIN location
                             ON location.id=content_hex.loc
                       ) AS content_location
                  JOIN revision
                    ON revision.id=content_location.rev
                  ORDER BY date, rev, path ASC LIMIT 1""",
             (blobid,),
         )
         return self.cursor.fetchone()
 
     def content_find_all(
         self, blobid: bytes
     ) -> Generator[Tuple[bytes, bytes, datetime, bytes], None, None]:
         self.cursor.execute(
             """(SELECT content_location.sha1 AS blob,
                        revision.sha1 AS rev,
                        revision.date AS date,
                        content_location.path AS path
                  FROM (SELECT content_hex.sha1,
                               content_hex.rev,
                               location.path
                         FROM (SELECT content.sha1,
                                      content_early_in_rev.rev,
                                      content_early_in_rev.loc
                                FROM content_early_in_rev
                                JOIN content
                                  ON content.id=content_early_in_rev.blob
                                WHERE content.sha1=%s
                              ) AS content_hex
                         JOIN location
                           ON location.id=content_hex.loc
                       ) AS content_location
                  JOIN revision
                    ON revision.id=content_location.rev
                  )
                UNION
                (SELECT content_prefix.sha1 AS blob,
                        revision.sha1 AS rev,
                        revision.date AS date,
                        content_prefix.path AS path
                  FROM (SELECT content_in_rev.sha1,
                               content_in_rev.rev,
                               CASE location.path
                                 WHEN '' THEN content_in_rev.suffix
                                 WHEN '.' THEN content_in_rev.suffix
                                 ELSE (location.path || '/' ||
                                          content_in_rev.suffix)::unix_path
                               END AS path
                         FROM (SELECT content_suffix.sha1,
                                      directory_in_rev.rev,
                                      directory_in_rev.loc,
                                      content_suffix.path AS suffix
                                FROM (SELECT content_hex.sha1,
                                             content_hex.dir,
                                             location.path
                                       FROM (SELECT content.sha1,
                                                    content_in_dir.dir,
                                                    content_in_dir.loc
                                              FROM content_in_dir
                                              JOIN content
                                                ON content_in_dir.blob=content.id
                                              WHERE content.sha1=%s
                                            ) AS content_hex
                                       JOIN location
                                         ON location.id=content_hex.loc
                                     ) AS content_suffix
                                JOIN directory_in_rev
                                  ON directory_in_rev.dir=content_suffix.dir
                              ) AS content_in_rev
                         JOIN location
                           ON location.id=content_in_rev.loc
                       ) AS content_prefix
                  JOIN revision
                    ON revision.id=content_prefix.rev
                )
                ORDER BY date, rev, path""",
             (blobid, blobid),
         )
         # TODO: use POSTGRESQL EXPLAIN looking for query optimizations.
         yield from self.cursor.fetchall()
 
     def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]:
-        # First check if the date is being modified by current transection.
+        # First check if the date is being modified by current transaction.
         date = self.insert_cache["content"].get(blob.id, None)
         if date is None:
-            # If not, check whether it's been query before
+            # If not, check whether it's been queried before
             date = self.select_cache["content"].get(blob.id, None)
             if date is None:
                 # Otherwise, query the database and cache the value
                 self.cursor.execute(
                     """SELECT date FROM content WHERE sha1=%s""", (blob.id,)
                 )
                 row = self.cursor.fetchone()
                 date = row[0] if row is not None else None
                 self.select_cache["content"][blob.id] = date
         return date
 
     def content_get_early_dates(self, blobs: List[FileEntry]) -> Dict[bytes, datetime]:
         dates = {}
         pending = []
         for blob in blobs:
-            # First check if the date is being modified by current transection.
+            # First check if the date is being modified by current transaction.
             date = self.insert_cache["content"].get(blob.id, None)
             if date is not None:
                 dates[blob.id] = date
             else:
-                # If not, check whether it's been query before
+                # If not, check whether it's been queried before
                 date = self.select_cache["content"].get(blob.id, None)
                 if date is not None:
                     dates[blob.id] = date
                 else:
                     pending.append(blob.id)
         if pending:
             # Otherwise, query the database and cache the values
             values = ", ".join(itertools.repeat("%s", len(pending)))
             self.cursor.execute(
                 f"""SELECT sha1, date FROM content WHERE sha1 IN ({values})""",
                 tuple(pending),
             )
             for row in self.cursor.fetchall():
                 dates[row[0]] = row[1]
                 self.select_cache["content"][row[0]] = row[1]
         return dates
 
     def content_set_early_date(self, blob: FileEntry, date: datetime):
         self.insert_cache["content"][blob.id] = date
 
     def directory_add_to_revision(
         self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes
     ):
         self.insert_cache["directory_in_rev"].append(
             (directory.id, revision.id, normalize(path))
         )
 
     def directory_get_date_in_isochrone_frontier(
         self, directory: DirectoryEntry
     ) -> Optional[datetime]:
-        # First check if the date is being modified by current transection.
+        # First check if the date is being modified by current transaction.
         date = self.insert_cache["directory"].get(directory.id, None)
         if date is None and directory.id not in self.remove_cache["directory"]:
-            # If not, check whether it's been query before
+            # If not, check whether it's been queried before
             date = self.select_cache["directory"].get(directory.id, None)
             if date is None:
                 # Otherwise, query the database and cache the value
                 self.cursor.execute(
                     """SELECT date FROM directory WHERE sha1=%s""", (directory.id,)
                 )
                 row = self.cursor.fetchone()
                 date = row[0] if row is not None else None
                 self.select_cache["directory"][directory.id] = date
         return date
 
     def directory_get_dates_in_isochrone_frontier(
         self, dirs: List[DirectoryEntry]
     ) -> Dict[bytes, datetime]:
         dates = {}
         pending = []
         for directory in dirs:
-            # First check if the date is being modified by current transection.
+            # First check if the date is being modified by current transaction.
             date = self.insert_cache["directory"].get(directory.id, None)
             if date is not None:
                 dates[directory.id] = date
             elif directory.id not in self.remove_cache["directory"]:
-                # If not, check whether it's been query before
+                # If not, check whether it's been queried before
                 date = self.select_cache["directory"].get(directory.id, None)
                 if date is not None:
                     dates[directory.id] = date
                 else:
                     pending.append(directory.id)
         if pending:
             # Otherwise, query the database and cache the values
             values = ", ".join(itertools.repeat("%s", len(pending)))
             self.cursor.execute(
                 f"""SELECT sha1, date FROM directory WHERE sha1 IN ({values})""",
                 tuple(pending),
             )
             for row in self.cursor.fetchall():
                 dates[row[0]] = row[1]
                 self.select_cache["directory"][row[0]] = row[1]
         return dates
 
     def directory_invalidate_in_isochrone_frontier(self, directory: DirectoryEntry):
         self.remove_cache["directory"][directory.id] = None
         self.insert_cache["directory"].pop(directory.id, None)
 
     def directory_set_date_in_isochrone_frontier(
         self, directory: DirectoryEntry, date: datetime
     ):
         self.insert_cache["directory"][directory.id] = date
         self.remove_cache["directory"].pop(directory.id, None)
 
     def insert_all(self):
-        # Performe insertions with cached information
+        # Perform insertions with cached information
         if self.insert_cache["content"]:
             psycopg2.extras.execute_values(
                 self.cursor,
                 """LOCK TABLE ONLY content;
                    INSERT INTO content(sha1, date) VALUES %s
                      ON CONFLICT (sha1) DO
                        UPDATE SET date=LEAST(EXCLUDED.date,content.date)""",
                 self.insert_cache["content"].items(),
             )
             self.insert_cache["content"].clear()
 
         if self.insert_cache["directory"]:
             psycopg2.extras.execute_values(
                 self.cursor,
                 """LOCK TABLE ONLY directory;
                    INSERT INTO directory(sha1, date) VALUES %s
                      ON CONFLICT (sha1) DO
                        UPDATE SET date=LEAST(EXCLUDED.date,directory.date)""",
                 self.insert_cache["directory"].items(),
             )
             self.insert_cache["directory"].clear()
 
         if self.insert_cache["revision"]:
             psycopg2.extras.execute_values(
                 self.cursor,
                 """LOCK TABLE ONLY revision;
                    INSERT INTO revision(sha1, date) VALUES %s
                      ON CONFLICT (sha1) DO
                        UPDATE SET date=LEAST(EXCLUDED.date,revision.date)""",
                 self.insert_cache["revision"].items(),
             )
             self.insert_cache["revision"].clear()
 
         # Relations should come after ids for elements were resolved
         if self.insert_cache["content_early_in_rev"]:
             self.insert_location("content", "revision", "content_early_in_rev")
 
         if self.insert_cache["content_in_dir"]:
             self.insert_location("content", "directory", "content_in_dir")
 
         if self.insert_cache["directory_in_rev"]:
             self.insert_location("directory", "revision", "directory_in_rev")
 
         # if self.insert_cache["revision_before_rev"]:
         #     psycopg2.extras.execute_values(
         #         self.cursor,
         #         """INSERT INTO revision_before_rev VALUES %s
         #            ON CONFLICT DO NOTHING""",
         #         self.insert_cache["revision_before_rev"],
         #     )
         #     self.insert_cache["revision_before_rev"].clear()
 
         # if self.insert_cache["revision_in_org"]:
         #     psycopg2.extras.execute_values(
         #         self.cursor,
         #         """INSERT INTO revision_in_org VALUES %s
         #            ON CONFLICT DO NOTHING""",
         #         self.insert_cache["revision_in_org"],
         #     )
         #     self.insert_cache["revision_in_org"].clear()
 
     def insert_location(self, src0_table, src1_table, dst_table):
         """Flush the rows cached for `dst_table`, translating them to internal ids.
         Each cached row is a triple: column 0 is a sha1 looked up in `src0_table`,
         column 1 a sha1 looked up in `src1_table`, and column 2 a path that is
         inserted into (or fetched from) the `location` table. The resulting id
         triples are inserted into `dst_table` and the cache entry is cleared.
         NOTE(review): an empty cache would produce an empty `IN ()` list, and a
         sha1 missing from its source table raises KeyError when the rows are
         built; the callers visible here only call this for non-empty caches.
         """
         # Resolve src0 ids
         # (dict keys de-duplicate the sha1s taken from column 0)
         src0_values = dict().fromkeys(
             map(operator.itemgetter(0), self.insert_cache[dst_table])
         )
         values = ", ".join(itertools.repeat("%s", len(src0_values)))
         self.cursor.execute(
             f"""SELECT sha1, id FROM {src0_table} WHERE sha1 IN ({values})""",
             tuple(src0_values),
         )
         # Rebind the name to the sha1 -> internal id mapping
         src0_values = dict(self.cursor.fetchall())
 
         # Resolve src1 ids
         src1_values = dict().fromkeys(
             map(operator.itemgetter(1), self.insert_cache[dst_table])
         )
         values = ", ".join(itertools.repeat("%s", len(src1_values)))
         self.cursor.execute(
             f"""SELECT sha1, id FROM {src1_table} WHERE sha1 IN ({values})""",
             tuple(src1_values),
         )
         src1_values = dict(self.cursor.fetchall())
 
         # Resolve location ids
         location = dict().fromkeys(
             map(operator.itemgetter(2), self.insert_cache[dst_table])
         )
         # The conflict branch rewrites `path` with the same value so that
         # RETURNING also reports the id of paths that already existed.
         location = dict(
             psycopg2.extras.execute_values(
                 self.cursor,
                 """LOCK TABLE ONLY location;
                    INSERT INTO location(path) VALUES %s
                      ON CONFLICT (path) DO
                        UPDATE SET path=EXCLUDED.path
                      RETURNING path, id""",
                 map(lambda path: (path,), location.keys()),
                 fetch=True,
             )
         )
 
         # Insert values in dst_table
         # (lazy map: the id lookups happen while execute_values consumes the rows)
         rows = map(
             lambda row: (src0_values[row[0]], src1_values[row[1]], location[row[2]]),
             self.insert_cache[dst_table],
         )
         psycopg2.extras.execute_values(
             self.cursor,
             f"""INSERT INTO {dst_table} VALUES %s
                   ON CONFLICT DO NOTHING""",
             rows,
         )
         self.insert_cache[dst_table].clear()
 
     def origin_get_id(self, origin: OriginEntry) -> int:
         """Return the internal id of `origin`, registering its URL when needed.
         If `origin.id` is already set it is returned unchanged. Otherwise the URL
         is inserted into the `origin` table; when the insert is skipped because
         of a conflict, the id of the existing row is looked up instead.
         """
         if origin.id is None:
             # Insert origin in the DB and return the assigned id
             self.cursor.execute(
                 """INSERT INTO origin (url) VALUES (%s)
                      ON CONFLICT DO NOTHING
                      RETURNING id""",
                 (origin.url,),
             )
             row = self.cursor.fetchone()
             if row is None:
                 # ON CONFLICT DO NOTHING returns no row when nothing was
                 # inserted, so fetch the id of the already registered URL.
                 self.cursor.execute(
                     """SELECT id FROM origin WHERE url=%s""", (origin.url,)
                 )
                 row = self.cursor.fetchone()
             return row[0]
         return origin.id
 
     def revision_add(self, revision: RevisionEntry):
         """Record `revision.date` under `revision.id` in the pending-insert cache."""
         pending = self.insert_cache["revision"]
         pending[revision.id] = revision.date
 
     def revision_add_before_revision(
         self, relative: RevisionEntry, revision: RevisionEntry
     ):
         """Cache the pair (`revision.id`, `relative.id`) for `revision_before_rev`."""
         pair = (revision.id, relative.id)
         self.insert_cache["revision_before_rev"].append(pair)
 
     def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry):
         """Cache the pair (`revision.id`, `origin.id`) for `revision_in_org`."""
         pair = (revision.id, origin.id)
         self.insert_cache["revision_in_org"].append(pair)
 
     def revision_get_early_date(self, revision: RevisionEntry) -> Optional[datetime]:
         """Return the date known for `revision`, or None if there is none.
         Lookup order: pending-insert cache, then select cache, then the
         `revision` table; the database answer is stored in the select cache.
         """
         key = revision.id
         found = self.insert_cache["revision"].get(key)
         if found is not None:
             return found
         found = self.select_cache["revision"].get(key)
         if found is not None:
             return found
         # Neither cache has a date: ask the database and remember the answer
         self.cursor.execute("""SELECT date FROM revision WHERE sha1=%s""", (key,))
         record = self.cursor.fetchone()
         found = None if record is None else record[0]
         self.select_cache["revision"][key] = found
         return found
 
     def revision_get_prefered_origin(self, revision: RevisionEntry) -> Optional[int]:
         """Return the preferred origin id stored for `revision`, if any.
         Returns None both when the revision is not in the database and when
         its `org` column is NULL or 0 (no preferred origin).
         """
         # TODO: adapt this method to consider cached values
         self.cursor.execute(
             """SELECT COALESCE(org,0) FROM revision WHERE sha1=%s""", (revision.id,)
         )
         row = self.cursor.fetchone()
         # None means revision is not in database;
         # 0 means revision has no prefered origin
         if row is None or row[0] == 0:
             return None
         return row[0]
 
     def revision_in_history(self, revision: RevisionEntry) -> bool:
         """Tell whether `revision` appears as `prev` in `revision_before_rev`."""
         # TODO: adapt this method to consider cached values
         cursor = self.cursor
         cursor.execute(
             """SELECT 1
                  FROM revision_before_rev
                  JOIN revision
                    ON revision.id=revision_before_rev.prev
                  WHERE revision.sha1=%s""",
             (revision.id,),
         )
         match = cursor.fetchone()
         return match is not None
 
     def revision_set_prefered_origin(
         self, origin: OriginEntry, revision: RevisionEntry
     ):
         """Store `origin.id` in the `org` column of the row matching `revision.id`."""
         # TODO: adapt this method to consider cached values
         params = (origin.id, revision.id)
         self.cursor.execute("""UPDATE revision SET org=%s WHERE sha1=%s""", params)
 
     def revision_visited(self, revision: RevisionEntry) -> bool:
         """Tell whether `revision` has at least one row in `revision_in_org`."""
         # TODO: adapt this method to consider cached values
         cursor = self.cursor
         cursor.execute(
             """SELECT 1
                  FROM revision_in_org
                  JOIN revision
                    ON revision.id=revision_in_org.rev
                  WHERE revision.sha1=%s""",
             (revision.id,),
         )
         match = cursor.fetchone()
         return match is not None
diff --git a/swh/provenance/postgresql_nopath/__init__.py b/swh/provenance/postgresql_nopath/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql_nopath/provenance.py
similarity index 72%
copy from swh/provenance/postgresql/provenance.py
copy to swh/provenance/postgresql_nopath/provenance.py
index 927ca4a..7728c8d 100644
--- a/swh/provenance/postgresql/provenance.py
+++ b/swh/provenance/postgresql_nopath/provenance.py
@@ -1,503 +1,442 @@
 import itertools
 import logging
 import operator
 import os
 import psycopg2
 import psycopg2.extras
 
 from ..model import DirectoryEntry, FileEntry
 from ..origin import OriginEntry
-from .db_utils import connect, execute_sql
+from ..postgresql.db_utils import connect, execute_sql
 from ..provenance import ProvenanceInterface
 from ..revision import RevisionEntry
 
 from datetime import datetime
 from typing import Any, Dict, Generator, List, Optional, Tuple
 
 
-def normalize(path: bytes) -> bytes:
-    return path[2:] if path.startswith(bytes("." + os.path.sep, "utf-8")) else path
-
-
 def create_database(conn: psycopg2.extensions.connection, conninfo: dict, name: str):
     """Drop and recreate database `name`, then load `provenance.sql` into it.
     Args:
         conn: open connection to the server; it is switched to autocommit and
             closed by this function.
         conninfo: connection parameters; its "dbname" entry is overwritten
             with the normalized name before reconnecting.
         name: database name; it is case-folded before use.
     """
     # Local import: the file only imports `psycopg2` and `psycopg2.extras`.
     from psycopg2 import sql
     conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
     # Normalize dbname to avoid issues when reconnecting below
     name = name.casefold()
     # Compose the statements with a quoted identifier instead of raw string
     # interpolation, so unusual names cannot break or alter the SQL.
     database = sql.Identifier(name)
     # Create new database dropping previous one if exists
     cursor = conn.cursor()
     cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(database))
     cursor.execute(sql.SQL("CREATE DATABASE {}").format(database))
     conn.close()
     # Reconnect to server selecting newly created database to add tables
     conninfo["dbname"] = name
     conn = connect(conninfo)
     sqldir = os.path.dirname(os.path.realpath(__file__))
     execute_sql(conn, os.path.join(sqldir, "provenance.sql"))
 
 
 ########################################################################################
 ########################################################################################
 ########################################################################################
 
 
-class ProvenancePostgreSQL(ProvenanceInterface):
+class ProvenancePostgreSQLNoPath(ProvenanceInterface):
     def __init__(self, conn: psycopg2.extensions.connection):
         """Wrap `conn` in autocommit mode and start with empty caches."""
         # TODO: consider adding a mutex for thread safety
         autocommit = psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT
         conn.set_isolation_level(autocommit)
         self.conn = conn
         self.cursor = conn.cursor()
         # Declared here for typing; clear_caches() installs the real layout
         self.insert_cache: Dict[str, Any] = dict()
         self.remove_cache: Dict[str, Any] = dict()
         self.select_cache: Dict[str, Any] = dict()
         self.clear_caches()
 
     def clear_caches(self):
         self.insert_cache = {
             "content": dict(),
-            "content_early_in_rev": list(),
-            "content_in_dir": list(),
+            "content_early_in_rev": set(),
+            "content_in_dir": set(),
             "directory": dict(),
-            "directory_in_rev": list(),
+            "directory_in_rev": set(),
             "revision": dict(),
             "revision_before_rev": list(),
             "revision_in_org": list(),
         }
         self.remove_cache = {"directory": dict()}
         self.select_cache = {"content": dict(), "directory": dict(), "revision": dict()}
 
     def commit(self):
         """Flush every cached insertion to the database.
         Returns:
             True when `insert_all` succeeded (the caches are then cleared);
             False when any exception was raised. The failure is logged with
             its traceback and not re-raised.
         """
         try:
             self.insert_all()
             self.clear_caches()
         except Exception as error:
             # Deliberate best-effort boundary: failure is reported through the
             # return value. Nothing is rolled back here; the connection is put
             # in autocommit mode by __init__.
             logging.exception(f"Unexpected error: {error}")
             return False
         return True
 
     def content_add_to_directory(
         self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes
     ):
-        self.insert_cache["content_in_dir"].append(
-            (blob.id, directory.id, normalize(os.path.join(prefix, blob.name)))
-        )
+        self.insert_cache["content_in_dir"].add((blob.id, directory.id))
 
     def content_add_to_revision(
         self, revision: RevisionEntry, blob: FileEntry, prefix: bytes
     ):
-        self.insert_cache["content_early_in_rev"].append(
-            (blob.id, revision.id, normalize(os.path.join(prefix, blob.name)))
-        )
+        self.insert_cache["content_early_in_rev"].add((blob.id, revision.id))
 
-    def content_find_first(self, blobid: bytes) -> Tuple[bytes, bytes, datetime, bytes]:
+    def content_find_first(
+        self, blobid: bytes
+    ) -> Optional[Tuple[bytes, bytes, datetime, bytes]]:
         self.cursor.execute(
-            """SELECT content_location.sha1 AS blob,
-                      revision.sha1 AS rev,
-                      revision.date AS date,
-                      content_location.path AS path
-                 FROM (SELECT content_hex.sha1,
-                              content_hex.rev,
-                              location.path
-                        FROM (SELECT content.sha1,
-                                     content_early_in_rev.rev,
-                                     content_early_in_rev.loc
-                               FROM content_early_in_rev
-                               JOIN content
-                                 ON content.id=content_early_in_rev.blob
-                               WHERE content.sha1=%s
-                             ) AS content_hex
-                        JOIN location
-                            ON location.id=content_hex.loc
-                      ) AS content_location
+            """SELECT revision.sha1 AS rev,
+                      revision.date AS date
+                 FROM (SELECT content_early_in_rev.rev
+                          FROM content_early_in_rev
+                          JOIN content
+                            ON content.id=content_early_in_rev.blob
+                          WHERE content.sha1=%s
+                      ) AS content_in_rev
                  JOIN revision
-                   ON revision.id=content_location.rev
-                 ORDER BY date, rev, path ASC LIMIT 1""",
+                   ON revision.id=content_in_rev.rev
+                 ORDER BY date, rev ASC LIMIT 1""",
             (blobid,),
         )
-        return self.cursor.fetchone()
+        row = self.cursor.fetchone()
+        if row is not None:
+            # TODO: query revision from the archive and look for blobid into a
+            # recursive directory_ls of the revision's root.
+            return blobid, row[0], row[1], b""
+        return None
 
     def content_find_all(
         self, blobid: bytes
     ) -> Generator[Tuple[bytes, bytes, datetime, bytes], None, None]:
         self.cursor.execute(
-            """(SELECT content_location.sha1 AS blob,
-                       revision.sha1 AS rev,
-                       revision.date AS date,
-                       content_location.path AS path
-                 FROM (SELECT content_hex.sha1,
-                              content_hex.rev,
-                              location.path
-                        FROM (SELECT content.sha1,
-                                     content_early_in_rev.rev,
-                                     content_early_in_rev.loc
-                               FROM content_early_in_rev
-                               JOIN content
-                                 ON content.id=content_early_in_rev.blob
-                               WHERE content.sha1=%s
-                             ) AS content_hex
-                        JOIN location
-                          ON location.id=content_hex.loc
-                      ) AS content_location
+            """(SELECT revision.sha1 AS rev,
+                       revision.date AS date
+                 FROM (SELECT content_early_in_rev.rev
+                          FROM content_early_in_rev
+                          JOIN content
+                            ON content.id=content_early_in_rev.blob
+                          WHERE content.sha1=%s
+                      ) AS content_in_rev
                  JOIN revision
-                   ON revision.id=content_location.rev
+                   ON revision.id=content_in_rev.rev
                  )
                UNION
-               (SELECT content_prefix.sha1 AS blob,
-                       revision.sha1 AS rev,
-                       revision.date AS date,
-                       content_prefix.path AS path
-                 FROM (SELECT content_in_rev.sha1,
-                              content_in_rev.rev,
-                              CASE location.path
-                                WHEN '' THEN content_in_rev.suffix
-                                WHEN '.' THEN content_in_rev.suffix
-                                ELSE (location.path || '/' ||
-                                         content_in_rev.suffix)::unix_path
-                              END AS path
-                        FROM (SELECT content_suffix.sha1,
-                                     directory_in_rev.rev,
-                                     directory_in_rev.loc,
-                                     content_suffix.path AS suffix
-                               FROM (SELECT content_hex.sha1,
-                                            content_hex.dir,
-                                            location.path
-                                      FROM (SELECT content.sha1,
-                                                   content_in_dir.dir,
-                                                   content_in_dir.loc
-                                             FROM content_in_dir
-                                             JOIN content
-                                               ON content_in_dir.blob=content.id
-                                             WHERE content.sha1=%s
-                                           ) AS content_hex
-                                      JOIN location
-                                        ON location.id=content_hex.loc
-                                    ) AS content_suffix
-                               JOIN directory_in_rev
-                                 ON directory_in_rev.dir=content_suffix.dir
-                             ) AS content_in_rev
-                        JOIN location
-                          ON location.id=content_in_rev.loc
-                      ) AS content_prefix
+               (SELECT revision.sha1 AS rev,
+                       revision.date AS date
+                 FROM (SELECT directory_in_rev.rev
+                          FROM (SELECT content_in_dir.dir
+                                   FROM content_in_dir
+                                   JOIN content
+                                     ON content_in_dir.blob=content.id
+                                   WHERE content.sha1=%s
+                               ) AS content_dir
+                          JOIN directory_in_rev
+                            ON directory_in_rev.dir=content_dir.dir
+                      ) AS content_in_rev
                  JOIN revision
-                   ON revision.id=content_prefix.rev
+                   ON revision.id=content_in_rev.rev
                )
-               ORDER BY date, rev, path""",
+               ORDER BY date, rev""",
             (blobid, blobid),
         )
         # TODO: use POSTGRESQL EXPLAIN looking for query optimizations.
-        yield from self.cursor.fetchall()
+        for row in self.cursor.fetchall():
+            # TODO: query revision from the archive and look for blobid into a
+            # recursive directory_ls of the revision's root.
+            yield blobid, row[0], row[1], b""
 
     def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]:
         """Return the date known for `blob`, or None if there is none.
         Lookup order: pending-insert cache, then select cache, then the
         `content` table; the database answer is stored in the select cache.
         """
         key = blob.id
         # A date set but not yet flushed takes precedence
         found = self.insert_cache["content"].get(key)
         if found is not None:
             return found
         # Next, reuse the answer of a previous query
         found = self.select_cache["content"].get(key)
         if found is not None:
             return found
         # Neither cache has a date: ask the database and remember the answer
         self.cursor.execute("""SELECT date FROM content WHERE sha1=%s""", (key,))
         record = self.cursor.fetchone()
         found = None if record is None else record[0]
         self.select_cache["content"][key] = found
         return found
 
     def content_get_early_dates(self, blobs: List[FileEntry]) -> Dict[bytes, datetime]:
         """Return the known dates of `blobs`, keyed by blob id.
         Blobs without a date in either cache are fetched from the `content`
         table in a single query; blobs absent from the database are omitted.
         """
         inserted = self.insert_cache["content"]
         selected = self.select_cache["content"]
         known = {}
         missing = []
         for blob in blobs:
             sha1 = blob.id
             # Pending insertions win over previously selected values
             when = inserted.get(sha1)
             if when is None:
                 when = selected.get(sha1)
             if when is None:
                 missing.append(sha1)
             else:
                 known[sha1] = when
         if not missing:
             return known
         # One placeholder per id still unresolved
         placeholders = ", ".join(["%s"] * len(missing))
         self.cursor.execute(
             f"""SELECT sha1, date FROM content WHERE sha1 IN ({placeholders})""",
             tuple(missing),
         )
         for sha1, when in self.cursor.fetchall():
             known[sha1] = when
             selected[sha1] = when
         return known
 
     def content_set_early_date(self, blob: FileEntry, date: datetime):
         """Record `date` under `blob.id` in the pending-insert cache."""
         pending = self.insert_cache["content"]
         pending[blob.id] = date
 
     def directory_add_to_revision(
         self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes
     ):
-        self.insert_cache["directory_in_rev"].append(
-            (directory.id, revision.id, normalize(path))
-        )
+        self.insert_cache["directory_in_rev"].add((directory.id, revision.id))
 
     def directory_get_date_in_isochrone_frontier(
         self, directory: DirectoryEntry
     ) -> Optional[datetime]:
         """Return the date known for `directory`, or None if there is none.
         A pending insertion wins; a directory listed in the remove cache yields
         None without further lookup; otherwise the select cache and then the
         `directory` table are consulted, caching the database answer.
         """
         key = directory.id
         date = self.insert_cache["directory"].get(key)
         if date is not None or key in self.remove_cache["directory"]:
             # Either a pending date, or None for an invalidated directory
             return date
         date = self.select_cache["directory"].get(key)
         if date is not None:
             return date
         # Neither cache has a date: ask the database and remember the answer
         self.cursor.execute("""SELECT date FROM directory WHERE sha1=%s""", (key,))
         record = self.cursor.fetchone()
         date = None if record is None else record[0]
         self.select_cache["directory"][key] = date
         return date
 
     def directory_get_dates_in_isochrone_frontier(
         self, dirs: List[DirectoryEntry]
     ) -> Dict[bytes, datetime]:
         """Return the known dates of `dirs`, keyed by directory id.
         Directories listed in the remove cache (and without a pending date) are
         skipped. Directories without a date in either cache are fetched from
         the `directory` table in a single query.
         """
         inserted = self.insert_cache["directory"]
         removed = self.remove_cache["directory"]
         selected = self.select_cache["directory"]
         known = {}
         missing = []
         for directory in dirs:
             sha1 = directory.id
             when = inserted.get(sha1)
             if when is not None:
                 known[sha1] = when
                 continue
             if sha1 in removed:
                 # Invalidated in this session: report no date
                 continue
             when = selected.get(sha1)
             if when is None:
                 missing.append(sha1)
             else:
                 known[sha1] = when
         if not missing:
             return known
         # One placeholder per id still unresolved
         placeholders = ", ".join(["%s"] * len(missing))
         self.cursor.execute(
             f"""SELECT sha1, date FROM directory WHERE sha1 IN ({placeholders})""",
             tuple(missing),
         )
         for sha1, when in self.cursor.fetchall():
             known[sha1] = when
             selected[sha1] = when
         return known
 
     def directory_invalidate_in_isochrone_frontier(self, directory: DirectoryEntry):
         """Mark `directory` as removed and drop any pending date for it."""
         key = directory.id
         removed = self.remove_cache["directory"]
         removed[key] = None
         pending = self.insert_cache["directory"]
         pending.pop(key, None)
 
     def directory_set_date_in_isochrone_frontier(
         self, directory: DirectoryEntry, date: datetime
     ):
         """Record `date` for `directory` and cancel any pending removal of it."""
         key = directory.id
         pending = self.insert_cache["directory"]
         pending[key] = date
         removed = self.remove_cache["directory"]
         removed.pop(key, None)
 
     def insert_all(self):
         """Write the pending-insert caches to the database.
         Dates cached for `content`, `directory` and `revision` are upserted by
         sha1, keeping the earlier of the stored and the cached date (LEAST).
         The three relation caches are then flushed through `insert_location`.
         Each cache is cleared right after it has been written.
         NOTE(review): the flushes of `revision_before_rev` and
         `revision_in_org` are commented out below, so pairs appended to those
         caches are never written by this method.
         """
         # Perform insertions with cached information
         if self.insert_cache["content"]:
             psycopg2.extras.execute_values(
                 self.cursor,
                 """LOCK TABLE ONLY content;
                    INSERT INTO content(sha1, date) VALUES %s
                      ON CONFLICT (sha1) DO
                        UPDATE SET date=LEAST(EXCLUDED.date,content.date)""",
                 self.insert_cache["content"].items(),
             )
             self.insert_cache["content"].clear()
 
         if self.insert_cache["directory"]:
             psycopg2.extras.execute_values(
                 self.cursor,
                 """LOCK TABLE ONLY directory;
                    INSERT INTO directory(sha1, date) VALUES %s
                      ON CONFLICT (sha1) DO
                        UPDATE SET date=LEAST(EXCLUDED.date,directory.date)""",
                 self.insert_cache["directory"].items(),
             )
             self.insert_cache["directory"].clear()
 
         if self.insert_cache["revision"]:
             psycopg2.extras.execute_values(
                 self.cursor,
                 """LOCK TABLE ONLY revision;
                    INSERT INTO revision(sha1, date) VALUES %s
                      ON CONFLICT (sha1) DO
                        UPDATE SET date=LEAST(EXCLUDED.date,revision.date)""",
                 self.insert_cache["revision"].items(),
             )
             self.insert_cache["revision"].clear()
 
         # Relations should come after ids for elements were resolved
         # (insert_location looks the sha1s up in the tables filled above)
         if self.insert_cache["content_early_in_rev"]:
             self.insert_location("content", "revision", "content_early_in_rev")
 
         if self.insert_cache["content_in_dir"]:
             self.insert_location("content", "directory", "content_in_dir")
 
         if self.insert_cache["directory_in_rev"]:
             self.insert_location("directory", "revision", "directory_in_rev")
 
         # if self.insert_cache["revision_before_rev"]:
         #     psycopg2.extras.execute_values(
         #         self.cursor,
         #         """INSERT INTO revision_before_rev VALUES %s
         #            ON CONFLICT DO NOTHING""",
         #         self.insert_cache["revision_before_rev"],
         #     )
         #     self.insert_cache["revision_before_rev"].clear()
 
         # if self.insert_cache["revision_in_org"]:
         #     psycopg2.extras.execute_values(
         #         self.cursor,
         #         """INSERT INTO revision_in_org VALUES %s
         #            ON CONFLICT DO NOTHING""",
         #         self.insert_cache["revision_in_org"],
         #     )
         #     self.insert_cache["revision_in_org"].clear()
 
     def insert_location(self, src0_table, src1_table, dst_table):
         # Resolve src0 ids
         src0_values = dict().fromkeys(
             map(operator.itemgetter(0), self.insert_cache[dst_table])
         )
         values = ", ".join(itertools.repeat("%s", len(src0_values)))
         self.cursor.execute(
             f"""SELECT sha1, id FROM {src0_table} WHERE sha1 IN ({values})""",
             tuple(src0_values),
         )
         src0_values = dict(self.cursor.fetchall())
 
         # Resolve src1 ids
         src1_values = dict().fromkeys(
             map(operator.itemgetter(1), self.insert_cache[dst_table])
         )
         values = ", ".join(itertools.repeat("%s", len(src1_values)))
         self.cursor.execute(
             f"""SELECT sha1, id FROM {src1_table} WHERE sha1 IN ({values})""",
             tuple(src1_values),
         )
         src1_values = dict(self.cursor.fetchall())
 
-        # Resolve location ids
-        location = dict().fromkeys(
-            map(operator.itemgetter(2), self.insert_cache[dst_table])
-        )
-        location = dict(
-            psycopg2.extras.execute_values(
-                self.cursor,
-                """LOCK TABLE ONLY location;
-                   INSERT INTO location(path) VALUES %s
-                     ON CONFLICT (path) DO
-                       UPDATE SET path=EXCLUDED.path
-                     RETURNING path, id""",
-                map(lambda path: (path,), location.keys()),
-                fetch=True,
-            )
-        )
-
         # Insert values in dst_table
         rows = map(
-            lambda row: (src0_values[row[0]], src1_values[row[1]], location[row[2]]),
+            lambda row: (src0_values[row[0]], src1_values[row[1]]),
             self.insert_cache[dst_table],
         )
         psycopg2.extras.execute_values(
             self.cursor,
             f"""INSERT INTO {dst_table} VALUES %s
                   ON CONFLICT DO NOTHING""",
             rows,
         )
         self.insert_cache[dst_table].clear()
 
     def origin_get_id(self, origin: OriginEntry) -> int:
         """Return the internal id of `origin`, registering its URL when needed.
         If `origin.id` is already set it is returned unchanged. Otherwise the URL
         is inserted into the `origin` table; when the insert is skipped because
         of a conflict, the id of the existing row is looked up instead.
         """
         if origin.id is None:
             # Insert origin in the DB and return the assigned id
             self.cursor.execute(
                 """INSERT INTO origin (url) VALUES (%s)
                      ON CONFLICT DO NOTHING
                      RETURNING id""",
                 (origin.url,),
             )
             row = self.cursor.fetchone()
             if row is None:
                 # ON CONFLICT DO NOTHING returns no row when nothing was
                 # inserted, so fetch the id of the already registered URL.
                 self.cursor.execute(
                     """SELECT id FROM origin WHERE url=%s""", (origin.url,)
                 )
                 row = self.cursor.fetchone()
             return row[0]
         return origin.id
 
     def revision_add(self, revision: RevisionEntry):
         """Record `revision.date` under `revision.id` in the pending-insert cache."""
         pending = self.insert_cache["revision"]
         pending[revision.id] = revision.date
 
     def revision_add_before_revision(
         self, relative: RevisionEntry, revision: RevisionEntry
     ):
         """Cache the pair (`revision.id`, `relative.id`) for `revision_before_rev`."""
         pair = (revision.id, relative.id)
         self.insert_cache["revision_before_rev"].append(pair)
 
     def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry):
         """Cache the pair (`revision.id`, `origin.id`) for `revision_in_org`."""
         pair = (revision.id, origin.id)
         self.insert_cache["revision_in_org"].append(pair)
 
     def revision_get_early_date(self, revision: RevisionEntry) -> Optional[datetime]:
         """Return the date known for `revision`, or None if there is none.
         Lookup order: pending-insert cache, then select cache, then the
         `revision` table; the database answer is stored in the select cache.
         """
         key = revision.id
         found = self.insert_cache["revision"].get(key)
         if found is not None:
             return found
         found = self.select_cache["revision"].get(key)
         if found is not None:
             return found
         # Neither cache has a date: ask the database and remember the answer
         self.cursor.execute("""SELECT date FROM revision WHERE sha1=%s""", (key,))
         record = self.cursor.fetchone()
         found = None if record is None else record[0]
         self.select_cache["revision"][key] = found
         return found
 
     def revision_get_prefered_origin(self, revision: RevisionEntry) -> Optional[int]:
         """Return the preferred origin id stored for `revision`, if any.
         Returns None both when the revision is not in the database and when
         its `org` column is NULL or 0 (no preferred origin).
         """
         # TODO: adapt this method to consider cached values
         self.cursor.execute(
             """SELECT COALESCE(org,0) FROM revision WHERE sha1=%s""", (revision.id,)
         )
         row = self.cursor.fetchone()
         # None means revision is not in database;
         # 0 means revision has no prefered origin
         if row is None or row[0] == 0:
             return None
         return row[0]
 
     def revision_in_history(self, revision: RevisionEntry) -> bool:
         """Tell whether `revision` appears as `prev` in `revision_before_rev`."""
         # TODO: adapt this method to consider cached values
         cursor = self.cursor
         cursor.execute(
             """SELECT 1
                  FROM revision_before_rev
                  JOIN revision
                    ON revision.id=revision_before_rev.prev
                  WHERE revision.sha1=%s""",
             (revision.id,),
         )
         match = cursor.fetchone()
         return match is not None
 
     def revision_set_prefered_origin(
         self, origin: OriginEntry, revision: RevisionEntry
     ):
         """Store `origin.id` in the `org` column of the row matching `revision.id`."""
         # TODO: adapt this method to consider cached values
         params = (origin.id, revision.id)
         self.cursor.execute("""UPDATE revision SET org=%s WHERE sha1=%s""", params)
 
     def revision_visited(self, revision: RevisionEntry) -> bool:
         """Tell whether `revision` has at least one row in `revision_in_org`."""
         # TODO: adapt this method to consider cached values
         cursor = self.cursor
         cursor.execute(
             """SELECT 1
                  FROM revision_in_org
                  JOIN revision
                    ON revision.id=revision_in_org.rev
                  WHERE revision.sha1=%s""",
             (revision.id,),
         )
         match = cursor.fetchone()
         return match is not None
diff --git a/swh/provenance/postgresql_nopath/provenance.sql b/swh/provenance/postgresql_nopath/provenance.sql
new file mode 100644
index 0000000..7d7f4ae
--- /dev/null
+++ b/swh/provenance/postgresql_nopath/provenance.sql
@@ -0,0 +1,130 @@
+-- a Git object ID, i.e., a Git-style salted SHA1 checksum (exactly 20 raw bytes)
+drop domain if exists sha1_git cascade;  -- cascade also drops objects depending on the domain (e.g. table columns)
+create domain sha1_git as bytea check (length(value) = 20);
+
+-- UNIX path (absolute, relative, individual path component, etc.), stored as raw bytes
+drop domain if exists unix_path cascade;  -- cascade also drops objects depending on the domain (e.g. table columns)
+create domain unix_path as bytea;
+
+
+drop table if exists content;
+create table content
+(
+    id      bigserial primary key,      -- internal identifier of the content blob
+    sha1    sha1_git unique not null,   -- intrinsic identifier of the content blob (Git object ID, see sha1_git)
+    date    timestamptz not null        -- earliest timestamp known for the blob (first seen time)
+);
+
+comment on column content.id is 'Content internal identifier';
+comment on column content.sha1 is 'Content intrinsic identifier';
+comment on column content.date is 'Earliest timestamp for the content (first seen time)';
+
+
+drop table if exists content_early_in_rev;
+create table content_early_in_rev
+(
+    blob    bigint not null,            -- internal identifier of the content blob (content.id; foreign key not enforced)
+    rev     bigint not null,            -- internal identifier of the revision where the blob appears for the first time (revision.id; foreign key not enforced)
+    primary key (blob, rev)
+    -- foreign key (blob) references content (id),
+    -- foreign key (rev) references revision (id)
+);
+
+comment on column content_early_in_rev.blob is 'Content internal identifier';
+comment on column content_early_in_rev.rev is 'Revision internal identifier';
+
+
+drop table if exists content_in_dir;
+create table content_in_dir
+(
+    blob    bigint not null,            -- internal identifier of the content blob (content.id; foreign key not enforced)
+    dir     bigint not null,            -- internal identifier of the directory containing the blob (directory.id; foreign key not enforced)
+    primary key (blob, dir)
+    -- foreign key (blob) references content (id),
+    -- foreign key (dir) references directory (id)
+);
+
+comment on column content_in_dir.blob is 'Content internal identifier';
+comment on column content_in_dir.dir is 'Directory internal identifier';
+
+
+drop table if exists directory;
+create table directory
+(
+    id      bigserial primary key,      -- internal identifier of the directory appearing in an isochrone inner frontier
+    sha1    sha1_git unique not null,   -- intrinsic identifier of the directory (Git object ID, see sha1_git)
+    date    timestamptz not null        -- max timestamp among those of the directory's children
+);
+
+comment on column directory.id is 'Directory internal identifier';
+comment on column directory.sha1 is 'Directory intrinsic identifier';
+comment on column directory.date is 'Latest timestamp for the content in the directory';
+
+
+drop table if exists directory_in_rev;
+create table directory_in_rev
+(
+    dir     bigint not null,            -- internal identifier of the directory appearing in the revision (directory.id; foreign key not enforced)
+    rev     bigint not null,            -- internal identifier of the revision containing the directory (revision.id; foreign key not enforced)
+    primary key (dir, rev)
+    -- foreign key (dir) references directory (id),
+    -- foreign key (rev) references revision (id)
+);
+
+comment on column directory_in_rev.dir is 'Directory internal identifier';
+comment on column directory_in_rev.rev is 'Revision internal identifier';
+
+
+drop table if exists origin;
+create table origin
+(
+    id      bigserial primary key,      -- internal identifier of the origin
+    url     unix_path unique not null   -- url of the origin (raw bytes: unix_path is a bytea domain)
+);
+
+comment on column origin.id is 'Origin internal identifier';
+comment on column origin.url is 'URL of the origin';
+
+
+drop table if exists revision;
+create table revision
+(
+    id      bigserial primary key,      -- internal identifier of the revision
+    sha1    sha1_git unique not null,   -- intrinsic identifier of the revision (Git object ID, see sha1_git)
+    date    timestamptz not null,       -- timestamp of the revision
+    org     bigint                      -- id of the preferred origin (origin.id; nullable, foreign key not enforced)
+    -- foreign key (org) references origin (id)
+);
+
+comment on column revision.id is 'Revision internal identifier';
+comment on column revision.sha1 is 'Revision intrinsic identifier';
+comment on column revision.date is 'Revision timestamp';
+comment on column revision.org is 'Prefered origin for the revision';
+
+
+drop table if exists revision_before_rev;
+create table revision_before_rev
+(
+    prev    bigint not null,            -- internal identifier of the source revision (revision.id; plain bigint, not serial: the value always comes from revision)
+    next    bigint not null,            -- internal identifier of the destination revision (revision.id; plain bigint, not serial: the value always comes from revision)
+    primary key (prev, next)
+    -- foreign key (prev) references revision (id),
+    -- foreign key (next) references revision (id)
+);
+
+comment on column revision_before_rev.prev is 'Source revision internal identifier';
+comment on column revision_before_rev.next is 'Destination revision internal identifier';
+
+
+drop table if exists revision_in_org;
+create table revision_in_org
+(
+    rev     bigint not null,            -- internal identifier of the revision pointed to by the origin (revision.id; foreign key not enforced)
+    org     bigint not null,            -- internal identifier of the origin that points to the revision (origin.id; foreign key not enforced)
+    primary key (rev, org)
+    -- foreign key (rev) references revision (id),
+    -- foreign key (org) references origin (id)
+);
+
+comment on column revision_in_org.rev is 'Revision internal identifier';
+comment on column revision_in_org.org is 'Origin internal identifier';
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
index 26025a8..ccf2317 100644
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -1,425 +1,427 @@
 import os
 
 from .archive import ArchiveInterface
 from .model import DirectoryEntry, FileEntry
 from .origin import OriginEntry
 from .revision import RevisionEntry
 
 from datetime import datetime
 from typing import Dict, Generator, List, Optional, Tuple
 
 
 # TODO: consider moving to path utils file together with normalize.
 def is_child(path: bytes, prefix: bytes) -> bool:
     return path != prefix and os.path.dirname(path) == prefix
 
 
 class ProvenanceInterface:
     def __init__(self, **kwargs):
         raise NotImplementedError
 
     def commit(self):
         raise NotImplementedError
 
     def content_add_to_directory(
         self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes
     ):
         raise NotImplementedError
 
     def content_add_to_revision(
         self, revision: RevisionEntry, blob: FileEntry, prefix: bytes
     ):
         raise NotImplementedError
 
-    def content_find_first(self, blobid: bytes) -> Tuple[bytes, bytes, datetime, bytes]:
+    def content_find_first(
+        self, blobid: bytes
+    ) -> Optional[Tuple[bytes, bytes, datetime, bytes]]:
         raise NotImplementedError
 
     def content_find_all(
         self, blobid: bytes
     ) -> Generator[Tuple[bytes, bytes, datetime, bytes], None, None]:
         raise NotImplementedError
 
     def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]:
         raise NotImplementedError
 
     def content_get_early_dates(self, blobs: List[FileEntry]) -> Dict[bytes, datetime]:
         raise NotImplementedError
 
     def content_set_early_date(self, blob: FileEntry, date: datetime):
         raise NotImplementedError
 
     def directory_add_to_revision(
         self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes
     ):
         raise NotImplementedError
 
     def directory_get_date_in_isochrone_frontier(
         self, directory: DirectoryEntry
     ) -> Optional[datetime]:
         raise NotImplementedError
 
     def directory_get_dates_in_isochrone_frontier(
         self, dirs: List[DirectoryEntry]
     ) -> Dict[bytes, datetime]:
         raise NotImplementedError
 
     def directory_invalidate_in_isochrone_frontier(self, directory: DirectoryEntry):
         raise NotImplementedError
 
     def directory_set_date_in_isochrone_frontier(
         self, directory: DirectoryEntry, date: datetime
     ):
         raise NotImplementedError
 
     def origin_get_id(self, origin: OriginEntry) -> int:
         raise NotImplementedError
 
     def revision_add(self, revision: RevisionEntry):
         raise NotImplementedError
 
     def revision_add_before_revision(
         self, relative: RevisionEntry, revision: RevisionEntry
     ):
         raise NotImplementedError
 
     def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry):
         raise NotImplementedError
 
     def revision_get_early_date(self, revision: RevisionEntry) -> Optional[datetime]:
         raise NotImplementedError
 
     def revision_get_prefered_origin(self, revision: RevisionEntry) -> int:
         raise NotImplementedError
 
     def revision_in_history(self, revision: RevisionEntry) -> bool:
         raise NotImplementedError
 
     def revision_set_prefered_origin(
         self, origin: OriginEntry, revision: RevisionEntry
     ):
         raise NotImplementedError
 
     def revision_visited(self, revision: RevisionEntry) -> bool:
         raise NotImplementedError
 
 
 def directory_process_content(
     provenance: ProvenanceInterface, directory: DirectoryEntry, relative: DirectoryEntry
 ):
     stack = [(directory, b"")]
 
     while stack:
         current, prefix = stack.pop()
 
         for child in iter(current):
             if isinstance(child, FileEntry):
                 # Add content to the relative directory with the computed prefix.
                 provenance.content_add_to_directory(relative, child, prefix)
             else:
                 # Recursively walk the child directory.
                 stack.append((child, os.path.join(prefix, child.name)))
 
 
 def directory_update_content(
     stack: List[Tuple[DirectoryEntry, bytes]],
     provenance: ProvenanceInterface,
     revision: RevisionEntry,
     directory: DirectoryEntry,
     prefix: bytes,
     subdirs: Optional[List[DirectoryEntry]] = None,
     blobs: Optional[List[FileEntry]] = None,
     blobdates: Optional[Dict[bytes, datetime]] = None,
 ):
     assert revision.date is not None
 
     # Init optional parameters if not provided.
     if subdirs is None:
         subdirs = [child for child in directory if isinstance(child, DirectoryEntry)]
 
     if blobs is None:
         blobs = [child for child in directory if isinstance(child, FileEntry)]
 
     if blobdates is None:
         blobdates = provenance.content_get_early_dates(blobs)
 
     # Iterate over blobs updating their date if necessary.
     for blob in blobs:
         date = blobdates.get(blob.id, None)
         if date is None or revision.date < date:
             provenance.content_set_early_date(blob, revision.date)
 
     # Push all subdirectories with its corresponding path to analyze them
     # recursively.
     for subdir in subdirs:
         stack.append((subdir, os.path.join(prefix, subdir.name)))
 
 
 def origin_add(provenance: ProvenanceInterface, origin: OriginEntry):
     # TODO: refactor to iterate over origin visit statuses and commit only once
     # per status.
     origin.id = provenance.origin_get_id(origin)
 
     for revision in origin.revisions:
         origin_add_revision(provenance, origin, revision)
 
         # Commit after each revision
         provenance.commit()  # TODO: verify this!
 
 
 def origin_add_revision(
     provenance: ProvenanceInterface, origin: OriginEntry, revision: RevisionEntry
 ):
     stack: List[Tuple[Optional[RevisionEntry], RevisionEntry]] = [(None, revision)]
 
     while stack:
         relative, current = stack.pop()
 
         # Check if current revision has no prefered origin and update if necessary.
         prefered = provenance.revision_get_prefered_origin(current)
 
         if prefered is None:
             provenance.revision_set_prefered_origin(origin, current)
         ########################################################################
 
         if relative is None:
             # This revision is pointed directly by the origin.
             visited = provenance.revision_visited(current)
             provenance.revision_add_to_origin(origin, current)
 
             if not visited:
                 stack.append((current, current))
 
         else:
             # This revision is a parent of another one in the history of the
             # relative revision.
             for parent in iter(current):
                 visited = provenance.revision_visited(parent)
 
                 if not visited:
                     # The parent revision has never been seen before pointing
                     # directly to an origin.
                     known = provenance.revision_in_history(parent)
 
                     if known:
                         # The parent revision is already known in some other
                         # revision's history. We should point it directly to
                         # the origin and (eventually) walk its history.
                         stack.append((None, parent))
                     else:
                         # The parent revision was never seen before. We should
                         # walk its history and associate it with the same
                         # relative revision.
                         provenance.revision_add_before_revision(relative, parent)
                         stack.append((relative, parent))
                 else:
                     # The parent revision already points to an origin, so its
                     # history was properly processed before. We just need to
                     # make sure it points to the current origin as well.
                     provenance.revision_add_to_origin(origin, parent)
 
 
 def revision_add(
     provenance: ProvenanceInterface, archive: ArchiveInterface, revision: RevisionEntry
 ):
     assert revision.date is not None
     assert revision.root is not None
 
     # Processed content starting from the revision's root directory.
     date = provenance.revision_get_early_date(revision)
     if date is None or revision.date < date:
         provenance.revision_add(revision)
         revision_process_content(
             provenance, revision, DirectoryEntry(archive, revision.root, b"")
         )
 
     # TODO: improve this! Maybe using a max attempt counter?
     # Idealy Provenance class should guarante that a commit never fails.
     while not provenance.commit():
         continue
 
 
 def revision_process_content(
     provenance: ProvenanceInterface, revision: RevisionEntry, root: DirectoryEntry
 ):
     assert revision.date is not None
 
     # Stack of directories (and their paths) to be processed.
     stack: List[Tuple[DirectoryEntry, bytes]] = [(root, root.name)]
     # This dictionary will hold the computed dates for visited subdirectories inside the
     # isochrone frontier.
     innerdirs: Dict[bytes, Tuple[DirectoryEntry, datetime]] = {}
     # This dictionary will hold the computed dates for visited subdirectories outside
     # the isochrone frontier which are candidates to be added to the outer frontier (if
     # their parent is in the inner frontier).
     outerdirs: Dict[bytes, Tuple[DirectoryEntry, datetime]] = {}
 
     while stack:
         # Get next directory to process and query its date right before processing to be
         # sure we get the most recently updated value.
         current, prefix = stack.pop()
         date = provenance.directory_get_date_in_isochrone_frontier(current)
 
         if date is None:
             # The directory has never been seen on the outer isochrone frontier of
             # previously processed revisions. Its children should be analyzed.
             blobs = [child for child in current if isinstance(child, FileEntry)]
             subdirs = [child for child in current if isinstance(child, DirectoryEntry)]
 
             # Get the list of ids with no duplicates to ensure we have available dates
             # for all the elements. This prevents taking a wrong decision when a blob
             # occurs more than once in the same directory.
             ids = list(
                 dict.fromkeys(
                     [child.id for child in blobs] + [child.id for child in subdirs]
                 )
             )
             if ids:
                 # Known dates for the blobs in the current directory.
                 blobdates = provenance.content_get_early_dates(blobs)
                 # Known dates for the subdirectories in the current directory that
                 # belong to the outer isochrone frontier of some previously processed
                 # revision.
                 knowndates = provenance.directory_get_dates_in_isochrone_frontier(
                     subdirs
                 )
                 # Known dates for the subdirectories in the current directory that are
                 # inside the isochrone frontier of the revision.
                 innerdates = {
                     innerdir.id: innerdate
                     for path, (innerdir, innerdate) in innerdirs.items()
                     if is_child(path, prefix)
                 }
                 # Known dates for the subdirectories in the current directory that are
                 # outside the isochrone frontier of the revision.
                 outerdates = {
                     outerdir.id: outerdate
                     for path, (outerdir, outerdate) in outerdirs.items()
                     if is_child(path, prefix)
                 }
 
                 # All known dates for child nodes of the current directory.
                 assert not (innerdates.keys() & outerdates.keys())
                 dates = list(
                     {**blobdates, **knowndates, **innerdates, **outerdates}.values()
                 )
 
                 if len(dates) == len(ids):
                     # All child nodes of current directory are already known.
                     maxdate = max(dates)
 
                     if maxdate < revision.date:
                         # The directory is outside the isochrone frontier of the
                         # revision. It is a candidate to be added to the outer frontier.
                         outerdirs[prefix] = (current, maxdate)
                         # Its children are removed since they are no longer candidates.
                         outerdirs = {
                             path: outerdir
                             for path, outerdir in outerdirs.items()
                             if not is_child(path, prefix)
                         }
 
                     elif maxdate == revision.date:
                         # The current directory is inside the isochrone frontier.
                         innerdirs[prefix] = (current, revision.date)
                         # Add blobs present in this level to the revision. No need to
                         # update dates as they are at most equal to current one.
                         for blob in blobs:
                             provenance.content_add_to_revision(revision, blob, prefix)
                         # If any of its children was found outside the frontier it
                         # should be added to the outer frontier now.
                         if outerdates:
                             for path, (outerdir, outerdate) in outerdirs.items():
                                 if is_child(path, prefix):
                                     provenance.directory_set_date_in_isochrone_frontier(
                                         outerdir, outerdate
                                     )
                                     provenance.directory_add_to_revision(
                                         revision, outerdir, path
                                     )
                                     directory_process_content(
                                         provenance,
                                         directory=outerdir,
                                         relative=outerdir,
                                     )
                             # Removed processed elements to avoid duplicating work.
                             outerdirs = {
                                 path: outerdir
                                 for path, outerdir in outerdirs.items()
                                 if not is_child(path, prefix)
                             }
                         # There can still be subdirectories that are known to be in the
                         # outter isochrone frontier of previous processed revisions.
                         # Thus, they are not in the list of candidates but have to be
                         # added to current revisions as well.
                         for subdir in subdirs:
                             knowndate = knowndates.get(subdir.id, None)
                             if knowndate is not None and knowndate <= revision.date:
                                 # Less or equal since the directory could have been
                                 # added to the outer isochrone frontier when processing
                                 # a different directory's subtree of this very same
                                 # revision.
                                 provenance.directory_add_to_revision(
                                     revision, subdir, os.path.join(prefix, subdir.name)
                                 )
 
                     else:
                         # The revision is out of order. The current directory does not
                         # belong to the outer isochrone frontier of any previously
                         # processed revision yet all its children nodes are known. They
                         # should be re-analyzed (and timestamps eventually updated) and
                         # current directory updated after them.
                         stack.append((current, prefix))
                         directory_update_content(
                             stack,
                             provenance,
                             revision,
                             current,
                             prefix,
                             subdirs=subdirs,
                             blobs=blobs,
                             blobdates=blobdates,
                         )
 
                 else:
                     # Al least one child node is unknown, ie. the current directory is
                     # inside the isochrone frontier of the current revision. Its child
                     # nodes should be analyzed and current directory updated after them.
                     stack.append((current, prefix))
                     directory_update_content(
                         stack,
                         provenance,
                         revision,
                         current,
                         prefix,
                         subdirs=subdirs,
                         blobs=blobs,
                         blobdates=blobdates,
                     )
 
             else:
                 # Empty directory. Just consider it to be in the inner frontier of
                 # current revision (ie. all its children are already "known").
                 innerdirs[prefix] = (current, revision.date)
 
         elif revision.date < date:
             # The revision is out of order. The current directory belongs to the outer
             # isochrone frontier of some previously processed revison but current
             # revision is earlier. The frontier record should be invalidated, children
             # nodes re-analyzed (and timestamps eventually updated) and current
             # directory updated after them.
             stack.append((current, prefix))
             provenance.directory_invalidate_in_isochrone_frontier(current)
             directory_update_content(stack, provenance, revision, current, prefix)
 
         else:
             # The directory has already been seen on the outer isochrone frontier of an
-            # earlier revision. Just add it to the current revision.
-            provenance.directory_add_to_revision(revision, current, prefix)
+            # earlier revision. Just stop the recursion here.
+            pass
 
     if root.name in outerdirs:
         # Only the root directory should be considered at this point.
         outerdir, outerdate = outerdirs[root.name]
 
         provenance.directory_set_date_in_isochrone_frontier(outerdir, outerdate)
         provenance.directory_add_to_revision(revision, outerdir, root.name)
         directory_process_content(provenance, directory=outerdir, relative=outerdir)