Page MenuHomeSoftware Heritage

No OneTemporary

This document is not UTF8. It was detected as ISO-8859-1 (Latin 1) and converted to UTF8 for display.
diff --git a/requirements-test.txt b/requirements-test.txt
index c23c4cc..b983580 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,7 +1,8 @@
+grpcio
pytest
pytest-rabbitmq
swh.loader.git >= 0.8
swh.journal >= 0.8
swh.storage >= 0.40
-swh.graph >= 0.3.2
+swh.graph[testing] >= 1.0.1
types-Deprecated
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
index 99370df..99163ba 100644
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -1,123 +1,121 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import annotations
from typing import TYPE_CHECKING
import warnings
if TYPE_CHECKING:
from .archive import ArchiveInterface
from .interface import ProvenanceInterface, ProvenanceStorageInterface
def get_archive(cls: str, **kwargs) -> ArchiveInterface:
"""Get an archive object of class ``cls`` with arguments ``kwargs``.
Args:
cls: archive's class, either 'api', 'direct', 'graph' or 'multiplexer'
kwargs: arguments used to build the archive object, depending on ``cls``:
'api' reads ``storage`` (a swh.storage configuration), 'direct' reads
``db`` (database connection arguments), 'graph' reads ``url`` and
``storage``, and 'multiplexer' reads ``archives`` (a list of archive
configurations, each accepted by this function, with an optional
``name`` entry)
Returns:
an instance of archive object (either using swh.storage API or direct
queries to the archive's database)
Raises:
:cls:`ValueError` if passed an unknown archive class.
:cls:`EnvironmentError` if the 'graph' class is requested but a required
module cannot be imported.
"""
if cls == "api":
from swh.storage import get_storage
from .storage.archive import ArchiveStorage
return ArchiveStorage(get_storage(**kwargs["storage"]))
elif cls == "direct":
from swh.core.db import BaseDb
from .postgresql.archive import ArchivePostgreSQL
return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn)
elif cls == "graph":
# any ModuleNotFoundError raised in this try body is reported as a missing
# graph module
try:
- from swh.graph.client import RemoteGraphClient
from swh.storage import get_storage
from .swhgraph.archive import ArchiveGraph
- graph = RemoteGraphClient(kwargs.get("url"))
- return ArchiveGraph(graph, get_storage(**kwargs["storage"]))
+ return ArchiveGraph(kwargs.get("url"), get_storage(**kwargs["storage"]))
except ModuleNotFoundError:
raise EnvironmentError(
"Graph configuration required but module is not installed."
)
elif cls == "multiplexer":
from .multiplexer.archive import ArchiveMultiplexed
archives = []
for ctr, archive in enumerate(kwargs["archives"]):
# "name" is popped from the entry (the caller's dict is modified in place);
# the remaining keys are passed to a recursive get_archive() call
name = archive.pop("name", f"backend_{ctr}")
archives.append((name, get_archive(**archive)))
return ArchiveMultiplexed(archives)
else:
raise ValueError
def get_provenance(**kwargs) -> ProvenanceInterface:
    """Build a provenance object on top of a provenance storage.

    Args:
        kwargs: configuration forwarded unchanged to
          :func:`get_provenance_storage` to build the underlying storage

    Returns:
        a ``Provenance`` instance wrapping that storage
    """
    from .provenance import Provenance

    storage = get_provenance_storage(**kwargs)
    return Provenance(storage)
def get_provenance_storage(cls: str, **kwargs) -> ProvenanceStorageInterface:
    """Get a provenance storage object of class ``cls`` with arguments ``kwargs``.

    Args:
        cls: storage's class, either 'postgresql' (or its deprecated alias
          'local') or 'rabbitmq'
        kwargs: arguments used to build the storage. For 'postgresql'/'local',
          ``db`` holds the keyword arguments given to the storage constructor
          and the optional ``raise_on_commit`` flag defaults to False. For
          'rabbitmq', all arguments are passed to the client constructor.

    Returns:
        an instance of storage object

    Raises:
        :cls:`ValueError` if passed an unknown storage class.
    """
    if cls in ["local", "postgresql"]:
        from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql

        if cls == "local":
            warnings.warn(
                '"local" class is deprecated for provenance storage, please '
                'use "postgresql" class instead.',
                DeprecationWarning,
            )

        raise_on_commit = kwargs.get("raise_on_commit", False)
        return ProvenanceStoragePostgreSql(
            raise_on_commit=raise_on_commit, **kwargs["db"]
        )

    elif cls == "rabbitmq":
        from .api.client import ProvenanceStorageRabbitMQClient

        rmq_storage = ProvenanceStorageRabbitMQClient(**kwargs)
        # only evaluated by static type checkers, never at runtime
        if TYPE_CHECKING:
            assert isinstance(rmq_storage, ProvenanceStorageInterface)
        return rmq_storage

    # name the offending class so that configuration errors are easy to diagnose
    raise ValueError(f"Unknown provenance storage class `{cls}`")
diff --git a/swh/provenance/swhgraph/archive.py b/swh/provenance/swhgraph/archive.py
index 93cd3c8..e0cce34 100644
--- a/swh/provenance/swhgraph/archive.py
+++ b/swh/provenance/swhgraph/archive.py
@@ -1,51 +1,80 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Any, Dict, Iterable, Tuple
+from google.protobuf.field_mask_pb2 import FieldMask
+import grpc
+
from swh.core.statsd import statsd
+from swh.graph.rpc import swhgraph_pb2, swhgraph_pb2_grpc
from swh.model.model import Sha1Git
from swh.model.swhids import CoreSWHID, ObjectType
from swh.storage.interface import StorageInterface
ARCHIVE_DURATION_METRIC = "swh_provenance_archive_graph_duration_seconds"
class ArchiveGraph:
- def __init__(self, graph, storage: StorageInterface) -> None:
- self.graph = graph
+ def __init__(self, url, storage: StorageInterface) -> None:
+ self.graph_url = url
+ self._channel = grpc.insecure_channel(self.graph_url)
+ self._stub = swhgraph_pb2_grpc.TraversalServiceStub(self._channel)
self.storage = storage # required by ArchiveInterface
@statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
def directory_ls(self, id: Sha1Git, minsize: int = 0) -> Iterable[Dict[str, Any]]:
raise NotImplementedError
@statsd.timed(
metric=ARCHIVE_DURATION_METRIC,
tags={"method": "revision_get_some_outbound_edges"},
)
def revision_get_some_outbound_edges(
self, revision_id: Sha1Git
) -> Iterable[Tuple[Sha1Git, Sha1Git]]:
- src = CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id)
- request = self.graph.visit_edges(str(src), edges="rev:rev")
-
- for edge in request:
- if edge:
- yield (
- CoreSWHID.from_string(edge[0]).object_id,
- CoreSWHID.from_string(edge[1]).object_id,
- )
+ src = str(CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id))
+ request = self._stub.Traverse(
+ swhgraph_pb2.TraversalRequest(
+ src=[src],
+ edges="rev:rev",
+ max_edges=1000,
+ mask=FieldMask(paths=["swhid", "successor"]),
+ )
+ )
+ try:
+ for node in request:
+ obj_id = CoreSWHID.from_string(node.swhid).object_id
+ if node.successor:
+ for parent in node.successor:
+ yield (obj_id, CoreSWHID.from_string(parent.swhid).object_id)
+ except grpc.RpcError as e:
+ if (
+ e.code() == grpc.StatusCode.INVALID_ARGUMENT
+ and "Unknown SWHID" in e.details()
+ ):
+ pass
+ raise
@statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
- src = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=id)
- request = self.graph.visit_nodes(
- str(src), edges="snp:rev,snp:rel,rel:rev", return_types="rev"
- )
-
- yield from (
- CoreSWHID.from_string(swhid).object_id for swhid in request if swhid
+ src = str(CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=id))
+ request = self._stub.Traverse(
+ swhgraph_pb2.TraversalRequest(
+ src=[src],
+ edges="snp:rev,snp:rel,rel:rev",
+ return_nodes=swhgraph_pb2.NodeFilter(types="rev"),
+ mask=FieldMask(paths=["swhid"]),
+ )
)
+ try:
+ yield from (CoreSWHID.from_string(node.swhid).object_id for node in request)
+ except grpc.RpcError as e:
+ if (
+ e.code() == grpc.StatusCode.INVALID_ARGUMENT
+ and "Unknown SWHID" in e.details()
+ ):
+ pass
+ raise
diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py
index 82128a4..20062ac 100644
--- a/swh/provenance/tests/conftest.py
+++ b/swh/provenance/tests/conftest.py
@@ -1,162 +1,201 @@
# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from contextlib import contextmanager
from datetime import datetime
+import multiprocessing
from os import path
+from pathlib import Path
from typing import Any, Dict, Generator, List
from _pytest.fixtures import SubRequest
+from aiohttp.test_utils import TestClient, TestServer, loop_context
import msgpack
import psycopg2.extensions
import pytest
from pytest_postgresql.factories import postgresql
+from swh.graph.http_server import make_app
from swh.journal.serializers import msgpack_ext_hook
from swh.model.model import BaseModel, TimestampWithTimezone
from swh.provenance import get_provenance, get_provenance_storage
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface, ProvenanceStorageInterface
from swh.provenance.storage.archive import ArchiveStorage
from swh.storage.interface import StorageInterface
from swh.storage.replay import OBJECT_CONVERTERS, OBJECT_FIXERS, process_replay_objects
@pytest.fixture(
    params=[
        "with-path",
        "without-path",
        "with-path-denormalized",
        "without-path-denormalized",
    ]
)
def provenance_postgresqldb(
    request: SubRequest,
    postgresql: psycopg2.extensions.connection,
) -> Dict[str, str]:
    """Initialize a provenance database and return its connection parameters.

    The fixture is parametrized over the database flavors listed above; the
    current flavor is read from ``request.param``.
    """
    from swh.core.db.db_utils import (
        init_admin_extensions,
        populate_database_for_package,
    )

    package = "swh.provenance"
    flavor = request.param
    init_admin_extensions(package, postgresql.dsn)
    populate_database_for_package(package, postgresql.dsn, flavor=flavor)
    return postgresql.get_dsn_parameters()
@pytest.fixture(params=["postgresql", "rabbitmq"])
def provenance_storage(
    request: SubRequest,
    provenance_postgresqldb: Dict[str, str],
) -> Generator[ProvenanceStorageInterface, None, None]:
    """Return a working and initialized ProvenanceStorageInterface object.

    The fixture is parametrized over the storage class. For "rabbitmq", a
    ``ProvenanceStorageRabbitMQServer`` backed by the PostgreSQL test database
    is started before the client storage is created, and stopped afterwards.
    """
    if request.param == "rabbitmq":
        from swh.provenance.api.server import ProvenanceStorageRabbitMQServer

        rabbitmq = request.getfixturevalue("rabbitmq")
        host = rabbitmq.args["host"]
        port = rabbitmq.args["port"]
        rabbitmq_params: Dict[str, Any] = {
            "url": f"amqp://guest:guest@{host}:{port}/%2f",
            "storage_config": {
                "cls": "postgresql",
                "db": provenance_postgresqldb,
                "raise_on_commit": True,
            },
        }
        server = ProvenanceStorageRabbitMQServer(
            url=rabbitmq_params["url"], storage_config=rabbitmq_params["storage_config"]
        )
        server.start()
        try:
            with get_provenance_storage(cls=request.param, **rabbitmq_params) as storage:
                yield storage
        finally:
            # stop the server even if creating or closing the storage raised,
            # so that a failure does not leave it running
            server.stop()
    else:
        # in test sessions, we DO want to raise any exception occurring at commit time
        with get_provenance_storage(
            cls=request.param, db=provenance_postgresqldb, raise_on_commit=True
        ) as storage:
            yield storage
# Database fixture built with the pytest-postgresql ``postgresql`` factory,
# using "provenance_tests" as database name.
provenance_postgresql = postgresql("postgresql_proc", dbname="provenance_tests")


@pytest.fixture
def provenance(
    provenance_postgresql: psycopg2.extensions.connection,
) -> Generator[ProvenanceInterface, None, None]:
    """Yield a ProvenanceInterface object backed by a freshly initialized
    "with-path" provenance database."""
    from swh.core.db.db_utils import (
        init_admin_extensions,
        populate_database_for_package,
    )

    package = "swh.provenance"
    init_admin_extensions(package, provenance_postgresql.dsn)
    populate_database_for_package(
        package, provenance_postgresql.dsn, flavor="with-path"
    )

    # in test sessions, we DO want to raise any exception occurring at commit time
    db_params = provenance_postgresql.get_dsn_parameters()
    with get_provenance(
        cls="postgresql", db=db_params, raise_on_commit=True
    ) as provenance:
        yield provenance
@pytest.fixture
def archive(swh_storage: StorageInterface) -> ArchiveInterface:
    """Wrap the ``swh_storage`` fixture in an ArchiveStorage object."""
    storage_archive = ArchiveStorage(swh_storage)
    return storage_archive
def fill_storage(storage: StorageInterface, data: Dict[str, List[dict]]) -> None:
    """Convert the dicts of ``data`` to model objects and replay them into
    ``storage``.

    Args:
        storage: storage given to ``process_replay_objects``
        data: mapping from object type to the list of dicts of that type
    """
    objects = {}
    for objtype, dicts in data.items():
        objects[objtype] = [objs_from_dict(objtype, d) for d in dicts]
    process_replay_objects(objects, storage=storage)
def get_datafile(fname: str) -> str:
    """Return the path of ``fname`` in the ``data`` directory next to this module."""
    module_dir = path.dirname(__file__)
    return path.join(module_dir, "data", fname)
# TODO: this should return Dict[str, List[BaseModel]] directly, but it requires
# refactoring several tests
def load_repo_data(repo: str) -> Dict[str, List[dict]]:
    """Load the msgpack dump ``<repo>.msgpack`` from the data directory.

    Args:
        repo: dataset name, i.e. the data file name without ``.msgpack``

    Returns:
        a mapping from object type to the list of object dicts of that type,
        in the order they appear in the file
    """
    by_type: Dict[str, List[dict]] = {}
    with open(get_datafile(f"{repo}.msgpack"), "rb") as stream:
        records = msgpack.Unpacker(
            stream,
            raw=False,
            ext_hook=msgpack_ext_hook,
            strict_map_key=False,
            timestamp=3,  # convert Timestamp in datetime objects (tz UTC)
        )
        for record in records:
            if len(record) != 2:
                # now we should have a triplet (type, key, value)
                objtype, _, objd = record
            else:
                # old format: (type, value)
                objtype, objd = record
            by_type.setdefault(objtype, []).append(objd)
    return by_type
def objs_from_dict(object_type: str, dict_repr: dict) -> BaseModel:
    """Convert ``dict_repr`` to an object using the ``OBJECT_CONVERTERS`` entry
    for ``object_type``, first applying the ``OBJECT_FIXERS`` entry for that
    type when there is one."""
    fixed = (
        OBJECT_FIXERS[object_type](dict_repr)
        if object_type in OBJECT_FIXERS
        else dict_repr
    )
    return OBJECT_CONVERTERS[object_type](fixed)
def ts2dt(ts: Dict[str, Any]) -> datetime:
    """Convert a timestamp dict to a datetime via ``TimestampWithTimezone``."""
    timestamp = TimestampWithTimezone.from_dict(ts)
    return timestamp.to_datetime()
+
+
+def run_grpc_server(queue, dataset_path):
+ """Serve the graph stored at ``dataset_path``; meant to run in a subprocess.
+
+ Builds the swh-graph HTTP application with ``make_app``, starts it in an
+ aiohttp test server, then puts the pair ``(url, app["rpc_url"])`` on
+ ``queue`` and runs the event loop forever. If anything raises, the
+ exception object is put on ``queue`` instead.
+ """
+ try:
+ config = {"graph": {"path": dataset_path}}
+ with loop_context() as loop:
+ # NOTE(review): spawn_rpc_port=None presumably lets the application pick
+ # the RPC port itself -- confirm against swh.graph.http_server.make_app
+ app = make_app(config=config, debug=True, spawn_rpc_port=None)
+ client = TestClient(TestServer(app), loop=loop)
+ loop.run_until_complete(client.start_server())
+ url = client.make_url("/graph/")
+ # report both URLs to the parent process before blocking in the loop
+ queue.put((url, app["rpc_url"]))
+ loop.run_forever()
+ except Exception as e:
+ # hand the failure over to the parent, which is waiting on the queue
+ queue.put(e)
+
+
+@contextmanager
+def grpc_server(dataset):
+ dataset_path = (
+ Path(__file__).parents[0] / "data/swhgraph" / dataset / "compressed/example"
+ )
+ queue = multiprocessing.Queue()
+ server = multiprocessing.Process(
+ target=run_grpc_server, kwargs={"queue": queue, "dataset_path": dataset_path}
+ )
+ server.start()
+ res = queue.get()
+ if isinstance(res, Exception):
+ raise res
+ grpc_url = res[1]
+ try:
+ yield grpc_url
+ finally:
+ server.terminate()
diff --git a/swh/provenance/tests/data/README.md b/swh/provenance/tests/data/README.md
index 5fc5a33..15e32da 100644
--- a/swh/provenance/tests/data/README.md
+++ b/swh/provenance/tests/data/README.md
@@ -1,184 +1,188 @@
# Provenance Index Test Dataset
This directory contains datasets used by `test_provenance_heuristics` tests of
the provenance index database.
## Datasets
There are currently 3 datasets:
- cmdbts2: original dataset
- out-of-order: with unsorted revisions
- with-merges: with merge revisions
Each dataset `xxx` consists of several parts:
- a description of a git repository as a yaml file named `xxx_repo.yaml`,
- a msgpack file containing storage objects for the given repository, from
which the storage is filled before each test using these data, and
- a set of synthetic files, named `synthetic_xxx_(lower|upper)_<mindepth>.txt`,
describing the expected result in the provenance database if ingested with
the flag `lower` set or not set, and the `mindepth` value (integer, most
- often `1` or `2`).
+ often `1` or `2`),
+- a swh-graph compressed dataset (in the `swhgraph/` directory), used for testing
+ the ArchiveGraph backend.
+
### Generate datasets files
For each dataset `xxx`, execute a number of commands:
```
for dataset in cmdbts2 out-of-order with-merges; do
python generate_repo.py -C ${dataset}_repo.yaml $dataset > synthetic_${dataset}_template.txt
# you may want to edit/update synthetic files from this template, see below
python generate_storage_from_git.py $dataset
+ python generate_graph_dataset.py --compress $dataset
done
```
## Git repos description file
The description of a git repository is a yaml file which contains a list of dicts,
each one representing a git revision to add (linearly) in the git repo used as a
base for the dataset. Each dict consists of a structure like:
``` yaml
- msg: R00
date: 1000000000
content:
A/B/C/a: "content a"
```
this example will generate a git commit with the commit message "R00", the
author and committer date 1000000000 (given as a unix timestamp), and a single
file whose path is `A/B/C/a` and whose content is "content a".
The file is parsed to create git revisions in a temporary git repository, in
order of appearance in the yaml file (so one may create a git repository with
'out-of-order' commits).
There is no way of creating branches and merges for now.
The tool to generate this git repo is `generate_repo.py`:
```
python generate_repo.py --help
Usage: generate_repo.py [OPTIONS] INPUT_FILE OUTPUT_DIR
Options:
-C, --clean-output / --no-clean-output
--help Show this message and exit.
```
It generates a git repository in the `OUTPUT_DIR` as well as produces a
template `synthetic` file on its standard output, which can be used to ease
writing the expected `synthetic` files.
Typical usage will be:
```
python generate_repo.py repo2_repo.yaml repo2 > synthetic_repo2_template.txt
```
Note that hashes (for revision, directories and content) of the git objects
only depend on the content of the input yaml file. Calling the tool twice on
the same input file should generate the exact same git repo twice.
Also note that the tool will add a branch at each revision (using the commit
message as branch name), to make it easier to reference any point in the git
history.
## Msgpack dump of the storage
This file contains a set of storage objects (`Revision`, `Content` and
`Directory`) and is usually generated from a local git repository (typically
the one generated by the previous command) using the
`generate_storage_from_git.py` tool:
```
python generate_storage_from_git.py --help
Usage: generate_storage_from_git.py [OPTIONS] GIT_REPO
simple tool to generate the CMDBTS.msgpack dataset filed used in tests
Options:
-r, --head TEXT head revision to start from
-o, --output TEXT output file
--help Show this message and exit.
```
Typical usage would be, using the git repository `repo2` created previously:
```
python generate_storage_from_git.py repo2
Revision hash for master is 8363e8e98751dc9f264d2fedd6b829ad4b1218b0
Wrote 86 objects in repo2.msgpack
```
### Adding extra visits/snapshots
It is also possible to generate a storage from a git repo with extra origin
visits, using the `--visit` option of the `generate_storage_from_git` tool.
This option expects a yaml file as argument. This file contains a description of
extra visits (and snapshots) you want to add to the storage.
The format is simple, for example:
```
# a visit pattern scenario for the 'repo_with_merges' repo
- origin: http://repo_with_merges/1/
date: 1000000015
branches:
- R01
```
will create an OriginVisit (at given date) for the given origin URL (the Origin
will be created as well), with a `Snapshot` including the listed
branches.
## Synthetic files
These files describe the expected content of the provenance database for each
revision (in order of ingestion).
The `generate_repo.py` tool will produce a template of synthetic file like:
```
1000000000.0 b582a17b3fc37f72fc57877616f85c3f0abed064 R00
R00 | | | R b582a17b3fc37f72fc57877616f85c3f0abed064 | 1000000000.0
| | . | D a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1 | 0.0
| | A | D 1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b | 0.0
| | A/B | D 36876d475197b5ad86ad592e8e28818171455f16 | 0.0
| | A/B/C | D 98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3 | 0.0
| | A/B/C/a | C 20329687bb9c1231a7e05afe86160343ad49b494 | 0.0
1000000010.0 8259eeae2ff5046f0bb4393d6e894fe6d7e01bfe R01
R01 | | | R 8259eeae2ff5046f0bb4393d6e894fe6d7e01bfe | 1000000010.0
| | . | D b3cf11b22c9f93c3c494cf90ab072f394155072d | 0.0
| | A | D baca735bf8b8720131b4bfdb47c51631a9260348 | 0.0
| | A/B | D 4b28979d88ed209a09c272bcc80f69d9b18339c2 | 0.0
| | A/B/C | D c9cabe7f49012e3fdef6ac6b929efb5654f583cf | 0.0
| | A/B/C/a | C 20329687bb9c1231a7e05afe86160343ad49b494 | 0.0
| | A/B/C/b | C 50e9cdb03f9719261dd39d7f2920b906db3711a3 | 0.0
[...]
```
where all the content and directories of each revision are listed; it's then
the responsibility of the user to create the expected synthetic file for a
given heuristics configuration. For example, the 2 revisions above are to be
adapted, for the `(lower=True, mindepth=1)` case, as:
```
1000000000 c0d8929936631ecbcf9147be6b8aa13b13b014e4 R00
R00 | | | R c0d8929936631ecbcf9147be6b8aa13b13b014e4 | 1000000000
| R---C | A/B/C/a | C 20329687bb9c1231a7e05afe86160343ad49b494 | 0
1000000010 1444db96cbd8cd791abe83527becee73d3c64e86 R01
R01 | | | R 1444db96cbd8cd791abe83527becee73d3c64e86 | 1000000010
| R---C | A/B/C/a | C 20329687bb9c1231a7e05afe86160343ad49b494 | -10
| R---C | A/B/C/b | C 50e9cdb03f9719261dd39d7f2920b906db3711a3 | 0
```
diff --git a/swh/provenance/tests/data/generate_graph_dataset.py b/swh/provenance/tests/data/generate_graph_dataset.py
new file mode 100755
index 0000000..418303a
--- /dev/null
+++ b/swh/provenance/tests/data/generate_graph_dataset.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+# type: ignore
+
+import argparse
+import logging
+from pathlib import Path
+import shutil
+
+from swh.dataset.exporters.edges import GraphEdgesExporter
+from swh.dataset.exporters.orc import ORCExporter
+from swh.graph.webgraph import compress
+from swh.provenance.tests.conftest import load_repo_data
+
+
+def main():
+ """Export test datasets to the swh-graph input formats, optionally compressed.
+
+ For each dataset name given on the command line, the repository data is
+ loaded with ``load_repo_data`` and fed to the "edges" and "orc" exporters,
+ which write below ``<output>/<dataset>/``. With ``--compress``, ``compress``
+ is also run on the "orc" export.
+ """
+ logging.basicConfig(level=logging.INFO)
+
+ parser = argparse.ArgumentParser(description="Generate a test dataset")
+ parser.add_argument(
+ "--compress",
+ action="store_true",
+ default=False,
+ help="Also compress the dataset",
+ )
+ parser.add_argument("--output", help="output directory", default="swhgraph")
+ parser.add_argument("dataset", help="dataset name", nargs="+")
+ args = parser.parse_args()
+
+ for repo in args.dataset:
+ exporters = {"edges": GraphEdgesExporter, "orc": ORCExporter}
+ config = {"test_unique_file_id": "all"}
+ output_path = Path(args.output) / repo
+ data = load_repo_data(repo)
+ # shows which object types were found in the dataset
+ print(data.keys())
+
+ for name, exporter in exporters.items():
+ # start from a clean directory: remove the output of a previous run
+ if (output_path / name).exists():
+ shutil.rmtree(output_path / name)
+ with exporter(config, output_path / name) as e:
+ for object_type, objs in data.items():
+ for obj_dict in objs:
+ e.process_object(object_type, obj_dict)
+
+ if args.compress:
+ if (output_path / "compressed").exists():
+ shutil.rmtree(output_path / "compressed")
+ # NOTE(review): arguments are presumably (graph name, input directory,
+ # output directory) -- confirm against swh.graph.webgraph.compress
+ compress("example", output_path / "orc", output_path / "compressed")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labelobl b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labelobl
new file mode 100644
index 0000000..a301d27
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labelobl differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labeloffsets b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labeloffsets
new file mode 100644
index 0000000..05e1ddf
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labeloffsets
@@ -0,0 +1 @@
+ƅ†±£F†PÑ£FÃ#FÂÌ M°ËaÈÑ£C#J5¶51 º4hÐÈfØÐÌ3± 
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labels b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labels
new file mode 100644
index 0000000..26fbec2
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.labels
@@ -0,0 +1,4 @@
+Fa4 æ—ˆ(Š(“@D ADIBDyAÄ,gΗ `š à” €™ °œ Т
+•
+0Ÿ
+ÿM„àôÈNàN„@Ä,F$AÏOÐÓ¡£q&P‘P±‘QPQ} ‘"P‘ PÑGQ€
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.properties b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.properties
new file mode 100644
index 0000000..a20a666
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-labelled.properties
@@ -0,0 +1,3 @@
+graphclass = it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph
+labelspec = org.softwareheritage.graph.labels.SwhLabel(DirEntry,9)
+underlyinggraph = example
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labelobl b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labelobl
new file mode 100644
index 0000000..22cf597
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labelobl differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labeloffsets b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labeloffsets
new file mode 100644
index 0000000..ced910a
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labeloffsets
@@ -0,0 +1 @@
+‚á£F†’4h%† F Èä84@È4“††®8rCFHj4háÈhÑ£PÑ´
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labels b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labels
new file mode 100644
index 0000000..59fcae4
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.labels differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.properties b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.properties
new file mode 100644
index 0000000..c4ec5a9
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed-labelled.properties
@@ -0,0 +1,3 @@
+graphclass = it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph
+labelspec = org.softwareheritage.graph.labels.SwhLabel(DirEntry,9)
+underlyinggraph = example-transposed
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.graph b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.graph
new file mode 100644
index 0000000..69aa4cb
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.graph differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.obl b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.obl
new file mode 100644
index 0000000..64106fd
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.obl differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.offsets b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.offsets
new file mode 100644
index 0000000..f6d6755
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.offsets
@@ -0,0 +1,3 @@
+…Â"‚!€ˆ¡B…‚…
+Â#‚C‚!ÑC!1Á Èӄ… CX"(hXP¡B‡8PáA¨¡Â…
+(R
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.properties b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.properties
new file mode 100644
index 0000000..d040a37
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example-transposed.properties
@@ -0,0 +1,35 @@
+#BVGraph properties
+#Wed Aug 31 17:09:00 CEST 2022
+bitsforreferences=90
+avgbitsforintervals=0.929
+graphclass=it.unimi.dsi.big.webgraph.BVGraph
+avgdist=0.329
+successoravggap=18.181
+residualexpstats=32,17,9,9,11,11,6
+arcs=108
+minintervallength=4
+bitsforoutdegrees=228
+residualavgloggap=2.860413564412166
+avgbitsforoutdegrees=3.257
+bitsforresiduals=531
+successoravgloggap=3.0868698490060527
+maxrefcount=3
+successorexpstats=31,15,13,14,14,14,7
+residualarcs=95
+avgbitsforresiduals=7.586
+avgbitsforblocks=0.557
+windowsize=7
+residualavggap=16.647
+copiedarcs=13
+avgbitsforreferences=1.286
+version=0
+compratio=1.281
+bitsperlink=8.824
+compressionflags=
+nodes=70
+avgref=0.243
+zetak=3
+bitsforintervals=65
+intervalisedarcs=0
+bitspernode=13.614
+bitsforblocks=39
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.edges.count.txt b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.edges.count.txt
new file mode 100644
index 0000000..58c9bdf
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.edges.count.txt
@@ -0,0 +1 @@
+111
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.edges.stats.txt b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.edges.stats.txt
new file mode 100644
index 0000000..fe2fe20
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.edges.stats.txt
@@ -0,0 +1,8 @@
+dir:cnt 17
+dir:dir 32
+ori:snp 8
+rel:rev 2
+rev:dir 14
+rev:rev 13
+snp:rel 2
+snp:rev 23
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.graph b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.graph
new file mode 100644
index 0000000..d6c4fe4
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.graph
@@ -0,0 +1 @@
+®“]?ut»]¯o¯]®×kµÝ÷Kõåtô~¦™½ŽTIQtöÅçÒãÒ(>©ø°ú碷ô]­­vò̲\‚-Ï[Ö¾ÕŸiÖÆ»z ]“|µËÈòUÒíkË·‘®j[¼‡kê—ï"͖RÝvÅ-ÞF›–Ü
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.indegree b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.indegree
new file mode 100644
index 0000000..3f6cf6c
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.indegree
@@ -0,0 +1,6 @@
+3
+44
+11
+8
+2
+2
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.count.txt b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.count.txt
new file mode 100644
index 0000000..7facc89
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.count.txt
@@ -0,0 +1 @@
+36
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.csv.zst
new file mode 100644
index 0000000..8a80894
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.bytearray b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.bytearray
new file mode 100644
index 0000000..0b01286
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.bytearray differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.pointers b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.pointers
new file mode 100644
index 0000000..715d8b9
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.pointers differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.properties b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.properties
new file mode 100644
index 0000000..ca95cdb
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.fcl.properties
@@ -0,0 +1,2 @@
+n=36
+ratio=4
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.mph b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.mph
new file mode 100644
index 0000000..b3eb5cf
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.labels.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.mph b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.mph
new file mode 100644
index 0000000..e675b2a
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.node2swhid.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.node2swhid.bin
new file mode 100644
index 0000000..7b18d4e
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.node2swhid.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.node2type.map b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.node2type.map
new file mode 100644
index 0000000..4da4756
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.node2type.map differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.count.txt b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.count.txt
new file mode 100644
index 0000000..2bbd69c
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.count.txt
@@ -0,0 +1 @@
+70
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.csv.zst
new file mode 100644
index 0000000..d997472
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.stats.txt b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.stats.txt
new file mode 100644
index 0000000..1f69788
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.nodes.stats.txt
@@ -0,0 +1,6 @@
+cnt 9
+dir 35
+ori 3
+rel 2
+rev 14
+snp 7
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.obl b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.obl
new file mode 100644
index 0000000..9db18bf
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.obl differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.offsets b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.offsets
new file mode 100644
index 0000000..2ac478e
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.offsets
@@ -0,0 +1,4 @@
+¡G8P¡BáB…
+‡
+…@˜ G‡FŠ(2C‚¡ ˆáAAÄ(p ¨EG
+  ’A‘À
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.order b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.order
new file mode 100644
index 0000000..d4fa8fb
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.order differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.outdegree b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.outdegree
new file mode 100644
index 0000000..a4ae5f9
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.outdegree
@@ -0,0 +1,17 @@
+9
+31
+27
+1
+1
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.count.txt b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.count.txt
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.count.txt
@@ -0,0 +1 @@
+1
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.csv.zst
new file mode 100644
index 0000000..4131fba
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.mph b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.mph
new file mode 100644
index 0000000..817afdc
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.persons.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.properties b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.properties
new file mode 100644
index 0000000..603889a
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.properties
@@ -0,0 +1,35 @@
+#BVGraph properties
+#Wed Aug 31 17:08:58 CEST 2022
+bitsforreferences=68
+avgbitsforintervals=0.971
+graphclass=it.unimi.dsi.big.webgraph.BVGraph
+avgdist=0.1
+successoravggap=20.069
+residualexpstats=17,28,15,11,11,9,8
+arcs=108
+minintervallength=4
+bitsforoutdegrees=202
+residualavgloggap=3.0514710286903775
+avgbitsforoutdegrees=2.886
+bitsforresiduals=562
+successoravgloggap=3.1391563452679465
+maxrefcount=3
+successorexpstats=21,28,15,10,11,12,11
+residualarcs=99
+avgbitsforresiduals=8.029
+avgbitsforblocks=0.271
+windowsize=7
+residualavggap=17.636
+copiedarcs=4
+avgbitsforreferences=0.971
+version=0
+compratio=1.236
+bitsperlink=8.509
+compressionflags=
+nodes=70
+avgref=0.071
+zetak=3
+bitsforintervals=68
+intervalisedarcs=5
+bitspernode=13.129
+bitsforblocks=19
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_id.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_id.bin
new file mode 100644
index 0000000..825935f
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_id.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_timestamp.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_timestamp.bin
new file mode 100644
index 0000000..3a0606a
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_timestamp.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_timestamp_offset.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_timestamp_offset.bin
new file mode 100644
index 0000000..b8a2b7b
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.author_timestamp_offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_id.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_id.bin
new file mode 100644
index 0000000..1d37153
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_id.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_timestamp.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_timestamp.bin
new file mode 100644
index 0000000..08f836c
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_timestamp.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_timestamp_offset.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_timestamp_offset.bin
new file mode 100644
index 0000000..0b34ff8
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.committer_timestamp_offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.content.is_skipped.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.content.is_skipped.bin
new file mode 100644
index 0000000..946ec52
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.content.is_skipped.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.content.length.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.content.length.bin
new file mode 100644
index 0000000..2814f43
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.content.length.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.message.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.message.bin
new file mode 100644
index 0000000..d13ffcd
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.message.bin
@@ -0,0 +1,19 @@
+dGFnIG1lc3NhZ2UK
+dGFnIG1lc3NhZ2UK
+UjAyCg==
+UjAxCg==
+UjA3Cg==
+UjAwCg==
+UjA0Cg==
+UjEzCg==
+UjA1Cg==
+UjA2Cg==
+UjExCg==
+UjEwCg==
+UjA4Cg==
+UjEyCg==
+UjA5Cg==
+UjAzCg==
+aHR0cHM6Ly9jbWRidHMy
+aHR0cDovL2NtZGJ0czIvMQ==
+aHR0cDovL2NtZGJ0czIvMg==
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.message.offset.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.message.offset.bin
new file mode 100644
index 0000000..31124b2
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.message.offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.tag_name.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.tag_name.bin
new file mode 100644
index 0000000..5b9230c
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.tag_name.bin
@@ -0,0 +1,2 @@
+MC4x
+MS4w
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.tag_name.offset.bin b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.tag_name.offset.bin
new file mode 100644
index 0000000..497e4b8
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.property.tag_name.offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.stats b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.stats
new file mode 100644
index 0000000..0fe0cdf
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/cmdbts2/compressed/example.stats
@@ -0,0 +1,20 @@
+nodes=70
+arcs=108
+loops=0
+successoravggap=22.143
+avglocality=11.500
+minoutdegree=0
+maxoutdegree=16
+minoutdegreenode=0
+maxoutdegreenode=23
+dangling=9
+terminal=9
+percdangling=12.857142857142858
+avgoutdegree=1.542857142857143
+successorlogdeltastats=42,13,13,7,17,16
+successoravglogdelta=1.362
+minindegree=0
+maxindegree=5
+minindegreenode=68
+maxindegreenode=30
+avgindegree=1.542857142857143
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/content/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/content/graph-all.edges.csv.zst
new file mode 100644
index 0000000..e58c09d
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/content/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/content/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/content/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..685b409
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/content/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/directory/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/directory/graph-all.edges.csv.zst
new file mode 100644
index 0000000..5d670c6
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/directory/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/directory/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/directory/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..0c6a1a0
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/directory/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/origin/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/origin/graph-all.edges.csv.zst
new file mode 100644
index 0000000..b724424
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/origin/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/origin/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/origin/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..c3420f6
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/origin/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/release/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/release/graph-all.edges.csv.zst
new file mode 100644
index 0000000..e7caa9f
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/release/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/release/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/release/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..92d2100
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/release/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/revision/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/revision/graph-all.edges.csv.zst
new file mode 100644
index 0000000..d06d59b
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/revision/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/revision/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/revision/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..230713d
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/revision/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/snapshot/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/snapshot/graph-all.edges.csv.zst
new file mode 100644
index 0000000..4bec52f
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/snapshot/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/edges/snapshot/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/snapshot/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..91f9a18
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/edges/snapshot/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/content/content-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/content/content-all.orc
new file mode 100644
index 0000000..229c09e
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/content/content-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/directory/directory-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/directory/directory-all.orc
new file mode 100644
index 0000000..0effc8a
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/directory/directory-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/directory_entry/directory_entry-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/directory_entry/directory_entry-all.orc
new file mode 100644
index 0000000..640f5ff
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/directory_entry/directory_entry-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin/origin-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin/origin-all.orc
new file mode 100644
index 0000000..7176ae6
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin/origin-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin_visit/origin_visit-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin_visit/origin_visit-all.orc
new file mode 100644
index 0000000..d97b9bf
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin_visit/origin_visit-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin_visit_status/origin_visit_status-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin_visit_status/origin_visit_status-all.orc
new file mode 100644
index 0000000..ed33240
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/origin_visit_status/origin_visit_status-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/release/release-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/release/release-all.orc
new file mode 100644
index 0000000..f3466fc
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/release/release-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision/revision-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision/revision-all.orc
new file mode 100644
index 0000000..8594a41
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision/revision-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision_extra_headers/revision_extra_headers-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision_extra_headers/revision_extra_headers-all.orc
new file mode 100644
index 0000000..71bc502
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision_extra_headers/revision_extra_headers-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision_history/revision_history-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision_history/revision_history-all.orc
new file mode 100644
index 0000000..511c21a
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/revision_history/revision_history-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/snapshot/snapshot-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/snapshot/snapshot-all.orc
new file mode 100644
index 0000000..8c98363
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/snapshot/snapshot-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/cmdbts2/orc/snapshot_branch/snapshot_branch-all.orc b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/snapshot_branch/snapshot_branch-all.orc
new file mode 100644
index 0000000..80536a9
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/cmdbts2/orc/snapshot_branch/snapshot_branch-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labelobl b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labelobl
new file mode 100644
index 0000000..00f1b5a
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labelobl differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labeloffsets b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labeloffsets
new file mode 100644
index 0000000..e26ecec
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labeloffsets
@@ -0,0 +1 @@
+…qbŋi¶€¢Ú,X±b°
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labels b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labels
new file mode 100644
index 0000000..0a8e1f1
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.labels
@@ -0,0 +1 @@
+HQ”Q AOýPL…XRãè"Š#("Š%
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.properties b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.properties
new file mode 100644
index 0000000..4c6856d
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-labelled.properties
@@ -0,0 +1,3 @@
+graphclass = it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph
+labelspec = org.softwareheritage.graph.labels.SwhLabel(DirEntry,7)
+underlyinggraph = example
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labelobl b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labelobl
new file mode 100644
index 0000000..a7d0751
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labelobl differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labeloffsets b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labeloffsets
new file mode 100644
index 0000000..d4f844c
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labeloffsets
@@ -0,0 +1 @@
+…E…B¢Ù 0e 1 - ,
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labels b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labels
new file mode 100644
index 0000000..e7303e7
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.labels
@@ -0,0 +1 @@
+ED…FPÄýPL+E`« SCJ†%
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.properties b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.properties
new file mode 100644
index 0000000..da8e63b
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed-labelled.properties
@@ -0,0 +1,3 @@
+graphclass = it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph
+labelspec = org.softwareheritage.graph.labels.SwhLabel(DirEntry,7)
+underlyinggraph = example-transposed
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.graph b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.graph
new file mode 100644
index 0000000..85f422b
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.graph
@@ -0,0 +1 @@
+ª.›²¾âõélÞ½ %í鞟W]=7?#ú—K¥Òék ¥Òè
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.obl b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.obl
new file mode 100644
index 0000000..489d079
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.obl differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.offsets b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.offsets
new file mode 100644
index 0000000..7fb21de
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.offsets
@@ -0,0 +1 @@
+„…Š…"Æ …FŽ(P¡Â…
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.properties b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.properties
new file mode 100644
index 0000000..772764a
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example-transposed.properties
@@ -0,0 +1,35 @@
+#BVGraph properties
+#Wed Aug 31 12:09:02 CEST 2022
+bitsforreferences=29
+avgbitsforintervals=0.92
+graphclass=it.unimi.dsi.big.webgraph.BVGraph
+avgdist=0.2
+successoravggap=5.176
+residualexpstats=14,8,5,3,2,1
+arcs=37
+minintervallength=4
+bitsforoutdegrees=75
+residualavgloggap=2.052179971724485
+avgbitsforoutdegrees=3
+bitsforresiduals=151
+successoravgloggap=2.0969953139716377
+maxrefcount=3
+successorexpstats=14,9,7,4,3
+residualarcs=33
+avgbitsforresiduals=6.04
+avgbitsforblocks=0.32
+windowsize=7
+residualavggap=5.773
+copiedarcs=4
+avgbitsforreferences=1.16
+version=0
+compratio=1.439
+bitsperlink=7.73
+compressionflags=
+nodes=25
+avgref=0.24
+zetak=3
+bitsforintervals=23
+intervalisedarcs=0
+bitspernode=11.44
+bitsforblocks=8
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.edges.count.txt b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.edges.count.txt
new file mode 100644
index 0000000..81b5c5d
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.edges.count.txt
@@ -0,0 +1 @@
+37
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.edges.stats.txt b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.edges.stats.txt
new file mode 100644
index 0000000..db91efe
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.edges.stats.txt
@@ -0,0 +1,8 @@
+dir:cnt 4
+dir:dir 10
+ori:snp 1
+rel:rev 1
+rev:dir 7
+rev:rev 6
+snp:rel 1
+snp:rev 7
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.graph b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.graph
new file mode 100644
index 0000000..7aadbb4
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.graph
@@ -0,0 +1 @@
+}Í|ºMxú@»}£z zJ]‰¡Šäb–Þ¤‰v»]®ÖÈ»]½¹b
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.indegree b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.indegree
new file mode 100644
index 0000000..c2f175c
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.indegree
@@ -0,0 +1,4 @@
+1
+12
+11
+1
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.count.txt b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.count.txt
new file mode 100644
index 0000000..8351c19
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.count.txt
@@ -0,0 +1 @@
+14
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.csv.zst
new file mode 100644
index 0000000..90bddcc
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.bytearray b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.bytearray
new file mode 100644
index 0000000..84e3f74
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.bytearray differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.pointers b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.pointers
new file mode 100644
index 0000000..ce4adc2
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.pointers differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.properties b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.properties
new file mode 100644
index 0000000..7f518f6
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.fcl.properties
@@ -0,0 +1,2 @@
+n=14
+ratio=4
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.mph b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.mph
new file mode 100644
index 0000000..5d22d5a
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.labels.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.mph b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.mph
new file mode 100644
index 0000000..e1372a5
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.node2swhid.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.node2swhid.bin
new file mode 100644
index 0000000..b76062d
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.node2swhid.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.node2type.map b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.node2type.map
new file mode 100644
index 0000000..d04ab63
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.node2type.map differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.count.txt b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.count.txt
new file mode 100644
index 0000000..7273c0f
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.count.txt
@@ -0,0 +1 @@
+25
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.csv.zst
new file mode 100644
index 0000000..5997623
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.stats.txt b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.stats.txt
new file mode 100644
index 0000000..ca2afae
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.nodes.stats.txt
@@ -0,0 +1,6 @@
+cnt 3
+dir 12
+ori 1
+rel 1
+rev 7
+snp 1
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.obl b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.obl
new file mode 100644
index 0000000..6626e1e
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.obl differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.offsets b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.offsets
new file mode 100644
index 0000000..45ab69e
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.offsets differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.order b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.order
new file mode 100644
index 0000000..ea36496
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.order differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.outdegree b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.outdegree
new file mode 100644
index 0000000..8c763f4
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.outdegree
@@ -0,0 +1,9 @@
+3
+13
+8
+0
+0
+0
+0
+0
+1
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.count.txt b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.count.txt
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.count.txt
@@ -0,0 +1 @@
+1
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.csv.zst
new file mode 100644
index 0000000..4131fba
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.mph b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.mph
new file mode 100644
index 0000000..78b5edd
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.persons.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.properties b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.properties
new file mode 100644
index 0000000..4d2f3d6
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.properties
@@ -0,0 +1,35 @@
+#BVGraph properties
+#Wed Aug 31 12:09:01 CEST 2022
+bitsforreferences=26
+avgbitsforintervals=1.32
+graphclass=it.unimi.dsi.big.webgraph.BVGraph
+avgdist=0.16
+successoravggap=7.973
+residualexpstats=5,14,3,4,2,2
+arcs=37
+minintervallength=4
+bitsforoutdegrees=73
+residualavgloggap=2.4469652731644436
+avgbitsforoutdegrees=2.92
+bitsforresiduals=148
+successoravgloggap=2.41382109239454
+maxrefcount=3
+successorexpstats=9,14,3,6,3,2
+residualarcs=30
+avgbitsforresiduals=5.92
+avgbitsforblocks=0.28
+windowsize=7
+residualavggap=8.150
+copiedarcs=2
+avgbitsforreferences=1.04
+version=0
+compratio=1.444
+bitsperlink=7.757
+compressionflags=
+nodes=25
+avgref=0.08
+zetak=3
+bitsforintervals=33
+intervalisedarcs=5
+bitspernode=11.48
+bitsforblocks=7
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_id.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_id.bin
new file mode 100644
index 0000000..468e2d5
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_id.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_timestamp.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_timestamp.bin
new file mode 100644
index 0000000..bc6bcf9
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_timestamp.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_timestamp_offset.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_timestamp_offset.bin
new file mode 100644
index 0000000..550768f
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.author_timestamp_offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_id.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_id.bin
new file mode 100644
index 0000000..0b3efe2
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_id.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_timestamp.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_timestamp.bin
new file mode 100644
index 0000000..87d0f80
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_timestamp.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_timestamp_offset.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_timestamp_offset.bin
new file mode 100644
index 0000000..56fa627
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.committer_timestamp_offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.content.is_skipped.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.content.is_skipped.bin
new file mode 100644
index 0000000..dfb30cd
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.content.is_skipped.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.content.length.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.content.length.bin
new file mode 100644
index 0000000..161b585
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.content.length.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.message.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.message.bin
new file mode 100644
index 0000000..4ebaa78
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.message.bin
@@ -0,0 +1,9 @@
+dGFnIG1lc3NhZ2UK
+UjAzCg==
+UjAyCg==
+UjAwCg==
+UjAxCg==
+UjA1Cg==
+UjA0Cg==
+UjA2Cg==
+aHR0cHM6Ly9vdXQtb2Ytb3JkZXI=
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.message.offset.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.message.offset.bin
new file mode 100644
index 0000000..50acfdd
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.message.offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.tag_name.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.tag_name.bin
new file mode 100644
index 0000000..8a30784
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.tag_name.bin
@@ -0,0 +1 @@
+MS4w
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.tag_name.offset.bin b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.tag_name.offset.bin
new file mode 100644
index 0000000..2ddbcdf
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.property.tag_name.offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.stats b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.stats
new file mode 100644
index 0000000..1381f43
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/out-of-order/compressed/example.stats
@@ -0,0 +1,20 @@
+nodes=25
+arcs=37
+loops=0
+successoravggap=8.042
+avglocality=3.865
+minoutdegree=0
+maxoutdegree=8
+minoutdegreenode=1
+maxoutdegreenode=13
+dangling=3
+terminal=3
+percdangling=12.0
+avgoutdegree=1.48
+successorlogdeltastats=17,8,7,3,2
+successoravglogdelta=0.953
+minindegree=0
+maxindegree=3
+minindegreenode=12
+maxindegreenode=6
+avgindegree=1.48
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/content/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/content/graph-all.edges.csv.zst
new file mode 100644
index 0000000..e58c09d
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/content/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/content/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/content/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..45155d8
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/content/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/directory/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/directory/graph-all.edges.csv.zst
new file mode 100644
index 0000000..8464d87
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/directory/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/directory/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/directory/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..48911a9
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/directory/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/origin/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/origin/graph-all.edges.csv.zst
new file mode 100644
index 0000000..e60908c
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/origin/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/origin/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/origin/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..768a836
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/origin/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/release/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/release/graph-all.edges.csv.zst
new file mode 100644
index 0000000..1bf8160
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/release/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/release/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/release/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..3348e74
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/release/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/revision/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/revision/graph-all.edges.csv.zst
new file mode 100644
index 0000000..7fd58fb
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/revision/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/revision/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/revision/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..04c1893
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/revision/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/snapshot/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/snapshot/graph-all.edges.csv.zst
new file mode 100644
index 0000000..2258118
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/snapshot/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/edges/snapshot/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/out-of-order/edges/snapshot/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..ef487f7
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/edges/snapshot/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/content/content-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/content/content-all.orc
new file mode 100644
index 0000000..00c508c
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/content/content-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/directory/directory-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/directory/directory-all.orc
new file mode 100644
index 0000000..ac904ea
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/directory/directory-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/directory_entry/directory_entry-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/directory_entry/directory_entry-all.orc
new file mode 100644
index 0000000..12d81bc
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/directory_entry/directory_entry-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin/origin-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin/origin-all.orc
new file mode 100644
index 0000000..52397c9
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin/origin-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin_visit/origin_visit-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin_visit/origin_visit-all.orc
new file mode 100644
index 0000000..c34d338
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin_visit/origin_visit-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin_visit_status/origin_visit_status-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin_visit_status/origin_visit_status-all.orc
new file mode 100644
index 0000000..ee97cd3
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/origin_visit_status/origin_visit_status-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/release/release-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/release/release-all.orc
new file mode 100644
index 0000000..f2fb3f2
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/release/release-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision/revision-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision/revision-all.orc
new file mode 100644
index 0000000..ea31d85
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision/revision-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision_extra_headers/revision_extra_headers-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision_extra_headers/revision_extra_headers-all.orc
new file mode 100644
index 0000000..71bc502
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision_extra_headers/revision_extra_headers-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision_history/revision_history-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision_history/revision_history-all.orc
new file mode 100644
index 0000000..61ebd2d
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/revision_history/revision_history-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/snapshot/snapshot-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/snapshot/snapshot-all.orc
new file mode 100644
index 0000000..15bf41b
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/snapshot/snapshot-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/out-of-order/orc/snapshot_branch/snapshot_branch-all.orc b/swh/provenance/tests/data/swhgraph/out-of-order/orc/snapshot_branch/snapshot_branch-all.orc
new file mode 100644
index 0000000..5735dc9
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/out-of-order/orc/snapshot_branch/snapshot_branch-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labelobl b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labelobl
new file mode 100644
index 0000000..b42b77e
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labelobl differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labeloffsets b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labeloffsets
new file mode 100644
index 0000000..4a7c5c2
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labeloffsets
@@ -0,0 +1 @@
+ŒC4…ñƒ l¡v12€…L`Ä1°¸
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labels b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labels
new file mode 100644
index 0000000..4d25083
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.labels differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.properties b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.properties
new file mode 100644
index 0000000..d713a39
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-labelled.properties
@@ -0,0 +1,3 @@
+graphclass = it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph
+labelspec = org.softwareheritage.graph.labels.SwhLabel(DirEntry,8)
+underlyinggraph = example
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labelobl b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labelobl
new file mode 100644
index 0000000..e6a5376
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labelobl differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labeloffsets b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labeloffsets
new file mode 100644
index 0000000..0ed3ccc
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labeloffsets
@@ -0,0 +1 @@
+¡£0`\`\`Á„ƒ#h4†àÂ5…î@
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labels b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labels
new file mode 100644
index 0000000..542d104
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.labels
@@ -0,0 +1,2 @@
+ÒBõPá"”(…
+C(!N4À˜4±¥ (Q‚xOðžEDRhè1tàœ
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.properties b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.properties
new file mode 100644
index 0000000..5f12c3a
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed-labelled.properties
@@ -0,0 +1,3 @@
+graphclass = it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph
+labelspec = org.softwareheritage.graph.labels.SwhLabel(DirEntry,8)
+underlyinggraph = example-transposed
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.graph b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.graph
new file mode 100644
index 0000000..e695745
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.graph
@@ -0,0 +1 @@
+]½§WdíU {O±.ցtݵtßÖ¬º]üµÒüºÒDTúC]쒑¯ÌꨉÜÕ£]7JõdO^MzÒi5(…x
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.obl b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.obl
new file mode 100644
index 0000000..59f052f
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.obl differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.offsets b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.offsets
new file mode 100644
index 0000000..4498a6b
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.offsets differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.properties b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.properties
new file mode 100644
index 0000000..225b3bc
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example-transposed.properties
@@ -0,0 +1,35 @@
+#BVGraph properties
+#Wed Aug 31 17:12:28 CEST 2022
+bitsforreferences=40
+avgbitsforintervals=0.861
+graphclass=it.unimi.dsi.big.webgraph.BVGraph
+avgdist=0.194
+successoravggap=8.786
+residualexpstats=13,9,13,13,7,2
+arcs=63
+minintervallength=4
+bitsforoutdegrees=118
+residualavgloggap=2.7235936086772763
+avgbitsforoutdegrees=3.278
+bitsforresiduals=302
+successoravgloggap=2.682254762270379
+maxrefcount=3
+successorexpstats=14,12,14,13,8,2
+residualarcs=57
+avgbitsforresiduals=8.389
+avgbitsforblocks=0.333
+windowsize=7
+residualavggap=9.053
+copiedarcs=6
+avgbitsforreferences=1.111
+version=0
+compratio=1.4
+bitsperlink=7.984
+compressionflags=
+nodes=36
+avgref=0.167
+zetak=3
+bitsforintervals=31
+intervalisedarcs=0
+bitspernode=13.972
+bitsforblocks=12
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.edges.count.txt b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.edges.count.txt
new file mode 100644
index 0000000..4b9026d
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.edges.count.txt
@@ -0,0 +1 @@
+63
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.edges.stats.txt b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.edges.stats.txt
new file mode 100644
index 0000000..04964a6
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.edges.stats.txt
@@ -0,0 +1,8 @@
+dir:cnt 4
+dir:dir 10
+ori:snp 7
+rel:rev 3
+rev:dir 9
+rev:rev 10
+snp:rel 3
+snp:rev 17
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.graph b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.graph
new file mode 100644
index 0000000..bf84c48
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.graph
@@ -0,0 +1 @@
+ZºéhWO¤Ë§Ü¾æ»]¯—k_}ûûè öR'°*=¡*é6ê…l_¾å‰%ÕT^=Y"ím‹D¾­ïË/VD
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.indegree b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.indegree
new file mode 100644
index 0000000..f3db18c
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.indegree
@@ -0,0 +1,6 @@
+3
+17
+8
+4
+2
+2
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.count.txt b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.count.txt
new file mode 100644
index 0000000..3c03207
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.count.txt
@@ -0,0 +1 @@
+18
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.csv.zst
new file mode 100644
index 0000000..7a97feb
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.bytearray b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.bytearray
new file mode 100644
index 0000000..f89bd29
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.bytearray differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.pointers b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.pointers
new file mode 100644
index 0000000..2fbe1a8
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.pointers differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.properties b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.properties
new file mode 100644
index 0000000..10689c4
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.fcl.properties
@@ -0,0 +1,2 @@
+n=18
+ratio=4
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.mph b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.mph
new file mode 100644
index 0000000..baf256f
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.labels.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.mph b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.mph
new file mode 100644
index 0000000..59dfb94
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.node2swhid.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.node2swhid.bin
new file mode 100644
index 0000000..a5662b5
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.node2swhid.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.node2type.map b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.node2type.map
new file mode 100644
index 0000000..0764054
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.node2type.map differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.count.txt b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.count.txt
new file mode 100644
index 0000000..7facc89
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.count.txt
@@ -0,0 +1 @@
+36
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.csv.zst
new file mode 100644
index 0000000..36a5a0c
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.stats.txt b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.stats.txt
new file mode 100644
index 0000000..3bc8668
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.nodes.stats.txt
@@ -0,0 +1,6 @@
+cnt 3
+dir 12
+ori 3
+rel 3
+rev 9
+snp 6
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.obl b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.obl
new file mode 100644
index 0000000..d6eb964
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.obl differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.offsets b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.offsets
new file mode 100644
index 0000000..975ac67
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.offsets
@@ -0,0 +1,3 @@
+Ž4PDPÔ5!B…
+8pD6
+Š8@<EE,pX*
\ No newline at end of file
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.order b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.order
new file mode 100644
index 0000000..b03d6f6
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.order differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.outdegree b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.outdegree
new file mode 100644
index 0000000..22627eb
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.outdegree
@@ -0,0 +1,13 @@
+3
+17
+13
+0
+2
+0
+0
+0
+0
+0
+0
+0
+1
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.count.txt b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.count.txt
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.count.txt
@@ -0,0 +1 @@
+1
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.csv.zst
new file mode 100644
index 0000000..4131fba
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.mph b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.mph
new file mode 100644
index 0000000..d57ac74
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.persons.mph differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.properties b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.properties
new file mode 100644
index 0000000..20e2ad2
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.properties
@@ -0,0 +1,35 @@
+#BVGraph properties
+#Wed Aug 31 17:12:27 CEST 2022
+bitsforreferences=46
+avgbitsforintervals=0.861
+graphclass=it.unimi.dsi.big.webgraph.BVGraph
+avgdist=0.361
+successoravggap=10.738
+residualexpstats=10,14,8,9,9,5
+arcs=63
+minintervallength=4
+bitsforoutdegrees=110
+residualavgloggap=2.8951480939978222
+avgbitsforoutdegrees=3.056
+bitsforresiduals=299
+successoravgloggap=2.778232574438545
+maxrefcount=3
+successorexpstats=14,15,9,11,9,5
+residualarcs=55
+avgbitsforresiduals=8.306
+avgbitsforblocks=0.25
+windowsize=7
+residualavggap=11.664
+copiedarcs=8
+avgbitsforreferences=1.278
+version=0
+compratio=1.378
+bitsperlink=7.857
+compressionflags=
+nodes=36
+avgref=0.194
+zetak=3
+bitsforintervals=31
+intervalisedarcs=0
+bitspernode=13.75
+bitsforblocks=9
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_id.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_id.bin
new file mode 100644
index 0000000..c2e54ed
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_id.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_timestamp.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_timestamp.bin
new file mode 100644
index 0000000..b503d6e
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_timestamp.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_timestamp_offset.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_timestamp_offset.bin
new file mode 100644
index 0000000..f05db97
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.author_timestamp_offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_id.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_id.bin
new file mode 100644
index 0000000..7215a00
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_id.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_timestamp.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_timestamp.bin
new file mode 100644
index 0000000..75ecbe4
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_timestamp.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_timestamp_offset.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_timestamp_offset.bin
new file mode 100644
index 0000000..a8fbcd6
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.committer_timestamp_offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.content.is_skipped.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.content.is_skipped.bin
new file mode 100644
index 0000000..cc8a7ad
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.content.is_skipped.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.content.length.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.content.length.bin
new file mode 100644
index 0000000..acc443d
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.content.length.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.message.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.message.bin
new file mode 100644
index 0000000..37c1431
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.message.bin
@@ -0,0 +1,15 @@
+dGFnIG1lc3NhZ2UK
+dGFnIG1lc3NhZ2UK
+dGFnIG1lc3NhZ2UK
+UjA4Cg==
+UjAwCg==
+UjA3Cg==
+UjA1Cg==
+UjA0Cg==
+UjAzCg==
+UjA2Cg==
+UjAxCg==
+UjAyCg==
+aHR0cHM6Ly93aXRoLW1lcmdlcw==
+aHR0cDovL3JlcG9fd2l0aF9tZXJnZXMvMS8=
+aHR0cDovL3JlcG9fd2l0aF9tZXJnZXMvMi8=
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.message.offset.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.message.offset.bin
new file mode 100644
index 0000000..2ae8028
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.message.offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.tag_name.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.tag_name.bin
new file mode 100644
index 0000000..25990fe
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.tag_name.bin
@@ -0,0 +1,3 @@
+MS4w
+MC4w
+MC45
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.tag_name.offset.bin b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.tag_name.offset.bin
new file mode 100644
index 0000000..ee1a889
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.property.tag_name.offset.bin differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.stats b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.stats
new file mode 100644
index 0000000..8e1d93c
--- /dev/null
+++ b/swh/provenance/tests/data/swhgraph/with-merges/compressed/example.stats
@@ -0,0 +1,20 @@
+nodes=36
+arcs=63
+loops=0
+successoravggap=11.152
+avglocality=6.825
+minoutdegree=0
+maxoutdegree=12
+minoutdegreenode=7
+maxoutdegreenode=26
+dangling=3
+terminal=3
+percdangling=8.333333333333334
+avgoutdegree=1.75
+successorlogdeltastats=20,14,7,13,9
+successoravglogdelta=1.215
+minindegree=0
+maxindegree=5
+minindegreenode=33
+maxindegreenode=24
+avgindegree=1.75
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/content/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/content/graph-all.edges.csv.zst
new file mode 100644
index 0000000..e58c09d
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/content/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/content/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/content/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..12608dc
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/content/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/directory/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/directory/graph-all.edges.csv.zst
new file mode 100644
index 0000000..41f1d90
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/directory/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/directory/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/directory/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..4919b19
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/directory/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/origin/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/origin/graph-all.edges.csv.zst
new file mode 100644
index 0000000..161f1ca
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/origin/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/origin/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/origin/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..33cadd1
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/origin/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/release/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/release/graph-all.edges.csv.zst
new file mode 100644
index 0000000..168817f
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/release/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/release/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/release/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..cd2ca39
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/release/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/revision/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/revision/graph-all.edges.csv.zst
new file mode 100644
index 0000000..b89bebe
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/revision/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/revision/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/revision/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..7a3d624
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/revision/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/snapshot/graph-all.edges.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/snapshot/graph-all.edges.csv.zst
new file mode 100644
index 0000000..6cee7e6
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/snapshot/graph-all.edges.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/edges/snapshot/graph-all.nodes.csv.zst b/swh/provenance/tests/data/swhgraph/with-merges/edges/snapshot/graph-all.nodes.csv.zst
new file mode 100644
index 0000000..75f0651
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/edges/snapshot/graph-all.nodes.csv.zst differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/content/content-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/content/content-all.orc
new file mode 100644
index 0000000..275232c
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/content/content-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/directory/directory-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/directory/directory-all.orc
new file mode 100644
index 0000000..a72f982
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/directory/directory-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/directory_entry/directory_entry-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/directory_entry/directory_entry-all.orc
new file mode 100644
index 0000000..77f8fe6
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/directory_entry/directory_entry-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/origin/origin-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/origin/origin-all.orc
new file mode 100644
index 0000000..dfcfe09
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/origin/origin-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/origin_visit/origin_visit-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/origin_visit/origin_visit-all.orc
new file mode 100644
index 0000000..6f53707
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/origin_visit/origin_visit-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/origin_visit_status/origin_visit_status-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/origin_visit_status/origin_visit_status-all.orc
new file mode 100644
index 0000000..6d49ed1
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/origin_visit_status/origin_visit_status-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/release/release-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/release/release-all.orc
new file mode 100644
index 0000000..ab60282
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/release/release-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/revision/revision-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/revision/revision-all.orc
new file mode 100644
index 0000000..40824bc
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/revision/revision-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/revision_extra_headers/revision_extra_headers-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/revision_extra_headers/revision_extra_headers-all.orc
new file mode 100644
index 0000000..71bc502
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/revision_extra_headers/revision_extra_headers-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/revision_history/revision_history-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/revision_history/revision_history-all.orc
new file mode 100644
index 0000000..be75304
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/revision_history/revision_history-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/snapshot/snapshot-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/snapshot/snapshot-all.orc
new file mode 100644
index 0000000..5f67e59
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/snapshot/snapshot-all.orc differ
diff --git a/swh/provenance/tests/data/swhgraph/with-merges/orc/snapshot_branch/snapshot_branch-all.orc b/swh/provenance/tests/data/swhgraph/with-merges/orc/snapshot_branch/snapshot_branch-all.orc
new file mode 100644
index 0000000..1af4f78
Binary files /dev/null and b/swh/provenance/tests/data/swhgraph/with-merges/orc/snapshot_branch/snapshot_branch-all.orc differ
diff --git a/swh/provenance/tests/test_archive_interface.py b/swh/provenance/tests/test_archive_interface.py
index 9d673d8..38cadfe 100644
--- a/swh/provenance/tests/test_archive_interface.py
+++ b/swh/provenance/tests/test_archive_interface.py
@@ -1,254 +1,274 @@
# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from collections import Counter
from operator import itemgetter
from typing import Any
from typing import Counter as TCounter
from typing import Dict, Iterable, List, Set, Tuple, Type, Union
import pytest
from swh.core.db import BaseDb
-from swh.graph.naive_client import NaiveClient
from swh.model.model import (
SWH_MODEL_OBJECT_TYPES,
BaseModel,
Content,
Directory,
DirectoryEntry,
ObjectType,
Origin,
OriginVisitStatus,
Release,
Revision,
Sha1Git,
Snapshot,
SnapshotBranch,
TargetType,
)
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID
from swh.provenance.archive import ArchiveInterface
from swh.provenance.multiplexer.archive import ArchiveMultiplexed
from swh.provenance.postgresql.archive import ArchivePostgreSQL
from swh.provenance.storage.archive import ArchiveStorage
from swh.provenance.swhgraph.archive import ArchiveGraph
-from swh.provenance.tests.conftest import fill_storage, load_repo_data
+from swh.provenance.tests.conftest import fill_storage, grpc_server, load_repo_data
from swh.storage.interface import StorageInterface
from swh.storage.postgresql.storage import Storage
class ArchiveNoop:
    """Archive stand-in whose every query answers with an empty list."""

    # Declared for interface compatibility only; nothing in this class sets it.
    storage: StorageInterface

    def directory_ls(self, id: Sha1Git, minsize: int = 0) -> Iterable[Dict[str, Any]]:
        """Return no directory entries, whatever ``id`` and ``minsize`` are."""
        no_entries: List[Dict[str, Any]] = []
        return no_entries

    def revision_get_some_outbound_edges(
        self, revision_id: Sha1Git
    ) -> Iterable[Tuple[Sha1Git, Sha1Git]]:
        """Return no outbound edges, whatever ``revision_id`` is."""
        no_edges: List[Tuple[Sha1Git, Sha1Git]] = []
        return no_edges

    def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
        """Return no heads, whatever ``id`` is."""
        no_heads: List[Sha1Git] = []
        return no_heads
def check_directory_ls(
    reference: ArchiveInterface, archive: ArchiveInterface, data: Dict[str, List[dict]]
) -> None:
    """Assert that both archives list the same entries for every directory.

    Each directory of ``data["directory"]`` is listed through ``reference`` and
    then through ``archive``. Both listings are ordered by entry name before
    being compared, so the order in which an archive yields its entries is
    irrelevant.

    Raises:
        AssertionError: if the two listings of some directory differ.
    """
    by_name = itemgetter("name")
    for directory in data["directory"]:
        dir_id = directory["id"]
        expected = sorted(reference.directory_ls(dir_id), key=by_name)
        actual = sorted(archive.directory_ls(dir_id), key=by_name)
        assert expected == actual
def check_revision_get_some_outbound_edges(
    reference: ArchiveInterface, archive: ArchiveInterface, data: Dict[str, List[dict]]
) -> None:
    """Assert that ``archive`` reports at least the reference outbound edges.

    For every revision of ``data["revision"]`` the edges returned by both
    archives are counted. Each (edge, count) pair obtained from ``reference``
    must also be obtained from ``archive``; ``archive`` may report additional
    edges.

    Raises:
        AssertionError: if a reference (edge, count) pair is missing from the
            pairs reported by ``archive``.
    """
    for revision in data["revision"]:
        rev_id = revision["id"]
        expected: TCounter[Tuple[Sha1Git, Sha1Git]] = Counter(
            reference.revision_get_some_outbound_edges(rev_id)
        )
        found: TCounter[Tuple[Sha1Git, Sha1Git]] = Counter(
            archive.revision_get_some_outbound_edges(rev_id)
        )
        # The reference edges (with their multiplicity) must all show up among
        # the other archive's edges.
        assert set(expected.items()).issubset(found.items())
def check_snapshot_get_heads(
    reference: ArchiveInterface, archive: ArchiveInterface, data: Dict[str, List[dict]]
) -> None:
    """Assert that both archives report the same heads for every snapshot.

    The heads of each snapshot of ``data["snapshot"]`` are counted for both
    archives and the two counts compared, so the order of the heads does not
    matter but the number of times each one is reported does.

    Raises:
        AssertionError: if the head counts of some snapshot differ.
    """
    for snapshot in data["snapshot"]:
        snapshot_id = snapshot["id"]
        expected: TCounter[Sha1Git] = Counter(reference.snapshot_get_heads(snapshot_id))
        actual: TCounter[Sha1Git] = Counter(archive.snapshot_get_heads(snapshot_id))
        assert expected == actual
def get_object_class(object_type: str) -> Type[BaseModel]:
    """Return the class registered under ``object_type``.

    The class is looked up in ``SWH_MODEL_OBJECT_TYPES``; a failed lookup
    propagates whatever that mapping raises.
    """
    model_class = SWH_MODEL_OBJECT_TYPES[object_type]
    return model_class
def data_to_model(data: Dict[str, List[dict]]) -> Dict[str, List[BaseModel]]:
    """Convert raw object dictionaries into model objects, grouped by type.

    Args:
        data: maps an object type name to the list of dictionaries describing
            the objects of that type.

    Returns:
        A dictionary with exactly the keys of ``data``. Each value lists, in
        input order, the objects built by calling ``from_dict`` on the class
        returned by ``get_object_class`` for that key.
    """
    # One entry is produced per input key, even when its list is empty, so
    # that callers indexing the result by object type never hit a KeyError
    # for a type that is present in ``data`` but has no object. The class is
    # resolved per object, hence never for an empty list.
    return {
        object_type: [get_object_class(object_type).from_dict(raw) for raw in objects]
        for object_type, objects in data.items()
    }
def add_link(
    edges: Set[
        Tuple[
            Union[CoreSWHID, ExtendedSWHID, str], Union[CoreSWHID, ExtendedSWHID, str]
        ]
    ],
    src_obj: Union[Content, Directory, Origin, Release, Revision, Snapshot],
    dst_id: bytes,
    dst_type: ExtendedObjectType,
) -> None:
    """Record in ``edges`` a link from ``src_obj`` to the object ``dst_id``.

    Args:
        edges: set of (source, destination) pairs, updated in place.
        src_obj: object the link starts from; its ``swhid()`` is the source.
        dst_id: identifier of the destination object.
        dst_type: type of the destination object.
    """
    # The destination is built first, as an ExtendedSWHID of the given type.
    destination = ExtendedSWHID(object_type=dst_type, object_id=dst_id)
    source = src_obj.swhid()
    edges.add((source, destination))
def get_graph_data(
    data: Dict[str, List[dict]]
) -> Tuple[
    List[Union[CoreSWHID, ExtendedSWHID, str]],
    List[
        Tuple[
            Union[CoreSWHID, ExtendedSWHID, str], Union[CoreSWHID, ExtendedSWHID, str]
        ]
    ],
]:
    """Compute the nodes and edges of the graph described by a dataset.

    The raw ``data`` is first converted with ``data_to_model``. Every origin,
    snapshot, revision, directory, content and release becomes a node (its
    ``swhid()``). Edges are recorded with ``add_link``:

    - origin -> snapshot, for each visit status of that origin (matched on the
      origin URL) that has a snapshot;
    - snapshot -> release or revision, for each branch targeting one of those;
      branches with another target type contribute no edge;
    - revision -> its root directory and each of its parents;
    - directory -> the target of each of its entries;
    - release -> its target, when it has one.

    Object types missing from the converted data are treated as having no
    object instead of raising ``KeyError``.

    Args:
        data: maps an object type name to the list of dictionaries describing
            the objects of that type.

    Returns:
        A ``(nodes, edges)`` pair of lists, both without duplicates and in no
        particular order.
    """
    nodes: Set[Union[CoreSWHID, ExtendedSWHID, str]] = set()
    edges: Set[
        Tuple[
            Union[CoreSWHID, ExtendedSWHID, str], Union[CoreSWHID, ExtendedSWHID, str]
        ]
    ] = set()

    model = data_to_model(data)

    # Index the snapshots of the visit statuses by origin URL once, rather
    # than rescanning every visit status for every origin.
    snapshots_by_origin: Dict[str, List[bytes]] = {}
    for status in model.get("origin_visit_status", []):
        assert isinstance(status, OriginVisitStatus)
        if status.snapshot is not None:
            snapshots_by_origin.setdefault(status.origin, []).append(status.snapshot)

    for origin in model.get("origin", []):
        assert isinstance(origin, Origin)
        nodes.add(origin.swhid())
        for snapshot_id in snapshots_by_origin.get(origin.url, []):
            add_link(edges, origin, snapshot_id, ExtendedObjectType.SNAPSHOT)

    # Only branches pointing to a release or a revision yield an edge.
    branch_target_types = {
        TargetType.RELEASE: ExtendedObjectType.RELEASE,
        TargetType.REVISION: ExtendedObjectType.REVISION,
    }
    for snapshot in model.get("snapshot", []):
        assert isinstance(snapshot, Snapshot)
        nodes.add(snapshot.swhid())
        for branch in snapshot.branches.values():
            if branch is None:
                # Branch values may be None (a branch without target, per the
                # typing of Snapshot.branches in swh.model): nothing to link.
                continue
            assert isinstance(branch, SnapshotBranch)
            target_type = branch_target_types.get(branch.target_type)
            if target_type is not None:
                add_link(edges, snapshot, branch.target, target_type)

    for revision in model.get("revision", []):
        assert isinstance(revision, Revision)
        nodes.add(revision.swhid())
        # root directory
        add_link(edges, revision, revision.directory, ExtendedObjectType.DIRECTORY)
        # parents
        for parent in revision.parents:
            add_link(edges, revision, parent, ExtendedObjectType.REVISION)

    dir_entry_types = {
        "file": ExtendedObjectType.CONTENT,
        "dir": ExtendedObjectType.DIRECTORY,
        "rev": ExtendedObjectType.REVISION,
    }
    for directory in model.get("directory", []):
        assert isinstance(directory, Directory)
        nodes.add(directory.swhid())
        for entry in directory.entries:
            assert isinstance(entry, DirectoryEntry)
            add_link(edges, directory, entry.target, dir_entry_types[entry.type])

    # Contents are leaves: they only contribute nodes.
    for content in model.get("content", []):
        assert isinstance(content, Content)
        nodes.add(content.swhid())

    release_target_types = {
        ObjectType.CONTENT: ExtendedObjectType.CONTENT,
        ObjectType.DIRECTORY: ExtendedObjectType.DIRECTORY,
        ObjectType.REVISION: ExtendedObjectType.REVISION,
        ObjectType.RELEASE: ExtendedObjectType.RELEASE,
        ObjectType.SNAPSHOT: ExtendedObjectType.SNAPSHOT,
    }
    for release in model.get("release", []):
        assert isinstance(release, Release)
        nodes.add(release.swhid())
        if release.target is not None:
            add_link(
                edges,
                release,
                release.target,
                release_target_types[release.target_type],
            )

    return list(nodes), list(edges)
@pytest.mark.parametrize(
"repo",
("cmdbts2", "out-of-order", "with-merges"),
)
def test_archive_interface(repo: str, archive: ArchiveInterface) -> None:
# read data/README.md for more details on how these datasets are generated
data = load_repo_data(repo)
fill_storage(archive.storage, data)
# test against ArchiveStorage
archive_api = ArchiveStorage(archive.storage)
check_directory_ls(archive, archive_api, data)
check_revision_get_some_outbound_edges(archive, archive_api, data)
check_snapshot_get_heads(archive, archive_api, data)
# test against ArchivePostgreSQL
assert isinstance(archive.storage, Storage)
dsn = archive.storage.get_db().conn.dsn
with BaseDb.connect(dsn).conn as conn:
BaseDb.adapt_conn(conn)
archive_direct = ArchivePostgreSQL(conn)
check_directory_ls(archive, archive_direct, data)
check_revision_get_some_outbound_edges(archive, archive_direct, data)
check_snapshot_get_heads(archive, archive_direct, data)
- # test against ArchiveGraph
- nodes, edges = get_graph_data(data)
- graph = NaiveClient(nodes=nodes, edges=edges)
- archive_graph = ArchiveGraph(graph, archive.storage)
- with pytest.raises(NotImplementedError):
- check_directory_ls(archive, archive_graph, data)
- check_revision_get_some_outbound_edges(archive, archive_graph, data)
- check_snapshot_get_heads(archive, archive_graph, data)
+
+@pytest.mark.parametrize(
+ "repo",
+ ("cmdbts2", "out-of-order", "with-merges"),
+)
+def test_archive_graph(repo: str, archive: ArchiveInterface) -> None:
+ data = load_repo_data(repo)
+ fill_storage(archive.storage, data)
+
+ with grpc_server(repo) as url:
+ # test against ArchiveGraph
+ archive_graph = ArchiveGraph(url, archive.storage)
+ with pytest.raises(NotImplementedError):
+ check_directory_ls(archive, archive_graph, data)
+ check_revision_get_some_outbound_edges(archive, archive_graph, data)
+ check_snapshot_get_heads(archive, archive_graph, data)
+
+
+@pytest.mark.parametrize(
+ "repo",
+ ("cmdbts2", "out-of-order", "with-merges"),
+)
+def test_archive_multiplexed(repo: str, archive: ArchiveInterface) -> None:
+ # read data/README.md for more details on how these datasets are generated
+ data = load_repo_data(repo)
+ fill_storage(archive.storage, data)
# test against ArchiveMultiplexer
- archive_multiplexed = ArchiveMultiplexed(
- [("noop", ArchiveNoop()), ("graph", archive_graph), ("api", archive_api)]
- )
- check_directory_ls(archive, archive_multiplexed, data)
- check_revision_get_some_outbound_edges(archive, archive_multiplexed, data)
- check_snapshot_get_heads(archive, archive_multiplexed, data)
+ with grpc_server(repo) as url:
+ archive_api = ArchiveStorage(archive.storage)
+ archive_graph = ArchiveGraph(url, archive.storage)
+ archive_multiplexed = ArchiveMultiplexed(
+ [("noop", ArchiveNoop()), ("graph", archive_graph), ("api", archive_api)]
+ )
+ check_directory_ls(archive, archive_multiplexed, data)
+ check_revision_get_some_outbound_edges(archive, archive_multiplexed, data)
+ check_snapshot_get_heads(archive, archive_multiplexed, data)
def test_noop_multiplexer():
    """A multiplexer over the no-op archive alone answers every query falsily."""
    multiplexed = ArchiveMultiplexed([("noop", ArchiveNoop())])
    queries = (
        multiplexed.directory_ls,
        multiplexed.revision_get_some_outbound_edges,
        multiplexed.snapshot_get_heads,
    )
    for query in queries:
        # Arbitrary identifier: the no-op archive ignores it.
        assert not query(Sha1Git(b"abcd"))

File Metadata

Mime Type
application/octet-stream
Expires
Fri, Apr 25, 5:20 AM (1 d, 23 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3293819

Event Timeline