diff --git a/swh/dataset/graph.py b/swh/dataset/graph.py
--- a/swh/dataset/graph.py
+++ b/swh/dataset/graph.py
@@ -184,13 +184,22 @@
        function) ;
      - deflate the edges ;
      - count the number of edges and write it in graph.edges.count.txt ;
+     - count the number of occurrences of each edge type and write them
+       in graph.edges.stats.txt ;
      - concatenate all the (deflated) nodes from the export with the
        destination edges, and sort the output to get the list of unique graph
        nodes ;
      - count the number of unique graph nodes and write it in
        graph.nodes.count.txt ;
+     - count the number of occurrences of each node type and write them
+       in graph.nodes.stats.txt ;
      - compress and write the resulting nodes in graph.nodes.csv.zst.
     """
+
+    # Use awk as a replacement for `sort | uniq -c`: it only keeps one counter per
+    # distinct line in memory instead of buffering and sorting the whole input
+    counter_command = "awk '{ t[$0]++ } END { for (i in t) print i,t[i] }'"
+
     # Use bytes for the sorting algorithm (faster than being locale-specific)
     env = {
         **os.environ.copy(),
@@ -212,15 +221,20 @@
                     "tee {export_path}/graph.edges.csv.zst |"
                     "zstdcat |"
                     "tee >( wc -l > {export_path}/graph.edges.count.txt ) |"
+                    "tee >( cut -d: -f3,6 | {counter_command} | sort "
+                    "           > {export_path}/graph.edges.stats.txt ) |"
                     "cut -d' ' -f2 | "
                     "cat - <( zstdcat {export_path}/*/*.nodes.csv.zst ) | "
                     "sort -u -S{sort_buffer_size} -T{buffer_path} | "
                     "tee >( wc -l > {export_path}/graph.nodes.count.txt ) |"
+                    "tee >( cut -d: -f3 | {counter_command} | sort "
+                    "           > {export_path}/graph.nodes.stats.txt ) |"
                     "zstdmt > {export_path}/graph.nodes.csv.zst"
                 ).format(
                     export_path=shlex.quote(str(export_path)),
                     buffer_path=shlex.quote(str(buffer_path)),
                     sort_buffer_size=shlex.quote(sort_buffer_size),
+                    counter_command=counter_command,
                 ),
             ],
             env=env,
diff --git a/swh/dataset/test/test_graph.py b/swh/dataset/test/test_graph.py
--- a/swh/dataset/test/test_graph.py
+++ b/swh/dataset/test/test_graph.py
@@ -3,6 +3,7 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import collections
 import hashlib
 from typing import Tuple
 
@@ -484,9 +485,32 @@
     output_nodes = list(filter(bool, output_nodes))
     output_edges = list(filter(bool, output_edges))
 
-    expected_nodes = set(input_nodes) | set(l.split()[1] for l in input_edges)
+    expected_nodes = set(input_nodes) | set(e.split()[1] for e in input_edges)
     assert output_nodes == sorted(expected_nodes)
     assert int((tmp_path / "graph.nodes.count.txt").read_text()) == len(expected_nodes)
 
     assert sorted(output_edges) == sorted(input_edges)
     assert int((tmp_path / "graph.edges.count.txt").read_text()) == len(input_edges)
+
+    actual_node_stats = (tmp_path / "graph.nodes.stats.txt").read_text().strip()
+    expected_node_stats = "\n".join(
+        sorted(
+            "{} {}".format(k, v)
+            for k, v in collections.Counter(
+                node.split(":")[2] for node in expected_nodes
+            ).items()
+        )
+    )
+    assert actual_node_stats == expected_node_stats
+
+    actual_edge_stats = (tmp_path / "graph.edges.stats.txt").read_text().strip()
+    expected_edge_stats = "\n".join(
+        sorted(
+            "{} {}".format(k, v)
+            for k, v in collections.Counter(
+                "{}:{}".format(edge.split(":")[2], edge.split(":")[5])
+                for edge in input_edges
+            ).items()
+        )
+    )
+    assert actual_edge_stats == expected_edge_stats