Page MenuHomeSoftware Heritage

dump.py
No OneTemporary

#!/usr/bin/env python
import logging
import os
from swh.model.hashutil import hash_to_hex
from swh.provenance import get_provenance
from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
# TODO: take conninfo as command line arguments.
conninfo = {
"cls": "local",
"db": {"host": "/var/run/postgresql", "port": "5436", "dbname": "provenance"},
}
def dump(type, hash, time, path="", header="", table=""):
return (
f"{str(header).ljust(5)} | "
f"{str(table).ljust(5)} | "
f"{str(path).ljust(30)} | "
f"{type} {hash_to_hex(hash)} | "
f"{str(time).rjust(10)}"
)
if __name__ == "__main__":
# Set minimum logging level to INFO.
logging.getLogger().setLevel(logging.INFO)
# Get provenance object.
provenance = get_provenance(**conninfo)
# TODO: use ProvenanceStorageInterface instead!
assert isinstance(provenance.storage, ProvenanceStoragePostgreSql)
with provenance.storage.transaction() as cursor:
cursor.execute("""SELECT sha1, date FROM revision ORDER BY date""")
revisions = list(cursor.fetchall())
for idx, (revision, date) in enumerate(revisions):
# Display current revision information.
header = f"R{idx:04}"
timestamp = date.timestamp()
print(f"{timestamp} {hash_to_hex(revision)} {header}")
print(dump("R", revision, timestamp, header=header))
# Display content found early in current revision.
with provenance.storage.transaction() as cursor:
cursor.execute(
"""SELECT content.sha1 AS blob,
content.date AS date,
content_location.path AS path
FROM (SELECT content_in_rev.blob,
location.path
FROM (SELECT content_early_in_rev.blob,
content_early_in_rev.loc
FROM content_early_in_rev
JOIN revision
ON revision.id=content_early_in_rev.rev
WHERE revision.sha1=%s
) AS content_in_rev
JOIN location
ON location.id=content_in_rev.loc
) AS content_location
JOIN content
ON content.id=content_location.blob
ORDER BY path""",
(revision,),
)
content = list(cursor.fetchall())
for blob, date, path in content:
delta = date.timestamp() - timestamp
location = os.fsdecode(path)
print(dump("C", blob, delta, path=location, table="R---C"))
# Display isochrone frontiers found in current revision.
cursor.execute(
"""SELECT directory.sha1 AS dir,
directory.date AS date,
directory_location.path AS path
FROM (SELECT isochrone_frontier.dir,
location.path
FROM (SELECT directory_in_rev.dir,
directory_in_rev.loc
FROM directory_in_rev
JOIN revision
ON revision.id=directory_in_rev.rev
WHERE revision.sha1=%s
) AS isochrone_frontier
JOIN location
ON location.id=isochrone_frontier.loc
) AS directory_location
JOIN directory
ON directory.id=directory_location.dir
ORDER BY path""",
(revision,),
)
directories = list(cursor.fetchall())
for directory, date, path in directories:
delta = date.timestamp() - timestamp
location = os.fsdecode(path) + "/"
if location == "/":
location = "./"
print(dump("D", directory, delta, path=location, table="R-D "))
# Display content found outside the current isochrone frontier.
cursor.execute(
"""SELECT content.sha1 AS blob,
content.date AS date,
content_location.path AS path
FROM (SELECT content_outside.blob,
location.path
FROM (SELECT content_in_dir.blob,
content_in_dir.loc
FROM content_in_dir
JOIN directory
ON directory.id=content_in_dir.dir
WHERE directory.sha1=%s
) AS content_outside
JOIN location
ON location.id=content_outside.loc
) AS content_location
JOIN content
ON content.id=content_location.blob
ORDER BY path""",
(directory,),
)
content = list(cursor.fetchall())
for blob, date, path in content:
delta = date.timestamp() - timestamp
location = " + " + os.fsdecode(path)
print(dump("C", blob, delta, path=location, table=" D-C"))
print("")

File Metadata

Mime Type
text/x-python
Expires
Wed, Jun 4, 7:13 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3269531

Event Timeline