Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/cli.py
Show All 36 Lines | "provenance": { | ||||
"db": { | "db": { | ||||
"host": "db.internal.softwareheritage.org", | "host": "db.internal.softwareheritage.org", | ||||
"dbname": "softwareheritage", | "dbname": "softwareheritage", | ||||
"user": "guest", | "user": "guest", | ||||
}, | }, | ||||
}, | }, | ||||
"storage": { | "storage": { | ||||
# Local PostgreSQL Storage | # Local PostgreSQL Storage | ||||
# "cls": "postgresql", | |||||
# "db": { | |||||
# "host": "localhost", | |||||
# "user": "postgres", | |||||
# "password": "postgres", | |||||
# "dbname": "provenance", | |||||
# }, | |||||
# Local MongoDB Storage | |||||
# "cls": "mongodb", | |||||
# "db": { | |||||
# "dbname": "provenance", | |||||
# }, | |||||
# Remote RabbitMQ/PostgreSQL Storage | |||||
"cls": "rabbitmq", | |||||
"url": "amqp://localhost:5672/%2f", | |||||
"storage_config": { | |||||
"cls": "postgresql", | "cls": "postgresql", | ||||
"db": { | "db": { | ||||
"host": "localhost", | "host": "localhost", | ||||
"user": "postgres", | "user": "postgres", | ||||
"password": "postgres", | "password": "postgres", | ||||
"dbname": "provenance", | "dbname": "provenance", | ||||
}, | }, | ||||
# Local MongoDB Storage | }, | ||||
# "cls": "mongodb", | |||||
# "db": { | |||||
# "dbname": "provenance", | |||||
# }, | |||||
# Remote RPC-API/PostgreSQL | # Remote RPC-API/PostgreSQL | ||||
# "cls": "rpcapi", | # "cls": "rpcapi", | ||||
# "url": "http://localhost:8080/%2f", | # "url": "http://localhost:8080/%2f", | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | for revision in revisions: | ||||
revision_add( | revision_add( | ||||
provenance, | provenance, | ||||
archive, | archive, | ||||
[revision], | [revision], | ||||
trackall=track_all, | trackall=track_all, | ||||
lower=reuse, | lower=reuse, | ||||
mindepth=min_depth, | mindepth=min_depth, | ||||
) | ) | ||||
provenance.close() | |||||
def generate_revision_tuples( | def generate_revision_tuples( | ||||
filename: str, | filename: str, | ||||
) -> Generator[Tuple[Sha1Git, datetime, Sha1Git], None, None]: | ) -> Generator[Tuple[Sha1Git, datetime, Sha1Git], None, None]: | ||||
for line in open(filename, "r"): | for line in open(filename, "r"): | ||||
if line.strip(): | if line.strip(): | ||||
revision, date, root = line.strip().split(",") | revision, date, root = line.strip().split(",") | ||||
Show All 15 Lines | def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None: | ||||
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | ||||
provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"]) | provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"]) | ||||
origins_provider = generate_origin_tuples(filename) | origins_provider = generate_origin_tuples(filename) | ||||
origins = CSVOriginIterator(origins_provider, limit=limit) | origins = CSVOriginIterator(origins_provider, limit=limit) | ||||
for origin in origins: | for origin in origins: | ||||
origin_add(provenance, archive, [origin]) | origin_add(provenance, archive, [origin]) | ||||
provenance.close() | |||||
def generate_origin_tuples(filename: str) -> Generator[Tuple[str, bytes], None, None]:
    """Lazily parse a CSV file of ``origin_url,snapshot_id`` rows.

    Args:
        filename: path to a text file with one comma-separated
            ``url,snapshot`` pair per line; blank lines are skipped.

    Yields:
        ``(url, snapshot)`` tuples, with the snapshot hex digest decoded to
        bytes via ``hash_to_bytes``.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            comma-separated fields.
    """
    # Use a context manager so the file handle is closed deterministically,
    # even if the consumer abandons the generator early, instead of lingering
    # until garbage collection.
    with open(filename, "r") as infile:
        for line in infile:
            stripped = line.strip()
            if stripped:
                url, snapshot = stripped.split(",")
                yield (url, hash_to_bytes(snapshot))
Show All 12 Lines | if occur is not None: | ||||
f"swh:1:cnt:{hash_to_hex(occur.content)}, " | f"swh:1:cnt:{hash_to_hex(occur.content)}, " | ||||
f"swh:1:rev:{hash_to_hex(occur.revision)}, " | f"swh:1:rev:{hash_to_hex(occur.revision)}, " | ||||
f"{occur.date}, " | f"{occur.date}, " | ||||
f"{occur.origin}, " | f"{occur.origin}, " | ||||
f"{os.fsdecode(occur.path)}" | f"{os.fsdecode(occur.path)}" | ||||
) | ) | ||||
else: | else: | ||||
print(f"Cannot find a content with the id {swhid}") | print(f"Cannot find a content with the id {swhid}") | ||||
provenance.close() | |||||
@cli.command(name="find-all")
@click.argument("swhid")
@click.option("-l", "--limit", type=int)
@click.pass_context
def find_all(ctx: click.core.Context, swhid: str, limit: Optional[int]) -> None:
    """Find all occurrences of the requested blob.

    Prints one CSV-ish line per occurrence: content SWHID, revision SWHID,
    date, origin, and path.  ``--limit`` caps the number of results.
    """
    from . import get_provenance

    provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"])
    try:
        for occur in provenance.content_find_all(hash_to_bytes(swhid), limit=limit):
            print(
                f"swh:1:cnt:{hash_to_hex(occur.content)}, "
                f"swh:1:rev:{hash_to_hex(occur.revision)}, "
                f"{occur.date}, "
                f"{occur.origin}, "
                f"{os.fsdecode(occur.path)}"
            )
    finally:
        # Close the storage backend even if the lookup or printing raises, so
        # the underlying connection (PostgreSQL / RabbitMQ / RPC — see the
        # config block above) is not leaked.
        provenance.close()