Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/cli.py
Show First 20 Lines • Show All 135 Lines • ▼ Show 20 Lines | if profile: | ||||
pr.disable() | pr.disable() | ||||
pr.dump_stats(profile) | pr.dump_stats(profile) | ||||
atexit.register(exit) | atexit.register(exit) | ||||
@cli.command(name="iter-frontiers") | @cli.command(name="iter-frontiers") | ||||
@click.argument("filename") | @click.argument("filename") | ||||
@click.option("-l", "--limit", type=int) | @click.option( | ||||
@click.option("-s", "--min-size", default=0, type=int) | "-l", | ||||
"--limit", | |||||
type=int, | |||||
help="""Limit the amount of entries (directories) to read from the input file.""", | |||||
) | |||||
@click.option( | |||||
"-s", | |||||
"--min-size", | |||||
default=0, | |||||
type=int, | |||||
help="""Set the minimum size (in bytes) of files to be indexed. """ | |||||
"""Any smaller file will be ignored.""", | |||||
) | |||||
@click.pass_context | @click.pass_context | ||||
def iter_frontiers( | def iter_frontiers( | ||||
ctx: click.core.Context, | ctx: click.core.Context, | ||||
filename: str, | filename: str, | ||||
limit: Optional[int], | limit: Optional[int], | ||||
min_size: int, | min_size: int, | ||||
) -> None: | ) -> None: | ||||
"""Process a provided list of directories in the isochrone frontier.""" | """Process a provided list of directories in the isochrone frontier.""" | ||||
def generate_directory_ids(
    filename: str,
) -> Generator[Sha1Git, None, None]:
    """Yield one directory identifier per non-blank line of ``filename``.

    Every non-blank line is stripped of surrounding whitespace and converted
    with ``hash_to_bytes``; blank lines are skipped.

    The file is opened in a ``with`` block so that its handle is closed when
    the generator is exhausted or closed, instead of being left open until
    the file object happens to be garbage-collected.
    """
    with open(filename, "r") as lines:
        for line in lines:
            entry = line.strip()
            if entry:
                yield hash_to_bytes(entry)
@cli.command(name="iter-revisions")
@click.argument("filename")
@click.option(
    "-a",
    "--track-all",
    default=True,
    type=bool,
    help="Index all occurrences of files in the development history.",
)
@click.option(
    "-f",
    "--flatten",
    default=True,
    type=bool,
    help="Create flat models for directories in the isochrone frontier.",
)
@click.option(
    "-l",
    "--limit",
    type=int,
    help="Limit the amount of entries (revisions) to read from the input file.",
)
@click.option(
    "-m",
    "--min-depth",
    default=1,
    type=int,
    help=(
        "Set minimum depth (in the directory tree) at which an isochrone "
        "frontier can be defined."
    ),
)
@click.option(
    "-r",
    "--reuse",
    default=True,
    type=bool,
    help=(
        "Prioritize the usage of previously defined isochrone frontiers "
        "whenever possible."
    ),
)
@click.option(
    "-s",
    "--min-size",
    default=0,
    type=int,
    help=(
        "Set the minimum size (in bytes) of files to be indexed. "
        "Any smaller file will be ignored."
    ),
)
@click.pass_context
def iter_revisions(
    ctx: click.core.Context,
    filename: str,
    track_all: bool,
    flatten: bool,
    limit: Optional[int],
    min_depth: int,
    reuse: bool,
    min_size: int,
) -> None:
    """Process a provided list of revisions."""
    # The docstring above is kept verbatim: click displays it as the
    # command's ``--help`` text.
    #
    # Each non-blank line of ``filename`` is parsed by
    # ``generate_revision_tuples`` and wrapped in a ``CSVRevisionIterator``
    # (bounded by ``limit``); every revision it yields is then passed, one at
    # a time, to ``revision_add`` together with the CLI options.
    from . import get_archive, get_provenance
    from .revision import CSVRevisionIterator, revision_add

    provenance_config = ctx.obj["config"]["provenance"]
    archive = get_archive(**provenance_config["archive"])
    revisions = CSVRevisionIterator(
        generate_revision_tuples(filename),
        limit=limit,
    )

    with get_provenance(**provenance_config["storage"]) as provenance:
        for revision in revisions:
            # revision_add receives a list of revisions; feed it one at a time.
            revision_add(
                provenance,
                archive,
                [revision],
                trackall=track_all,
                flatten=flatten,
                lower=reuse,
                mindepth=min_depth,
                minsize=min_size,
            )
def generate_revision_tuples(
    filename: str,
) -> Generator[Tuple[Sha1Git, datetime, Sha1Git], None, None]:
    """Yield a ``(revision, date, root)`` tuple per non-blank line of ``filename``.

    Each non-blank line is stripped and split on ``","`` into exactly three
    fields: the revision and root fields are converted with
    ``hash_to_bytes`` and the date field is parsed with
    ``iso8601.parse_date`` using UTC as the default timezone.

    The file is opened in a ``with`` block so that its handle is closed when
    the generator is exhausted or closed, instead of being left open until
    the file object happens to be garbage-collected.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            comma-separated fields.
    """
    with open(filename, "r") as lines:
        for line in lines:
            row = line.strip()
            if not row:
                continue
            revision, date, root = row.split(",")
            yield (
                hash_to_bytes(revision),
                iso8601.parse_date(date, default_timezone=timezone.utc),
                hash_to_bytes(root),
            )
@cli.command(name="iter-origins") | @cli.command(name="iter-origins") | ||||
@click.argument("filename") | @click.argument("filename") | ||||
@click.option("-l", "--limit", type=int) | @click.option( | ||||
"-l", | |||||
"--limit", | |||||
type=int, | |||||
help="""Limit the amount of entries (origins) to read from the input file.""", | |||||
) | |||||
@click.pass_context | @click.pass_context | ||||
def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None: | def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None: | ||||
"""Process a provided list of origins.""" | """Process a provided list of origins.""" | ||||
from . import get_archive, get_provenance | from . import get_archive, get_provenance | ||||
from .origin import CSVOriginIterator, origin_add | from .origin import CSVOriginIterator, origin_add | ||||
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | ||||
origins_provider = generate_origin_tuples(filename) | origins_provider = generate_origin_tuples(filename) | ||||
Show All 29 Lines | with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: | ||||
f"{os.fsdecode(occur.path)}" | f"{os.fsdecode(occur.path)}" | ||||
) | ) | ||||
else: | else: | ||||
print(f"Cannot find a content with the id {swhid}") | print(f"Cannot find a content with the id {swhid}") | ||||
@cli.command(name="find-all")
@click.argument("swhid")
@click.option(
    "-l",
    "--limit",
    type=int,
    help="""Limit the amount of results to be retrieved.""",
)
@click.pass_context
def find_all(ctx: click.core.Context, swhid: str, limit: Optional[int]) -> None:
    """Find all occurrences of the requested blob."""
    # The docstring above is kept verbatim: click displays it as the
    # command's ``--help`` text.
    from . import get_provenance

    with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance:
        # One output line per occurrence returned by the provenance storage:
        # content id, revision id, date, origin and decoded path.
        for occur in provenance.content_find_all(hash_to_bytes(swhid), limit=limit):
            print(
                f"swh:1:cnt:{hash_to_hex(occur.content)}, "
                f"swh:1:rev:{hash_to_hex(occur.revision)}, "
                f"{occur.date}, "
                f"{occur.origin}, "
                f"{os.fsdecode(occur.path)}"
            )
It would be very helpful to add the `help` argument to these click options.