Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/cli.py
Show First 20 Lines • Show All 158 Lines • ▼ Show 20 Lines | |||||
@click.option( | @click.option( | ||||
"-l", | "-l", | ||||
"--limit", | "--limit", | ||||
type=int, | type=int, | ||||
help="""Limit the amount of entries (origins) to read from the input file.""", | help="""Limit the amount of entries (origins) to read from the input file.""", | ||||
) | ) | ||||
@click.pass_context | @click.pass_context | ||||
def origin_from_csv(ctx: click.core.Context, filename: str, limit: Optional[int]): | def origin_from_csv(ctx: click.core.Context, filename: str, limit: Optional[int]): | ||||
from .origin import CSVOriginIterator, origin_add | from swh.provenance.algos.origin import CSVOriginIterator, origin_add | ||||
provenance = ctx.obj["provenance"] | provenance = ctx.obj["provenance"] | ||||
archive = ctx.obj["archive"] | archive = ctx.obj["archive"] | ||||
origins_provider = generate_origin_tuples(filename) | origins_provider = generate_origin_tuples(filename) | ||||
origins = CSVOriginIterator(origins_provider, limit=limit) | origins = CSVOriginIterator(origins_provider, limit=limit) | ||||
with provenance: | with provenance: | ||||
▲ Show 20 Lines • Show All 117 Lines • ▼ Show 20 Lines | def revision_from_csv( | ||||
track_all: bool, | track_all: bool, | ||||
flatten: bool, | flatten: bool, | ||||
limit: Optional[int], | limit: Optional[int], | ||||
min_depth: int, | min_depth: int, | ||||
reuse: bool, | reuse: bool, | ||||
min_size: int, | min_size: int, | ||||
max_directory_size: int, | max_directory_size: int, | ||||
) -> None: | ) -> None: | ||||
from .revision import CSVRevisionIterator, revision_add | from swh.provenance.algos.revision import CSVRevisionIterator, revision_add | ||||
provenance = ctx.obj["provenance"] | provenance = ctx.obj["provenance"] | ||||
archive = ctx.obj["archive"] | archive = ctx.obj["archive"] | ||||
revisions_provider = generate_revision_tuples(filename) | revisions_provider = generate_revision_tuples(filename) | ||||
revisions = CSVRevisionIterator(revisions_provider, limit=limit) | revisions = CSVRevisionIterator(revisions_provider, limit=limit) | ||||
with provenance: | with provenance: | ||||
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines | @click.option( | ||||
"--min-size", | "--min-size", | ||||
default=0, | default=0, | ||||
type=int, | type=int, | ||||
help="""Set the minimum size (in bytes) of files to be indexed. | help="""Set the minimum size (in bytes) of files to be indexed. | ||||
Any smaller file will be ignored.""", | Any smaller file will be ignored.""", | ||||
) | ) | ||||
@click.pass_context | @click.pass_context | ||||
def directory_flatten(ctx: click.core.Context, range_from, range_to, min_size): | def directory_flatten(ctx: click.core.Context, range_from, range_to, min_size): | ||||
from swh.provenance.directory import directory_flatten_range | from swh.provenance.algos.directory import directory_flatten_range | ||||
provenance = ctx.obj["provenance"] | provenance = ctx.obj["provenance"] | ||||
archive = ctx.obj["archive"] | archive = ctx.obj["archive"] | ||||
directory_flatten_range( | directory_flatten_range( | ||||
provenance, | provenance, | ||||
archive, | archive, | ||||
hash_to_bytes(range_from), | hash_to_bytes(range_from), | ||||
Show All 22 Lines | |||||
@click.pass_context | @click.pass_context | ||||
def iter_frontiers( | def iter_frontiers( | ||||
ctx: click.core.Context, | ctx: click.core.Context, | ||||
filename: str, | filename: str, | ||||
limit: Optional[int], | limit: Optional[int], | ||||
min_size: int, | min_size: int, | ||||
) -> None: | ) -> None: | ||||
"""Process a provided list of directories in the isochrone frontier.""" | """Process a provided list of directories in the isochrone frontier.""" | ||||
from . import get_provenance | from swh.provenance import get_provenance | ||||
from .archive import get_archive | from swh.provenance.algos.directory import CSVDirectoryIterator, directory_add | ||||
from .directory import CSVDirectoryIterator, directory_add | from swh.provenance.archive import get_archive | ||||
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | ||||
directories_provider = generate_directory_ids(filename) | directories_provider = generate_directory_ids(filename) | ||||
directories = CSVDirectoryIterator(directories_provider, limit=limit) | directories = CSVDirectoryIterator(directories_provider, limit=limit) | ||||
with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: | with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: | ||||
for directory in directories: | for directory in directories: | ||||
directory_add( | directory_add( | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | def iter_revisions( | ||||
track_all: bool, | track_all: bool, | ||||
flatten: bool, | flatten: bool, | ||||
limit: Optional[int], | limit: Optional[int], | ||||
min_depth: int, | min_depth: int, | ||||
reuse: bool, | reuse: bool, | ||||
min_size: int, | min_size: int, | ||||
) -> None: | ) -> None: | ||||
"""Process a provided list of revisions.""" | """Process a provided list of revisions.""" | ||||
from . import get_provenance | from swh.provenance import get_provenance | ||||
from .archive import get_archive | from swh.provenance.algos.revision import CSVRevisionIterator, revision_add | ||||
from .revision import CSVRevisionIterator, revision_add | from swh.provenance.archive import get_archive | ||||
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | ||||
revisions_provider = generate_revision_tuples(filename) | revisions_provider = generate_revision_tuples(filename) | ||||
revisions = CSVRevisionIterator(revisions_provider, limit=limit) | revisions = CSVRevisionIterator(revisions_provider, limit=limit) | ||||
with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: | with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: | ||||
for revision in revisions: | for revision in revisions: | ||||
revision_add( | revision_add( | ||||
Show All 28 Lines | @click.option( | ||||
"--limit", | "--limit", | ||||
type=int, | type=int, | ||||
help="""Limit the amount of entries (origins) to read from the input file.""", | help="""Limit the amount of entries (origins) to read from the input file.""", | ||||
) | ) | ||||
@click.pass_context | @click.pass_context | ||||
@deprecated(version="0.0.1", reason="Use `swh provenance origin from-csv` instead") | @deprecated(version="0.0.1", reason="Use `swh provenance origin from-csv` instead") | ||||
def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None: | def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None: | ||||
"""Process a provided list of origins.""" | """Process a provided list of origins.""" | ||||
from . import get_provenance | from swh.provenance import get_provenance | ||||
from .archive import get_archive | from swh.provenance.algos.origin import CSVOriginIterator, origin_add | ||||
from .origin import CSVOriginIterator, origin_add | from swh.provenance.archive import get_archive | ||||
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) | ||||
origins_provider = generate_origin_tuples(filename) | origins_provider = generate_origin_tuples(filename) | ||||
origins = CSVOriginIterator(origins_provider, limit=limit) | origins = CSVOriginIterator(origins_provider, limit=limit) | ||||
with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: | with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: | ||||
for origin in origins: | for origin in origins: | ||||
origin_add(provenance, archive, [origin]) | origin_add(provenance, archive, [origin]) | ||||
▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines |