Changeset View
Changeset View
Standalone View
Standalone View
swh/vault/cookers/git_bare.py
Show First 20 Lines • Show All 322 Lines • ▼ Show 20 Lines | class GitBareCooker(BaseVaultCooker): | ||||
def push_snapshot_subgraph(self, obj_id: Sha1Git) -> None:
    """Fetches a snapshot and all its children, and writes them to disk.

    When ``self.graph`` is set, the snapshot's subgraph is first requested
    from it (following ``snp:*``, ``rel:*`` and ``rev:rev`` edges) and the
    returned revision, release and directory ids are pushed onto the
    corresponding stacks. If the graph raises ``GraphArgumentException``,
    or if no graph is configured, each branch of the snapshot is walked
    individually instead.

    In both cases the snapshot's branches are read from ``self.storage``
    and passed to ``self.write_refs``.

    Args:
        obj_id: identifier of the snapshot to cook.

    Raises:
        NotImplementedError: if the graph returns an object type other than
            revision, release, directory or snapshot, or if a branch has an
            unsupported target type.
        AssertionError: if the graph returns a snapshot other than
            ``obj_id``, or if the snapshot is not found in storage.
    """
    loaded_from_graph = False

    if self.graph:
        revision_ids = []
        release_ids = []
        directory_ids = []

        # Imported locally, and only when a graph client is configured.
        from swh.graph.client import GraphArgumentException

        # First, try to cook using swh-graph, as it is more efficient than
        # swh-storage for querying the history
        obj_swhid = identifiers.CoreSWHID(
            object_type=identifiers.ObjectType.SNAPSHOT, object_id=obj_id,
        )
        try:
            swhids = map(
                identifiers.CoreSWHID.from_string,
                self.graph.visit_nodes(str(obj_swhid), edges="snp:*,rel:*,rev:rev"),
            )
            for swhid in swhids:
                if swhid.object_type == identifiers.ObjectType.REVISION:
                    revision_ids.append(swhid.object_id)
                elif swhid.object_type == identifiers.ObjectType.RELEASE:
                    release_ids.append(swhid.object_id)
                elif swhid.object_type == identifiers.ObjectType.DIRECTORY:
                    directory_ids.append(swhid.object_id)
                elif swhid.object_type == identifiers.ObjectType.SNAPSHOT:
                    assert (
                        swhid.object_id == obj_id
                    ), f"Snapshot {obj_id.hex()} references a different snapshot"
                else:
                    raise NotImplementedError(
                        f"{swhid.object_type} objects in snapshot subgraphs."
                    )
        except GraphArgumentException as e:
            logger.info(
                "Snapshot %s not found in swh-graph, falling back to fetching "
                "history for each branch. %s",
                hash_to_hex(obj_id),
                e.args[0],
            )
        else:
            # Only push once the whole traversal succeeded, so a failed
            # graph query leaves the stacks untouched for the fallback path.
            self._push(self._rev_stack, revision_ids)
            self._push(self._rel_stack, release_ids)
            self._push(self._dir_stack, directory_ids)
            loaded_from_graph = True

    # TODO: when self.graph is available and supports edge labels, use it
    # directly to get branch names.
    snapshot = snapshot_get_all_branches(self.storage, obj_id)
    assert snapshot, "Unknown snapshot"  # should have been caught by check_exists()

    if not loaded_from_graph:
        # Fallback: fetch the history of each branch target individually.
        for branch_name, branch in snapshot.branches.items():
            if branch is None:
                # Log the branch *name*: the branch object itself is None
                # here, so logging it would carry no information.
                logger.warning("Dangling branch: %r", branch_name)
            elif branch.target_type == TargetType.REVISION:
                self.push_revision_subgraph(branch.target)
            elif branch.target_type == TargetType.RELEASE:
                self.push_releases_subgraphs([branch.target])
            elif branch.target_type == TargetType.ALIAS:
                # Nothing to do, this for loop also iterates on the target branch
                # (if it exists)
                pass
            elif branch.target_type == TargetType.DIRECTORY:
                self._push(self._dir_stack, [branch.target])
            else:
                raise NotImplementedError(f"{branch.target_type} branches")

    self.write_refs(snapshot=snapshot)
def load_revisions(self, obj_ids: List[Sha1Git]) -> None: | def load_revisions(self, obj_ids: List[Sha1Git]) -> None: | ||||
"""Given a list of revision ids, loads these revisions and their directories; | """Given a list of revision ids, loads these revisions and their directories; | ||||
but not their parent revisions.""" | but not their parent revisions.""" | ||||
▲ Show 20 Lines • Show All 141 Lines • Show Last 20 Lines |