Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/loader.py
Show All 35 Lines | |||||
from swh.loader.exception import NotFound | from swh.loader.exception import NotFound | ||||
from swh.loader.mercurial.utils import get_minimum_env | from swh.loader.mercurial.utils import get_minimum_env | ||||
from swh.model import identifiers | from swh.model import identifiers | ||||
from swh.model.hashutil import ( | from swh.model.hashutil import ( | ||||
DEFAULT_ALGORITHMS, | DEFAULT_ALGORITHMS, | ||||
MultiHash, | MultiHash, | ||||
hash_to_bytehex, | hash_to_bytehex, | ||||
hash_to_bytes, | hash_to_bytes, | ||||
hash_to_hex, | |||||
) | ) | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
BaseContent, | BaseContent, | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
ExtID, | |||||
ObjectType, | ObjectType, | ||||
Origin, | Origin, | ||||
Person, | Person, | ||||
Release, | Release, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Sha1Git, | Sha1Git, | ||||
SkippedContent, | SkippedContent, | ||||
Show All 12 Lines | |||||
from .objects import SelectiveCache, SimpleTree | from .objects import SelectiveCache, SimpleTree | ||||
TAG_PATTERN = re.compile("[0-9A-Fa-f]{40}") | TAG_PATTERN = re.compile("[0-9A-Fa-f]{40}") | ||||
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.mercurial." | TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.mercurial." | ||||
HEAD_POINTER_NAME = b"tip" | HEAD_POINTER_NAME = b"tip" | ||||
EXTID_TYPE = "hg-nodeid" | |||||
class CommandErrorWrapper(Exception): | class CommandErrorWrapper(Exception): | ||||
"""This exception is raised in place of a 'CommandError' | """This exception is raised in place of a 'CommandError' | ||||
exception (raised by the underlying hglib library) | exception (raised by the underlying hglib library) | ||||
This is needed because billiard.Queue is serializing the | This is needed because billiard.Queue is serializing the | ||||
queued object and as CommandError doesn't have a constructor without | queued object and as CommandError doesn't have a constructor without | ||||
parameters, the deserialization is failing | parameters, the deserialization is failing | ||||
▲ Show 20 Lines • Show All 165 Lines • ▼ Show 20 Lines | def prepare(self): | ||||
directory (str/None): The local directory to load | directory (str/None): The local directory to load | ||||
""" | """ | ||||
self.branches = {} | self.branches = {} | ||||
self.tags = [] | self.tags = [] | ||||
self.releases = {} | self.releases = {} | ||||
self.node_2_rev = {} | self.node_2_rev = {} | ||||
self.heads = {} | self.heads = {} | ||||
self.extids = [] | |||||
directory = self.directory | directory = self.directory | ||||
if not directory: # remote repository | if not directory: # remote repository | ||||
self.working_directory = mkdtemp( | self.working_directory = mkdtemp( | ||||
prefix=TEMPORARY_DIR_PREFIX_PATTERN, | prefix=TEMPORARY_DIR_PREFIX_PATTERN, | ||||
suffix="-%s" % os.getpid(), | suffix="-%s" % os.getpid(), | ||||
dir=self.temp_directory, | dir=self.temp_directory, | ||||
▲ Show 20 Lines • Show All 259 Lines • ▼ Show 20 Lines | def get_revisions(self) -> Iterable[Revision]: | ||||
revision = Revision( | revision = Revision( | ||||
author=Person.from_dict(author_dict), | author=Person.from_dict(author_dict), | ||||
date=TimestampWithTimezone.from_dict(date_dict), | date=TimestampWithTimezone.from_dict(date_dict), | ||||
committer=Person.from_dict(author_dict), | committer=Person.from_dict(author_dict), | ||||
committer_date=TimestampWithTimezone.from_dict(date_dict), | committer_date=TimestampWithTimezone.from_dict(date_dict), | ||||
type=RevisionType.MERCURIAL, | type=RevisionType.MERCURIAL, | ||||
directory=directory_id, | directory=directory_id, | ||||
message=commit["message"], | message=commit["message"], | ||||
metadata={"node": hash_to_hex(header["node"]),}, | |||||
extra_headers=tuple(extra_headers), | extra_headers=tuple(extra_headers), | ||||
synthetic=False, | synthetic=False, | ||||
parents=tuple(parents), | parents=tuple(parents), | ||||
) | ) | ||||
self.node_2_rev[header["node"]] = revision.id | self.node_2_rev[header["node"]] = revision.id | ||||
revisions[revision.id] = revision | revisions[revision.id] = revision | ||||
revision_swhid = identifiers.CoreSWHID( | |||||
object_type=identifiers.ObjectType.REVISION, object_id=revision.id, | |||||
) | |||||
self.extids.append( | |||||
ExtID( | |||||
extid_type=EXTID_TYPE, extid=header["node"], target=revision_swhid | |||||
) | |||||
) | |||||
# Converts heads to use swh ids | # Converts heads to use swh ids | ||||
self.heads = { | self.heads = { | ||||
branch_name: (pointer_nature, self.node_2_rev[node_id]) | branch_name: (pointer_nature, self.node_2_rev[node_id]) | ||||
for branch_name, (pointer_nature, node_id) in self.heads.items() | for branch_name, (pointer_nature, node_id) in self.heads.items() | ||||
} | } | ||||
missing_revs = set(revisions.keys()) | missing_revs = set(revisions.keys()) | ||||
if missing_revs: | if missing_revs: | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | def get_snapshot(self) -> Snapshot: | ||||
for name, target in self.releases.items(): | for name, target in self.releases.items(): | ||||
branches[name] = SnapshotBranch( | branches[name] = SnapshotBranch( | ||||
target=target, target_type=TargetType.RELEASE | target=target, target_type=TargetType.RELEASE | ||||
) | ) | ||||
self.snapshot = Snapshot(branches=branches) | self.snapshot = Snapshot(branches=branches) | ||||
return self.snapshot | return self.snapshot | ||||
def store_data(self) -> None: | |||||
super().store_data() | |||||
self.storage.extid_add(self.extids) | |||||
def get_fetch_history_result(self): | def get_fetch_history_result(self): | ||||
"""Return the data to store in fetch_history.""" | """Return the data to store in fetch_history.""" | ||||
return { | return { | ||||
"contents": self.num_contents, | "contents": self.num_contents, | ||||
"directories": self.num_directories, | "directories": self.num_directories, | ||||
"revisions": self.num_revisions, | "revisions": self.num_revisions, | ||||
"releases": self.num_releases, | "releases": self.num_releases, | ||||
} | } | ||||
▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines |