Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/from_disk.py
Show All 34 Lines | class GitLoaderFromDisk(DVCSLoader): | ||||
visit_type = "git" | visit_type = "git" | ||||
def __init__(self, url, visit_date=None, directory=None, config=None): | def __init__(self, url, visit_date=None, directory=None, config=None): | ||||
super().__init__(logging_class="swh.loader.git.Loader", config=config) | super().__init__(logging_class="swh.loader.git.Loader", config=config) | ||||
self.origin_url = url | self.origin_url = url | ||||
self.visit_date = visit_date | self.visit_date = visit_date | ||||
self.directory = directory | self.directory = directory | ||||
self.last_visit = None | |||||
def prepare_origin_visit(self, *args, **kwargs): | def prepare_origin_visit(self, *args, **kwargs): | ||||
self.origin = Origin(url=self.origin_url) | self.origin = Origin(url=self.origin_url) | ||||
self.last_visit = self.storage.origin_visit_get_latest(self.origin.url) | |||||
def prepare(self, *args, **kwargs): | def prepare(self, *args, **kwargs): | ||||
self.repo = dulwich.repo.Repo(self.directory) | self.repo = dulwich.repo.Repo(self.directory) | ||||
def iter_objects(self): | def iter_objects(self): | ||||
object_store = self.repo.object_store | object_store = self.repo.object_store | ||||
for pack in object_store.packs: | for pack in object_store.packs: | ||||
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | def get_object(self, oid): | ||||
"origin_url": self.origin.url, | "origin_url": self.origin.url, | ||||
}, | }, | ||||
) | ) | ||||
else: | else: | ||||
return obj | return obj | ||||
def fetch_data(self): | def fetch_data(self): | ||||
"""Fetch the data from the data source""" | """Fetch the data from the data source""" | ||||
previous_visit = self.storage.origin_visit_get_latest( | if self.last_visit is None: | ||||
self.origin.url, require_snapshot=True | |||||
) | |||||
if previous_visit: | |||||
self.previous_snapshot_id = previous_visit["snapshot"] | |||||
else: | |||||
self.previous_snapshot_id = None | self.previous_snapshot_id = None | ||||
ardumont: From discussion with @olasd and @douardda, this could be reworked to… | |||||
Done Inline Actions done ardumont: done | |||||
else: | |||||
visit_id = self.last_visit["visit"] | |||||
assert visit_id is not None | |||||
visit_status = self.storage.origin_visit_status_get_latest( | |||||
self.origin.url, visit_id, require_snapshot=True | |||||
) | |||||
self.previous_snapshot_id = visit_status.snapshot if visit_status else None | |||||
type_to_ids = defaultdict(list) | type_to_ids = defaultdict(list) | ||||
for oid in self.iter_objects(): | for oid in self.iter_objects(): | ||||
obj = self.get_object(oid) | obj = self.get_object(oid) | ||||
if not obj: | if not obj: | ||||
continue | continue | ||||
type_name = obj.type_name | type_name = obj.type_name | ||||
type_to_ids[type_name].append(oid) | type_to_ids[type_name].append(oid) | ||||
▲ Show 20 Lines • Show All 250 Lines • Show Last 20 Lines |
From discussion with @olasd and @douardda, this could be reworked to use origin_get_latest_visit_status instead.