Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
Show First 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | for group in groups: | ||||
yield from f(list(group)) | yield from f(list(group)) | ||||
class ExtrinsicMetadataIndexer( | class ExtrinsicMetadataIndexer( | ||||
BaseIndexer[Sha1Git, RawExtrinsicMetadata, OriginExtrinsicMetadataRow] | BaseIndexer[Sha1Git, RawExtrinsicMetadata, OriginExtrinsicMetadataRow] | ||||
): | ): | ||||
def process_journal_objects(self, objects: ObjectsDict) -> Dict: | def process_journal_objects(self, objects: ObjectsDict) -> Dict: | ||||
summary: Dict[str, Any] = {"status": "uneventful"} | summary: Dict[str, Any] = {"status": "uneventful"} | ||||
with sentry_sdk.push_scope() as scope: | |||||
try: | try: | ||||
results = [] | results = [] | ||||
for item in objects.get("raw_extrinsic_metadata", []): | for item in objects.get("raw_extrinsic_metadata", []): | ||||
remd = RawExtrinsicMetadata.from_dict(item) | remd = RawExtrinsicMetadata.from_dict(item) | ||||
scope.set_tag("swh-indexer-remd-swhid", remd.swhid()) | |||||
results.extend(self.index(remd.id, data=remd)) | results.extend(self.index(remd.id, data=remd)) | ||||
except Exception: | except Exception: | ||||
if not self.catch_exceptions: | if not self.catch_exceptions: | ||||
raise | raise | ||||
summary["status"] = "failed" | summary["status"] = "failed" | ||||
return summary | return summary | ||||
summary_persist = self.persist_index_computations(results) | summary_persist = self.persist_index_computations(results) | ||||
self.results = results | self.results = results | ||||
if summary_persist: | if summary_persist: | ||||
for value in summary_persist.values(): | for value in summary_persist.values(): | ||||
if value > 0: | if value > 0: | ||||
summary["status"] = "eventful" | summary["status"] = "eventful" | ||||
summary.update(summary_persist) | summary.update(summary_persist) | ||||
▲ Show 20 Lines • Show All 319 Lines • ▼ Show 20 Lines | |||||
): | ): | ||||
USE_TOOLS = False | USE_TOOLS = False | ||||
def __init__(self, config=None, **kwargs) -> None: | def __init__(self, config=None, **kwargs) -> None: | ||||
super().__init__(config=config, **kwargs) | super().__init__(config=config, **kwargs) | ||||
self.directory_metadata_indexer = DirectoryMetadataIndexer(config=config) | self.directory_metadata_indexer = DirectoryMetadataIndexer(config=config) | ||||
def index_list( | def index_list( | ||||
self, origins: List[Origin], check_origin_known: bool = True, **kwargs | self, | ||||
origins: List[Origin], | |||||
*, | |||||
check_origin_known: bool = True, | |||||
sentry_scope=None, | |||||
**kwargs, | |||||
) -> List[Tuple[OriginIntrinsicMetadataRow, DirectoryIntrinsicMetadataRow]]: | ) -> List[Tuple[OriginIntrinsicMetadataRow, DirectoryIntrinsicMetadataRow]]: | ||||
head_rev_ids = [] | head_rev_ids = [] | ||||
head_rel_ids = [] | head_rel_ids = [] | ||||
origin_heads: Dict[Origin, CoreSWHID] = {} | origin_heads: Dict[Origin, CoreSWHID] = {} | ||||
# Filter out origins not in the storage | # Filter out origins not in the storage | ||||
if check_origin_known: | if check_origin_known: | ||||
known_origins = list( | known_origins = list( | ||||
Show All 33 Lines | ) -> List[Tuple[OriginIntrinsicMetadataRow, DirectoryIntrinsicMetadataRow]]: | ||||
call_with_batches( | call_with_batches( | ||||
self.storage.release_get, head_rel_ids, RELEASE_GET_BATCH_SIZE | self.storage.release_get, head_rel_ids, RELEASE_GET_BATCH_SIZE | ||||
), | ), | ||||
) | ) | ||||
) | ) | ||||
results = [] | results = [] | ||||
for (origin, head_swhid) in origin_heads.items(): | for (origin, head_swhid) in origin_heads.items(): | ||||
sentry_scope.set_tag("swh-indexer-origin-url", origin.url) | |||||
sentry_scope.set_tag("swh-indexer-origin-head-swhid", str(head_swhid)) | |||||
if head_swhid.object_type == ObjectType.REVISION: | if head_swhid.object_type == ObjectType.REVISION: | ||||
rev = head_revs[head_swhid.object_id] | rev = head_revs[head_swhid.object_id] | ||||
if not rev: | if not rev: | ||||
self.log.warning( | self.log.warning( | ||||
"Missing head object %s of origin %r", head_swhid, origin.url | "Missing head object %s of origin %r", head_swhid, origin.url | ||||
) | ) | ||||
continue | continue | ||||
directory_id = rev.directory | directory_id = rev.directory | ||||
▲ Show 20 Lines • Show All 61 Lines • Show Last 20 Lines |