Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
Show First 20 Lines • Show All 319 Lines • ▼ Show 20 Lines | ): | ||||
USE_TOOLS = False | USE_TOOLS = False | ||||
def __init__(self, config=None, **kwargs) -> None:
    """Initialize the indexer and the two sub-indexers it holds.

    Args:
        config: configuration passed to the parent initializer and reused,
            unchanged, to build both sub-indexers.
        **kwargs: extra keyword arguments forwarded to the parent
            initializer only.
    """
    super().__init__(config=config, **kwargs)

    # Both sub-indexers receive the same ``config`` as this indexer;
    # ``kwargs`` is not forwarded to them.
    # ``origin_head_indexer`` is queried per origin URL in ``index_list``.
    self.origin_head_indexer = OriginHeadIndexer(config=config)
    self.revision_metadata_indexer = RevisionMetadataIndexer(config=config)
def index_list( | def index_list( | ||||
self, origins: List[Origin], **kwargs | self, origins: List[Origin], check_origin_known: bool = True, **kwargs | ||||
) -> List[Tuple[OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow]]: | ) -> List[Tuple[OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow]]: | ||||
head_rev_ids = [] | head_rev_ids = [] | ||||
origins_with_head = [] | origins_with_head = [] | ||||
# Filter out origins not in the storage | # Filter out origins not in the storage | ||||
if check_origin_known: | |||||
known_origins = list( | known_origins = list( | ||||
call_with_batches( | call_with_batches( | ||||
self.storage.origin_get, | self.storage.origin_get, | ||||
[origin.url for origin in origins], | [origin.url for origin in origins], | ||||
ORIGIN_GET_BATCH_SIZE, | ORIGIN_GET_BATCH_SIZE, | ||||
) | ) | ||||
) | ) | ||||
else: | |||||
known_origins = list(origins) | |||||
for origin in known_origins: | for origin in known_origins: | ||||
if origin is None: | if origin is None: | ||||
continue | continue | ||||
head_results = self.origin_head_indexer.index(origin.url) | head_results = self.origin_head_indexer.index(origin.url) | ||||
if head_results: | if head_results: | ||||
(head_result,) = head_results | (head_result,) = head_results | ||||
origins_with_head.append(origin) | origins_with_head.append(origin) | ||||
▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines |