Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
Show First 20 Lines • Show All 258 Lines • ▼ Show 20 Lines | class OriginMetadataIndexer(OriginIndexer): | ||||
USE_TOOLS = False | USE_TOOLS = False | ||||
def __init__(self, config=None, **kwargs): | def __init__(self, config=None, **kwargs): | ||||
super().__init__(config=config, **kwargs) | super().__init__(config=config, **kwargs) | ||||
self.origin_head_indexer = OriginHeadIndexer(config=config) | self.origin_head_indexer = OriginHeadIndexer(config=config) | ||||
self.revision_metadata_indexer = RevisionMetadataIndexer(config=config) | self.revision_metadata_indexer = RevisionMetadataIndexer(config=config) | ||||
def index_list(self, origins): | def index_list(self, origin_urls): | ||||
head_rev_ids = [] | head_rev_ids = [] | ||||
origins_with_head = [] | origins_with_head = [] | ||||
origins = self.storage.origin_get( | |||||
[{'url': url} for url in origin_urls]) | |||||
for origin in origins: | for origin in origins: | ||||
head_result = self.origin_head_indexer.index(origin) | head_result = self.origin_head_indexer.index(origin['url']) | ||||
if head_result: | if head_result: | ||||
head_result['origin_id'] = origin['id'] | |||||
origins_with_head.append(origin) | origins_with_head.append(origin) | ||||
head_rev_ids.append(head_result['revision_id']) | head_rev_ids.append(head_result['revision_id']) | ||||
head_revs = list(self.storage.revision_get(head_rev_ids)) | head_revs = list(self.storage.revision_get(head_rev_ids)) | ||||
assert len(head_revs) == len(head_rev_ids) | assert len(head_revs) == len(head_rev_ids) | ||||
results = [] | results = [] | ||||
for (origin, rev) in zip(origins_with_head, head_revs): | for (origin, rev) in zip(origins_with_head, head_revs): | ||||
if not rev: | if not rev: | ||||
self.log.warning('Missing head revision of origin %r', | self.log.warning('Missing head revision of origin %r', | ||||
origin) | origin['url']) | ||||
continue | continue | ||||
rev_metadata = self.revision_metadata_indexer.index(rev) | rev_metadata = self.revision_metadata_indexer.index(rev) | ||||
orig_metadata = { | orig_metadata = { | ||||
'from_revision': rev_metadata['id'], | 'from_revision': rev_metadata['id'], | ||||
'id': origin['id'], | 'id': origin['id'], | ||||
'origin_url': origin['url'], | |||||
'metadata': rev_metadata['metadata'], | 'metadata': rev_metadata['metadata'], | ||||
'mappings': rev_metadata['mappings'], | 'mappings': rev_metadata['mappings'], | ||||
'indexer_configuration_id': | 'indexer_configuration_id': | ||||
rev_metadata['indexer_configuration_id'], | rev_metadata['indexer_configuration_id'], | ||||
} | } | ||||
results.append((orig_metadata, rev_metadata)) | results.append((orig_metadata, rev_metadata)) | ||||
return results | return results | ||||
Show All 39 Lines |