Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata.py
Show All 10 Lines | from typing import ( | ||||
Iterable, | Iterable, | ||||
Iterator, | Iterator, | ||||
List, | List, | ||||
Optional, | Optional, | ||||
Tuple, | Tuple, | ||||
TypeVar, | TypeVar, | ||||
) | ) | ||||
import sentry_sdk | |||||
from swh.core.config import merge_configs | from swh.core.config import merge_configs | ||||
from swh.core.utils import grouper | from swh.core.utils import grouper | ||||
from swh.indexer.codemeta import merge_documents | from swh.indexer.codemeta import merge_documents | ||||
from swh.indexer.indexer import ContentIndexer, OriginIndexer, RevisionIndexer | from swh.indexer.indexer import ContentIndexer, OriginIndexer, RevisionIndexer | ||||
from swh.indexer.metadata_detector import detect_metadata | from swh.indexer.metadata_detector import detect_metadata | ||||
from swh.indexer.metadata_dictionary import MAPPINGS | from swh.indexer.metadata_dictionary import MAPPINGS | ||||
from swh.indexer.origin_head import OriginHeadIndexer | from swh.indexer.origin_head import OriginHeadIndexer | ||||
from swh.indexer.storage import INDEXER_CFG_KEY, Sha1 | from swh.indexer.storage import INDEXER_CFG_KEY, Sha1 | ||||
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines | ) -> List[ContentMetadataRow]: | ||||
mapping_name = self.tool["tool_configuration"]["context"] | mapping_name = self.tool["tool_configuration"]["context"] | ||||
log_suffix += ", content_id=%s" % hashutil.hash_to_hex(id) | log_suffix += ", content_id=%s" % hashutil.hash_to_hex(id) | ||||
metadata = MAPPINGS[mapping_name](log_suffix).translate(data) | metadata = MAPPINGS[mapping_name](log_suffix).translate(data) | ||||
except Exception: | except Exception: | ||||
self.log.exception( | self.log.exception( | ||||
"Problem during metadata translation " | "Problem during metadata translation " | ||||
"for content %s" % hashutil.hash_to_hex(id) | "for content %s" % hashutil.hash_to_hex(id) | ||||
) | ) | ||||
sentry_sdk.capture_exception() | |||||
if metadata is None: | if metadata is None: | ||||
return [] | return [] | ||||
return [ | return [ | ||||
ContentMetadataRow( | ContentMetadataRow( | ||||
id=id, | id=id, | ||||
indexer_configuration_id=self.tool["id"], | indexer_configuration_id=self.tool["id"], | ||||
metadata=metadata, | metadata=metadata, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | ) -> List[RevisionIntrinsicMetadataRow]: | ||||
files = [entry for entry in dir_ls if entry["type"] == "file"] | files = [entry for entry in dir_ls if entry["type"] == "file"] | ||||
detected_files = detect_metadata(files) | detected_files = detect_metadata(files) | ||||
(mappings, metadata) = self.translate_revision_intrinsic_metadata( | (mappings, metadata) = self.translate_revision_intrinsic_metadata( | ||||
detected_files, | detected_files, | ||||
log_suffix="revision=%s" % hashutil.hash_to_hex(rev.id), | log_suffix="revision=%s" % hashutil.hash_to_hex(rev.id), | ||||
) | ) | ||||
except Exception as e: | except Exception as e: | ||||
self.log.exception("Problem when indexing rev: %r", e) | self.log.exception("Problem when indexing rev: %r", e) | ||||
sentry_sdk.capture_exception() | |||||
return [ | return [ | ||||
RevisionIntrinsicMetadataRow( | RevisionIntrinsicMetadataRow( | ||||
id=rev.id, | id=rev.id, | ||||
indexer_configuration_id=self.tool["id"], | indexer_configuration_id=self.tool["id"], | ||||
mappings=mappings, | mappings=mappings, | ||||
metadata=metadata, | metadata=metadata, | ||||
) | ) | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines | ) -> Tuple[List[Any], Any]: | ||||
) | ) | ||||
# on the fly possibility: | # on the fly possibility: | ||||
for result in c_metadata_indexer.results: | for result in c_metadata_indexer.results: | ||||
local_metadata = result.metadata | local_metadata = result.metadata | ||||
metadata.append(local_metadata) | metadata.append(local_metadata) | ||||
except Exception: | except Exception: | ||||
self.log.exception("Exception while indexing metadata on contents") | self.log.exception("Exception while indexing metadata on contents") | ||||
sentry_sdk.capture_exception() | |||||
metadata = merge_documents(metadata) | metadata = merge_documents(metadata) | ||||
return (used_mappings, metadata) | return (used_mappings, metadata) | ||||
class OriginMetadataIndexer( | class OriginMetadataIndexer( | ||||
OriginIndexer[Tuple[OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow]] | OriginIndexer[Tuple[OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow]] | ||||
): | ): | ||||
▲ Show 20 Lines • Show All 78 Lines • Show Last 20 Lines |