Changeset View
Changeset View
Standalone View
Standalone View
swh/web/utils/archive.py
Show First 20 Lines • Show All 351 Lines • ▼ Show 20 Lines | ) -> Iterable[OriginMetadataInfo]: | ||||
if ( | if ( | ||||
search | search | ||||
and config.get_config()["search_config"]["metadata_backend"] == "swh-search" | and config.get_config()["search_config"]["metadata_backend"] == "swh-search" | ||||
): | ): | ||||
page_result = search.origin_search( | page_result = search.origin_search( | ||||
metadata_pattern=fulltext, | metadata_pattern=fulltext, | ||||
limit=limit, | limit=limit, | ||||
) | ) | ||||
matches = idx_storage.origin_intrinsic_metadata_get( | origin_urls = [r["url"] for r in page_result.results] | ||||
[r["url"] for r in page_result.results] | metadata = { | ||||
) | r.id: r for r in idx_storage.origin_intrinsic_metadata_get(origin_urls) | ||||
} | |||||
# Results from swh-search are not guaranteed to be in | |||||
# idx_storage.origin_intrinsic_metadata (typically when they come from | |||||
# extrinsic metadata; or when the swh-indexer cache is cleared). | |||||
# When they are missing, we only return the origin url. | |||||
matches = [ | |||||
metadata[url].to_dict() if url in metadata else {"id": url} | |||||
for url in origin_urls | |||||
ardumont: I gather metadata is a dict with keys all urls present in origin_urls (be there results or not… | |||||
Done Inline Actions — vlorentz: I think you missed the ternary, which checks that the key is in the dict before accessing it. | |||||
Not Done Inline Actions — ardumont: Right, thanks! | |||||
] | |||||
else: | else: | ||||
matches = idx_storage.origin_intrinsic_metadata_search_fulltext( | matches = [ | ||||
match.to_dict() | |||||
for match in idx_storage.origin_intrinsic_metadata_search_fulltext( | |||||
conjunction=[fulltext], limit=limit | conjunction=[fulltext], limit=limit | ||||
) | ) | ||||
] | |||||
matches = [match.to_dict() for match in matches] | |||||
origins = storage.origin_get([match["id"] for match in matches]) | origins = storage.origin_get([match["id"] for match in matches]) | ||||
for origin, match in zip(origins, matches): | for origin, match in zip(origins, matches): | ||||
if not origin: | if not origin: | ||||
# filter out origins not present in the storage, as we do not have any | |||||
# meaningful content to display for that origin at the moment. | |||||
# This may occur when the storage database we use is lagging behind | |||||
# swh-search | |||||
continue | continue | ||||
for field in ("from_directory", "from_revision"): | for field in ("from_directory", "from_revision"): | ||||
# from_directory when using swh.indexer >= 2.0.0, from_revision otherwise | # from_directory when using swh.indexer >= 2.0.0, from_revision otherwise | ||||
if field in match: | if field in match: | ||||
match[field] = hashutil.hash_to_hex(match[field]) | match[field] = hashutil.hash_to_hex(match[field]) | ||||
del match["id"] | del match["id"] | ||||
results.append(OriginMetadataInfo(url=origin.url, metadata=match)) | results.append(OriginMetadataInfo(url=origin.url, metadata=match)) | ||||
▲ Show 20 Lines • Show All 1,056 Lines • Show Last 20 Lines |
I gather that `metadata` is a dict whose keys are all the URLs present in `origin_urls` (whether or not the indexer storage call on line 362 returned a result for them), right?
(I'm trying to determine whether `metadata[url]` can somehow raise a `KeyError`.)