Differential D8842 Diff 31870 swh/web/utils/archive.py

Changeset View

Standalone View

swh/web/utils/archive.py

Show First 20 Lines • Show All 351 Lines • ▼ Show 20 Lines	) -> Iterable[OriginMetadataInfo]:
if (		if (
search		search
and config.get_config()["search_config"]["metadata_backend"] == "swh-search"		and config.get_config()["search_config"]["metadata_backend"] == "swh-search"
):		):
page_result = search.origin_search(		page_result = search.origin_search(
metadata_pattern=fulltext,		metadata_pattern=fulltext,
limit=limit,		limit=limit,
)		)
matches = idx_storage.origin_intrinsic_metadata_get(		origin_urls = [r["url"] for r in page_result.results]
[r["url"] for r in page_result.results]		metadata = {
)		r.id: r for r in idx_storage.origin_intrinsic_metadata_get(origin_urls)
		}

		# Results from swh-search are not guaranteed to be in
		# idx_storage.origin_intrinsic_metadata (typically when they come from
		# extrinsic metadata; or when the swh-indexer cache is cleared).
		# When they are missing, we only return the origin url.
		matches = [
		metadata[url].to_dict() if url in metadata else {"id": url}
		for url in origin_urls
		ardumont (Unsubmitted, Not Done): I gather that metadata is a dict whose keys are all the URLs present in origin_urls, whether or not the indexer storage call on line 362 returned results for them, right? (I'm trying to determine whether `metadata[url]` can somehow raise a KeyError.)
		vlorentz (Author, Unsubmitted, Done): I think you missed the ternary, which checks that the key is in the dict before accessing it.
		ardumont (Unsubmitted, Not Done): Right, thanks!
		]
else:		else:
matches = idx_storage.origin_intrinsic_metadata_search_fulltext(		matches = [
		match.to_dict()
		for match in idx_storage.origin_intrinsic_metadata_search_fulltext(
conjunction=[fulltext], limit=limit		conjunction=[fulltext], limit=limit
)		)
		]

matches = [match.to_dict() for match in matches]
origins = storage.origin_get([match["id"] for match in matches])		origins = storage.origin_get([match["id"] for match in matches])

for origin, match in zip(origins, matches):		for origin, match in zip(origins, matches):
if not origin:		if not origin:
		# filter out origins not present in the storage, as we do not have any
		# meaningful content to display for that origin at the moment.
		# This may occur when the storage database we use is lagging behind
		# swh-search
continue		continue
for field in ("from_directory", "from_revision"):		for field in ("from_directory", "from_revision"):
# from_directory when using swh.indexer >= 2.0.0, from_revision otherwise		# from_directory when using swh.indexer >= 2.0.0, from_revision otherwise
if field in match:		if field in match:
match[field] = hashutil.hash_to_hex(match[field])		match[field] = hashutil.hash_to_hex(match[field])
del match["id"]		del match["id"]
results.append(OriginMetadataInfo(url=origin.url, metadata=match))		results.append(OriginMetadataInfo(url=origin.url, metadata=match))

▲ Show 20 Lines • Show All 1,056 Lines • Show Last 20 Lines