Changeset View
Changeset View
Standalone View
Standalone View
swh/search/in_memory.py
Show All 12 Lines | |||||
from swh.model import model | from swh.model import model | ||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.search.interface import ( | from swh.search.interface import ( | ||||
SORT_BY_OPTIONS, | SORT_BY_OPTIONS, | ||||
MinimalOriginDict, | MinimalOriginDict, | ||||
OriginDict, | OriginDict, | ||||
PagedResult, | PagedResult, | ||||
) | ) | ||||
from swh.search.utils import get_expansion, is_date_parsable | from swh.search.utils import get_expansion, parse_and_format_date | ||||
_words_regexp = re.compile(r"\w+") | _words_regexp = re.compile(r"\w+") | ||||
def _dict_words_set(d): | def _dict_words_set(d): | ||||
"""Recursively extract set of words from dict content.""" | """Recursively extract set of words from dict content.""" | ||||
values = set() | values = set() | ||||
▲ Show 20 Lines • Show All 207 Lines • ▼ Show 20 Lines | def origin_update(self, documents: Iterable[OriginDict]) -> None: | ||||
intrinsic_metadata = document["intrinsic_metadata"] | intrinsic_metadata = document["intrinsic_metadata"] | ||||
for date_field in ["dateCreated", "dateModified", "datePublished"]: | for date_field in ["dateCreated", "dateModified", "datePublished"]: | ||||
if date_field in intrinsic_metadata: | if date_field in intrinsic_metadata: | ||||
date = intrinsic_metadata[date_field] | date = intrinsic_metadata[date_field] | ||||
# If date{Created,Modified,Published} value isn't parsable | # If date{Created,Modified,Published} value isn't parsable | ||||
# It gets rejected and isn't stored (unlike other fields) | # It gets rejected and isn't stored (unlike other fields) | ||||
if not is_date_parsable(date): | formatted_date = parse_and_format_date(date) | ||||
if formatted_date is None: | |||||
intrinsic_metadata.pop(date_field) | intrinsic_metadata.pop(date_field) | ||||
else: | |||||
intrinsic_metadata[date_field] = formatted_date | |||||
document["intrinsic_metadata"] = codemeta.expand(intrinsic_metadata) | document["intrinsic_metadata"] = codemeta.expand(intrinsic_metadata) | ||||
if len(document["intrinsic_metadata"]) != 1: | if len(document["intrinsic_metadata"]) != 1: | ||||
continue | continue | ||||
metadata = document["intrinsic_metadata"][0] | metadata = document["intrinsic_metadata"][0] | ||||
if "http://schema.org/license" in metadata: | if "http://schema.org/license" in metadata: | ||||
▲ Show 20 Lines • Show All 263 Lines • Show Last 20 Lines |