Changeset View
Changeset View
Standalone View
Standalone View
swh/search/in_memory.py
Show First 20 Lines • Show All 239 Lines • ▼ Show 20 Lines | def origin_update(self, documents: Iterable[OriginDict]) -> None: | ||||
self._origins[id_] | self._origins[id_] | ||||
.get( | .get( | ||||
"last_release_date", | "last_release_date", | ||||
"0001-01-01T00:00:00Z", | "0001-01-01T00:00:00Z", | ||||
) | ) | ||||
.replace("Z", "+00:00") | .replace("Z", "+00:00") | ||||
), | ), | ||||
).isoformat() | ).isoformat() | ||||
if "intrinsic_metadata" in document: | if "jsonld" in document: | ||||
intrinsic_metadata = document["intrinsic_metadata"] | jsonld = document["jsonld"] | ||||
for date_field in ["dateCreated", "dateModified", "datePublished"]: | for date_field in ["dateCreated", "dateModified", "datePublished"]: | ||||
if date_field in intrinsic_metadata: | if date_field in jsonld: | ||||
date = intrinsic_metadata[date_field] | date = jsonld[date_field] | ||||
# If date{Created,Modified,Published} value isn't parsable | # If date{Created,Modified,Published} value isn't parsable | ||||
# It gets rejected and isn't stored (unlike other fields) | # It gets rejected and isn't stored (unlike other fields) | ||||
formatted_date = parse_and_format_date(date) | formatted_date = parse_and_format_date(date) | ||||
if formatted_date is None: | if formatted_date is None: | ||||
intrinsic_metadata.pop(date_field) | jsonld.pop(date_field) | ||||
else: | else: | ||||
intrinsic_metadata[date_field] = formatted_date | jsonld[date_field] = formatted_date | ||||
document["intrinsic_metadata"] = codemeta.expand(intrinsic_metadata) | document["jsonld"] = codemeta.expand(jsonld) | ||||
if len(document["intrinsic_metadata"]) != 1: | if len(document["jsonld"]) != 1: | ||||
continue | continue | ||||
metadata = document["intrinsic_metadata"][0] | metadata = document["jsonld"][0] | ||||
if "http://schema.org/license" in metadata: | if "http://schema.org/license" in metadata: | ||||
metadata["http://schema.org/license"] = [ | metadata["http://schema.org/license"] = [ | ||||
{"@id": license["@id"].lower()} | {"@id": license["@id"].lower()} | ||||
for license in metadata["http://schema.org/license"] | for license in metadata["http://schema.org/license"] | ||||
] | ] | ||||
if "http://schema.org/programmingLanguage" in metadata: | if "http://schema.org/programmingLanguage" in metadata: | ||||
metadata["http://schema.org/programmingLanguage"] = [ | metadata["http://schema.org/programmingLanguage"] = [ | ||||
{"@value": license["@value"].lower()} | {"@value": license["@value"].lower()} | ||||
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | ) -> PagedResult[MinimalOriginDict]: | ||||
hits = filter(predicate, hits) | hits = filter(predicate, hits) | ||||
if metadata_pattern: | if metadata_pattern: | ||||
metadata_pattern_words = set( | metadata_pattern_words = set( | ||||
_words_regexp.findall(metadata_pattern.lower()) | _words_regexp.findall(metadata_pattern.lower()) | ||||
) | ) | ||||
def predicate(match): | def predicate(match): | ||||
if "intrinsic_metadata" not in match: | if "jsonld" not in match: | ||||
return False | return False | ||||
return metadata_pattern_words.issubset( | return metadata_pattern_words.issubset(_dict_words_set(match["jsonld"])) | ||||
_dict_words_set(match["intrinsic_metadata"]) | |||||
) | |||||
hits = filter(predicate, hits) | hits = filter(predicate, hits) | ||||
if url_pattern is None and metadata_pattern is None: | if url_pattern is None and metadata_pattern is None: | ||||
raise ValueError( | raise ValueError( | ||||
"At least one of url_pattern and metadata_pattern must be provided." | "At least one of url_pattern and metadata_pattern must be provided." | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 187 Lines • Show Last 20 Lines |