Changeset View
Changeset View
Standalone View
Standalone View
swh/search/utils.py
- This file was added.
from datetime import datetime | |||||
vlorentz: This new file is missing a copyright header; please add one.
import iso8601 # type: ignore | |||||
def get_expansion(field, sep=None):
    """Look up the metadata key path associated with a search field.

    ``field`` must be one of the names in the table below (``licenses``,
    ``programming_languages``, ``keywords``, ``descriptions``,
    ``date_created``, ``date_modified``, ``date_published``); any other
    value raises ``KeyError``.

    When ``sep`` is falsy (``None`` or an empty string) the path is returned
    as a list of its components.  Otherwise the components are joined with
    ``sep`` and the resulting string is returned.
    """
    # Built on every call so each caller receives its own list object.
    key_paths = {
        "licenses": ["intrinsic_metadata", "http://schema.org/license", "@id"],
        "programming_languages": [
            "intrinsic_metadata",
            "http://schema.org/programmingLanguage",
            "@value",
        ],
        "keywords": ["intrinsic_metadata", "http://schema.org/keywords", "@value"],
        "descriptions": [
            "intrinsic_metadata",
            "http://schema.org/description",
            "@value",
        ],
        "date_created": [
            "intrinsic_metadata",
            "http://schema.org/dateCreated",
            "@value",
        ],
        "date_modified": [
            "intrinsic_metadata",
            "http://schema.org/dateModified",
            "@value",
        ],
        "date_published": [
            "intrinsic_metadata",
            "http://schema.org/datePublished",
            "@value",
        ],
    }

    # No separator requested: hand back the raw list of components.
    if not sep:
        return key_paths[field]

    return sep.join(key_paths[field])
def is_date_parsable(date_str):
    """Tell whether ``date_str`` can be parsed as a date.

    Two parsers are tried in order: ``datetime.strptime`` with the
    ``%Y-%m-%d`` format, then ``iso8601.parse_date``.  The function returns
    True as soon as one of them accepts the value, and False when both
    raise (any exception counts as "not parsable").

    ISO 8601 is used as the fallback because it is a formally defined
    format, so validation does not depend on the date syntax of a specific
    search backend.
    """
    parse_attempts = (
        lambda: datetime.strptime(date_str, "%Y-%m-%d"),
        lambda: iso8601.parse_date(date_str),
    )

    for attempt in parse_attempts:
        try:
            attempt()
        except Exception:
            # This parser rejected the value; fall through to the next one.
            continue
        return True

    return False
copyright header